In [43]:
import torch
from torch.nn import Linear
from torch_geometric.nn import MessagePassing, GAT, GATConv, Linear, to_hetero, HANConv
from torch_geometric.utils import degree, add_self_loops
from torch_geometric.data import HeteroData
from torch_geometric.datasets import DBLP
import torch_geometric.transforms as T
from torch import nn
from torch.nn import functional as F

In [9]:
data = HeteroData()
data['user'].x = torch.Tensor([[1,1,1,1],[2,2,2,2],[3,3,3,3]])
data['game'].x = torch.Tensor([[1,1],[2,2]])
data['dev'].x = torch.Tensor([[1],[2]])

data['user', 'follows', 'user'].edge_index = torch.tensor([[0, 1], [1, 2]])
data['user', 'plays', 'game'].edge_index = torch.tensor([[0, 1,1 ,2], [0, 0, 1,1]])
data['user', 'develops', 'game'].edge_index = torch.tensor([[0, 1], [0, 1]])

data['user', 'plays', 'game'].edge_attr = torch.tensor([[2],[0.5] ,[10], [12]])
data
                              

HeteroData(
  user={ x=[3, 4] },
  game={ x=[2, 2] },
  dev={ x=[2, 1] },
  (user, follows, user)={ edge_index=[2, 2] },
  (user, plays, game)={
    edge_index=[2, 4],
    edge_attr=[4, 1],
  },
  (user, develops, game)={ edge_index=[2, 2] }
)

In [38]:
metapaths = [[('author','paper'), ('paper','author')]]
transform = T.AddMetaPaths(metapaths, drop_orig_edge_types=True)
DBLP_dataset = DBLP(root='../data/DBLP', transform=transform)
data = DBLP_dataset[0]
print(data)


HeteroData(
  metapath_dict={ (author, metapath_0, author)=[2] },
  author={
    x=[4057, 334],
    y=[4057],
    train_mask=[4057],
    val_mask=[4057],
    test_mask=[4057],
  },
  paper={ x=[14328, 4231] },
  term={ x=[7723, 50] },
  conference={ num_nodes=20 },
  (author, metapath_0, author)={ edge_index=[2, 11113] }
)


In [41]:
DBLP_dataset = DBLP(root='../data/DBLP', transform=transform)
data = DBLP_dataset[0]

model = GAT(in_channels=-1, out_channels=4,hidden_channels=64, num_layers=1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
data = data.to(device)


@torch.no_grad()
def test(mask):
    model.eval()
    optimizer.zero_grad()
    out = model(data.x_dict['author'], data.edge_index_dict['author', 'metapath_0','author'])

    pred = out.argmax(dim=-1)
    acc = pred[mask].eq(data['author'].y[mask]).sum().item() / mask.sum().item()
    return float(acc)

for epoch in range(1, 101):
    model.train()
    optimizer.zero_grad()
    out = model(data.x_dict['author'], data.edge_index_dict['author', 'metapath_0','author'])
    mask = data['author'].train_mask
    loss = F.cross_entropy(out[mask], data['author'].y[mask])
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        train_acc = test(data['author'].train_mask)
        val_acc = test(data['author'].val_mask)
        print(f'Epoch: {epoch:03d}, '
              f'Loss: {loss:.4f}, '
              f'Train: {train_acc:.4f}, '
              f'Val: {val_acc:.4f}')

test_acc = test(data['author'].test_mask)
print(f'Test: {test_acc:.4f}')


Epoch: 020, Loss: 1.2387, Train: 0.5150, Val: 0.4850
Epoch: 040, Loss: 1.1202, Train: 0.6675, Val: 0.6375
Epoch: 060, Loss: 1.0195, Train: 0.7575, Val: 0.7175
Epoch: 080, Loss: 0.9355, Train: 0.7975, Val: 0.7350
Epoch: 100, Loss: 0.8656, Train: 0.8250, Val: 0.7500
Test: 0.7384


In [45]:
DBLP_dataset = DBLP(root='../data/DBLP')
data = DBLP_dataset[0]
data['conference'].x = torch.zeros(20,1)
class GAT_hetero(torch.nn.Module):
    def __init__(self, dim_h, dim_out):
        super(GAT_hetero, self).__init__()
        self.conv = GATConv((-1,-1), dim_h, add_self_loops= False)
        self.linear = nn.Linear(dim_h, dim_out)
        
    def forward(self, x, edge_index):
        h = self.conv(x, edge_index).relu()
        h = self.linear(h)
        return h
    
model = GAT_hetero(64, 4)
model = to_hetero(model, data.metadata(), aggr='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = model.to(device)
@torch.no_grad()
def test(mask):
    model.eval()
    optimizer.zero_grad()
    out = model(data.x_dict, data.edge_index_dict)['author']
    pred = out.argmax(dim=-1)
    acc = pred[mask].eq(data['author'].y[mask]).sum().item() / mask.sum().item()
    return float(acc)

for epoch in range(1, 101):
    model.train()
    optimizer.zero_grad()
    out = model(data.x_dict, data.edge_index_dict)['author']
    mask = data['author'].train_mask
    loss = F.cross_entropy(out[mask], data['author'].y[mask])
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        train_acc = test(data['author'].train_mask)
        val_acc = test(data['author'].val_mask)
        print(f'Epoch: {epoch:03d}, '
              f'Loss: {loss:.4f}, '
              f'Train: {train_acc:.4f}, '
              f'Val: {val_acc:.4f}')
test_acc = test(data['author'].test_mask)
print(f'Test: {test_acc:.4f}')

Epoch: 020, Loss: 1.2371, Train: 0.8650, Val: 0.5275
Epoch: 040, Loss: 0.9118, Train: 0.9675, Val: 0.7275
Epoch: 060, Loss: 0.5449, Train: 0.9825, Val: 0.7675
Epoch: 080, Loss: 0.2878, Train: 0.9925, Val: 0.7875
Epoch: 100, Loss: 0.1645, Train: 1.0000, Val: 0.7650
Test: 0.7961


In [48]:
DBLP_dataset = DBLP(root='../data/DBLP')
data = DBLP_dataset[0]
data['conference'].x = torch.zeros(20,1)
class HAN(torch.nn.Module):
    def __init__(self, dim_in,dim_h, dim_out, heads):
        super(HAN, self).__init__()
        self.han = HANConv(dim_in, dim_h, heads=heads, dropout=0.6, metadata = data.metadata())
        self.linear = nn.Linear(dim_h, dim_out)
        
    def forward(self, x, edge_index):
        out = self.han(x, edge_index)
        out = self.linear(out['author'])
        return out
model = HAN(-1, 128, 4, heads=8)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = model.to(device)
@torch.no_grad()
def test(mask):
    model.eval()
    optimizer.zero_grad()
    out = model(data.x_dict, data.edge_index_dict)
    pred = out.argmax(dim=-1)
    acc = pred[mask].eq(data['author'].y[mask]).sum().item() / mask.sum().item()
    return float(acc)
for epoch in range(1, 101):
    model.train()
    optimizer.zero_grad()
    out = model(data.x_dict, data.edge_index_dict)
    mask = data['author'].train_mask
    loss = F.cross_entropy(out[mask], data['author'].y[mask])
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        train_acc = test(data['author'].train_mask)
        val_acc = test(data['author'].val_mask)
        print(f'Epoch: {epoch:03d}, '
              f'Loss: {loss:.4f}, '
              f'Train: {train_acc:.4f}, '
              f'Val: {val_acc:.4f}')
test_acc = test(data['author'].test_mask)
print(f'Test: {test_acc:.4f}')

Epoch: 020, Loss: 1.1620, Train: 0.9350, Val: 0.6875
Epoch: 040, Loss: 0.7918, Train: 0.9650, Val: 0.7300
Epoch: 060, Loss: 0.4677, Train: 0.9825, Val: 0.7875
Epoch: 080, Loss: 0.3169, Train: 0.9925, Val: 0.7950
Epoch: 100, Loss: 0.2120, Train: 0.9975, Val: 0.7925
Test: 0.8176
