# Experiment 1

**Target Task:** Cora 

**Transfer learning Source Task:** Citeseer

**Meta-learning Source Task:** Citeseer, Pubmed
***
## Installs & imports

In [None]:
# Environment setup. The commented-out commands record the PyTorch 1.5.0 / CUDA 10.1
# wheels and the PyTorch Geometric extension packages; they are not executed.
# Only the comet_ml install at the bottom of this cell is live.
# ! pip install torch==1.5.0+cu101 torchvision==0.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html

# ! pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
# ! pip install torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
# ! pip install torch-cluster==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
# ! pip install torch-spline-conv==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
# ! pip install torch-geometric

# ! pip install --ignore-installed "jsonschema>=2.6.0,<3.1.0"
# NOTE(review): comet_ml is installed without a version pin (--upgrade), so the
# installed version can differ between runs of this notebook.
! pip install comet_ml --upgrade --ignore-installed

In [1]:
import numpy as np
from comet_ml import Experiment
import copy
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.nn import GCNConv, SAGEConv, GATConv, VGAE
from torch_geometric.data import DataLoader
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import train_test_split_edges
from tqdm import tqdm
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings("ignore")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

***
## Data & Experiment Setup

In [2]:
# Comet project that every run in this notebook logs to.
proj_name = 'experiment-1'

# Load the Cora dataset from the Planetoid collection, using ./data as the dataset root.
cora = Planetoid('./data', 'Cora')

# Print a short summary of the single graph held by the dataset.
summary_lines = [
    'Cora',
    '-----',
    '{} Nodes; {} Edges'.format(cora[0].num_nodes, cora[0].num_edges),
    'No. of features: {}'.format(cora[0].num_features),
    'No. of classes: {}'.format(cora.num_classes),
]
print('\n'.join(summary_lines))

# Move the graph to the selected device for training.
cora_data = cora[0].to(device)

Cora
-----
2708 Nodes; 10556 Edges
No. of features: 1433
No. of classes: 7


***
## Training base models on Cora

### GCN

In [15]:
# Hyperparameters for the GCN baseline; input/output sizes are read from the Cora dataset.
gcn_hyperparams = dict(
    hidden_dim=256,
    n_features=cora[0].num_features,
    n_classes=cora.num_classes,
    learning_rate=0.001,
    num_epochs=200,
)

In [22]:
class GCN(torch.nn.Module):
    """Two-layer GCNConv network that outputs per-node log-probabilities.

    Args:
        hidden_dim: Output size of the first convolution.
        n_features: Input feature size of the first convolution.
        n_classes: Output size of the second convolution.
    """

    def __init__(self, hidden_dim, n_features, n_classes):
        super().__init__()
        self.conv1 = GCNConv(n_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, n_classes)

    def forward(self, data):
        """Apply conv1 -> ReLU -> conv2 to ``data.x`` over ``data.edge_index``.

        Returns:
            The log-softmax (along dim 1) of the second convolution's output.
        """
        node_feats = data.x
        edges = data.edge_index

        hidden = F.relu(self.conv1(node_feats, edges))
        logits = self.conv2(hidden, edges)
        return F.log_softmax(logits, dim=1)

#### Training GCN on Cora

In [23]:
# Train the GCN on Cora's training nodes, logging the per-epoch training loss and
# the weighted F1 score on the validation nodes to a new Comet experiment.
experiment = Experiment(project_name=proj_name, display_summary_level=0)
experiment.log_parameters(gcn_hyperparams)
experiment.add_tags(['GCN', 'Base'])

try:
    gcn = GCN(hidden_dim=gcn_hyperparams['hidden_dim'],
              n_features=gcn_hyperparams['n_features'],
              n_classes=gcn_hyperparams['n_classes']
              ).to(device)

    optimizer = torch.optim.Adam(gcn.parameters(), lr=gcn_hyperparams['learning_rate'])

    for epoch in tqdm(range(gcn_hyperparams['num_epochs'])):
        # Training: one full-graph step, with the loss restricted to train_mask nodes.
        gcn.train()

        optimizer.zero_grad()
        out = gcn(cora_data)
        loss = F.nll_loss(out[cora_data.train_mask], cora_data.y[cora_data.train_mask])
        loss.backward()
        optimizer.step()

        experiment.log_metric('loss', loss.item(), step=epoch)

        # Validation
        gcn.eval()

        # Evaluation needs no gradients, so do not build an autograd graph for it.
        with torch.no_grad():
            pred = gcn(cora_data).max(dim=1)[1]
        f1 = f1_score(
            cora_data.y[cora_data.val_mask].cpu().numpy(),
            pred[cora_data.val_mask].cpu().numpy(),
            average='weighted'
        )

        # NOTE: the metric key says "test", but the score above is computed on val_mask.
        # The key is left unchanged so it matches the other runs in this project.
        experiment.log_metric('test_F1_score', f1, step=epoch)
finally:
    # Close the Comet run even if training raises part-way through.
    experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/graph-net-experiments/experiment-1/5b88affff91047ad9e9dcfce1a9044aa

100%|██████████| 200/200 [00:02<00:00, 70.18it/s]
COMET INFO: Uploading stats to Comet before program termination (may take several seconds)


### GVAE

See [PyG example](https://github.com/rusty1s/pytorch_geometric/blob/master/examples/autoencoder.py).

In [16]:
# Hyperparameters for the variational graph auto-encoder run.
gvae_hyperparams = dict(
    hidden_dim=256,
    n_features=cora[0].num_features,
    n_classes=cora.num_classes,
    learning_rate=0.001,
    num_epochs=200,
)

# Work on a copy of the Cora graph with the node masks and labels cleared,
# then split its edges into train/validation/test sets.
cora_data_new = copy.copy(cora_data)
cora_data_new.train_mask = None
cora_data_new.val_mask = None
cora_data_new.test_mask = None
cora_data_new.y = None
cora_data_new = train_test_split_edges(cora_data_new)
cora_data_new

Data(test_neg_edge_index=[2, 527], test_pos_edge_index=[2, 527], train_neg_adj_mask=[2708, 2708], train_pos_edge_index=[2, 8976], val_neg_edge_index=[2, 263], val_pos_edge_index=[2, 263], x=[2708, 1433])

In [17]:
class Encoder(torch.nn.Module):
    """Encoder with one shared GCNConv layer followed by two GCNConv heads.

    Args:
        hidden_dim: Output size of the shared first convolution.
        n_features: Input feature size of the first convolution.
        n_classes: Output size of both the ``conv_mu`` and ``conv_logvar`` heads.
    """

    def __init__(self, hidden_dim, n_features, n_classes):
        super().__init__()
        self.conv1 = GCNConv(n_features, hidden_dim)
        self.conv_mu = GCNConv(hidden_dim, n_classes)
        self.conv_logvar = GCNConv(hidden_dim, n_classes)

    def forward(self, data):
        """Encode ``data.x`` using only the edges in ``data.train_pos_edge_index``.

        Returns:
            A ``(conv_mu output, conv_logvar output)`` tuple computed from the
            ReLU-activated output of the shared first layer.
        """
        node_feats = data.x
        train_edges = data.train_pos_edge_index

        hidden = F.relu(self.conv1(node_feats, train_edges))
        mu = self.conv_mu(hidden, train_edges)
        logvar = self.conv_logvar(hidden, train_edges)
        return mu, logvar

In [26]:
# Train the variational graph auto-encoder on the training edges, logging the
# per-epoch loss and the validation AUC / AP to a new Comet experiment.
experiment = Experiment(project_name=proj_name, display_summary_level=0)
experiment.log_parameters(gvae_hyperparams)
experiment.add_tags(['GVAE', 'Base'])

try:
    gvae = VGAE(Encoder(
            hidden_dim=gvae_hyperparams['hidden_dim'],
            n_features=gvae_hyperparams['n_features'],
            n_classes=gvae_hyperparams['n_classes']
        )).to(device)

    optimizer = torch.optim.Adam(gvae.parameters(), lr=gvae_hyperparams['learning_rate'])

    for epoch in tqdm(range(gvae_hyperparams['num_epochs'])):
        # Training
        gvae.train()

        optimizer.zero_grad()
        z = gvae.encode(cora_data_new)
        # Reconstruction loss on the training edges plus the KL term scaled by 1/num_nodes.
        # (The reconstruction loss is computed once; an earlier duplicate call whose
        # result was discarded has been removed.)
        loss = gvae.recon_loss(z, cora_data_new.train_pos_edge_index) + (1 / cora_data_new.num_nodes) * gvae.kl_loss()
        loss.backward()
        optimizer.step()

        experiment.log_metric('loss', loss.item(), step=epoch)

        # Validation: score held-out positive edges against the sampled negative edges.
        gvae.eval()
        with torch.no_grad():
            z = gvae.encode(cora_data_new)
            auc, ap = gvae.test(z, cora_data_new.val_pos_edge_index, cora_data_new.val_neg_edge_index)

        experiment.log_metric('auc', auc, step=epoch)
        experiment.log_metric('ap', ap, step=epoch)
finally:
    # Close the Comet run even if training raises part-way through.
    experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/graph-net-experiments/experiment-1/c277037d3c414528b3fe6a00351ecf1a

100%|██████████| 200/200 [00:10<00:00, 18.32it/s]
COMET INFO: Uploading stats to Comet before program termination (may take several seconds)


### GraphSAGE

In [55]:
# Hyperparameters for the GraphSAGE baseline; input/output sizes are read from the Cora dataset.
graphsage_hyperparams = dict(
    hidden_dim=256,
    n_features=cora[0].num_features,
    n_classes=cora.num_classes,
    learning_rate=0.001,
    num_epochs=200,
)

In [56]:
class GraphSAGE(torch.nn.Module):
    """Two-layer SAGEConv network that outputs per-node log-probabilities.

    Args:
        hidden_dim: Output size of the first convolution.
        n_features: Input feature size of the first convolution.
        n_classes: Output size of the second convolution.
    """

    def __init__(self, hidden_dim, n_features, n_classes):
        super().__init__()
        self.conv1 = SAGEConv(n_features, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, n_classes)

    def forward(self, data):
        """Apply conv1 -> ReLU -> conv2 to ``data.x`` over ``data.edge_index``.

        Returns:
            The log-softmax (along dim 1) of the second convolution's output.
        """
        feats = data.x
        graph_edges = data.edge_index

        activated = F.relu(self.conv1(feats, graph_edges))
        scores = self.conv2(activated, graph_edges)
        return F.log_softmax(scores, dim=1)

In [62]:
# Train GraphSAGE on Cora's training nodes, logging the per-epoch training loss and
# the weighted F1 score on the validation nodes to a new Comet experiment.
experiment = Experiment(project_name=proj_name, display_summary_level=0)
experiment.log_parameters(graphsage_hyperparams)
experiment.add_tags(['GraphSAGE', 'Base'])

try:
    graphsage = GraphSAGE(hidden_dim=graphsage_hyperparams['hidden_dim'],
                          n_features=graphsage_hyperparams['n_features'],
                          n_classes=graphsage_hyperparams['n_classes']
                          ).to(device)

    optimizer = torch.optim.Adam(graphsage.parameters(), lr=graphsage_hyperparams['learning_rate'])

    for epoch in tqdm(range(graphsage_hyperparams['num_epochs'])):
        # Training: one full-graph step, with the loss restricted to train_mask nodes.
        graphsage.train()

        optimizer.zero_grad()
        out = graphsage(cora_data)
        loss = F.nll_loss(out[cora_data.train_mask], cora_data.y[cora_data.train_mask])
        loss.backward()
        optimizer.step()

        experiment.log_metric('loss', loss.item(), step=epoch)

        # Validation
        graphsage.eval()

        # Evaluation needs no gradients, so do not build an autograd graph for it.
        with torch.no_grad():
            pred = graphsage(cora_data).max(dim=1)[1]
        f1 = f1_score(
            cora_data.y[cora_data.val_mask].cpu().numpy(),
            pred[cora_data.val_mask].cpu().numpy(),
            average='weighted'
        )

        # NOTE: the metric key says "test", but the score above is computed on val_mask.
        # The key is left unchanged so it matches the other runs in this project.
        experiment.log_metric('test_F1_score', f1, step=epoch)
finally:
    # Close the Comet run even if training raises part-way through.
    experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/graph-net-experiments/experiment-1/db4852b3b7e94fc7b06b424e39cf1767

100%|██████████| 200/200 [00:04<00:00, 43.18it/s]
COMET INFO: Uploading stats to Comet before program termination (may take several seconds)


In [11]:
# Leftover interactive help lookup (`gvae.test?`) removed: it produced no value used by the notebook.

### GAT

In [63]:
# Hyperparameters for the GAT baseline; input/output sizes are read from the Cora dataset.
gat_hyperparams = dict(
    hidden_dim=256,
    n_features=cora[0].num_features,
    n_classes=cora.num_classes,
    learning_rate=0.001,
    num_epochs=200,
)

In [64]:
class GAT(torch.nn.Module):
    """Two-layer GATConv network that outputs per-node log-probabilities.

    Args:
        hidden_dim: Output size of the first convolution.
        n_features: Input feature size of the first convolution.
        n_classes: Output size of the second convolution.
    """

    def __init__(self, hidden_dim, n_features, n_classes):
        super().__init__()
        self.conv1 = GATConv(n_features, hidden_dim)
        self.conv2 = GATConv(hidden_dim, n_classes)

    def forward(self, data):
        """Apply conv1 -> ReLU -> conv2 to ``data.x`` over ``data.edge_index``.

        Returns:
            The log-softmax (along dim 1) of the second convolution's output.
        """
        inputs = data.x
        connectivity = data.edge_index

        first_layer_out = self.conv1(inputs, connectivity)
        second_layer_out = self.conv2(F.relu(first_layer_out), connectivity)
        return F.log_softmax(second_layer_out, dim=1)

In [65]:
# Train the GAT on Cora's training nodes, logging the per-epoch training loss and
# the weighted F1 score on the validation nodes to a new Comet experiment.
experiment = Experiment(project_name=proj_name, display_summary_level=0)
experiment.log_parameters(gat_hyperparams)
experiment.add_tags(['GAT', 'Base'])

try:
    gat = GAT(hidden_dim=gat_hyperparams['hidden_dim'],
              n_features=gat_hyperparams['n_features'],
              n_classes=gat_hyperparams['n_classes']
              ).to(device)

    optimizer = torch.optim.Adam(gat.parameters(), lr=gat_hyperparams['learning_rate'])

    for epoch in tqdm(range(gat_hyperparams['num_epochs'])):
        # Training: one full-graph step, with the loss restricted to train_mask nodes.
        gat.train()

        optimizer.zero_grad()
        out = gat(cora_data)
        loss = F.nll_loss(out[cora_data.train_mask], cora_data.y[cora_data.train_mask])
        loss.backward()
        optimizer.step()

        experiment.log_metric('loss', loss.item(), step=epoch)

        # Validation
        gat.eval()

        # Evaluation needs no gradients, so do not build an autograd graph for it.
        with torch.no_grad():
            pred = gat(cora_data).max(dim=1)[1]
        f1 = f1_score(
            cora_data.y[cora_data.val_mask].cpu().numpy(),
            pred[cora_data.val_mask].cpu().numpy(),
            average='weighted'
        )

        # NOTE: the metric key says "test", but the score above is computed on val_mask.
        # The key is left unchanged so it matches the other runs in this project.
        experiment.log_metric('test_F1_score', f1, step=epoch)
finally:
    # Close the Comet run even if training raises part-way through.
    experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/graph-net-experiments/experiment-1/fe47b05265584e43ad0212f9262d767a

100%|██████████| 200/200 [00:04<00:00, 40.66it/s]
COMET INFO: Uploading stats to Comet before program termination (may take several seconds)
