# DropGNN + Contrastive Learning Prototyping

Work based off of DropGNN

https://arxiv.org/pdf/2111.06283.pdf 

https://github.com/KarolisMart/DropGNN 

In [1]:
import os.path as osp
import numpy as np
import networkx as nx
import time
import random
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
from sklearn.model_selection import StratifiedKFold


# Torch Geometric 
try: 
    from torch_geometric.data import DataLoader, Data
    from torch_geometric.data.dataloader import Collater
    from torch_geometric.datasets import TUDataset
    from torch_geometric.utils import degree
    from torch_geometric.utils.convert import from_networkx
    from torch_geometric.nn import GINConv, GINEConv, global_add_pool
except ModuleNotFoundError: 
    !pip install torch_geometric
    from torch_geometric.data import DataLoader, Data
    from torch_geometric.data.dataloader import Collater
    from torch_geometric.datasets import TUDataset
    from torch_geometric.utils import degree
    from torch_geometric.utils.convert import from_networkx
    from torch_geometric.nn import GINConv, GINEConv, global_add_pool
    
# Pytorch Metric Learning
try: 
    from pytorch_metric_learning import losses
except ModuleNotFoundError:
    !pip install pytorch-metric-learning
    from pytorch_metric_learning import losses

  from .autonotebook import tqdm as notebook_tqdm


## Import Dataset

Use the IMDB-Binary dataset

In [2]:
class MyFilter(object):
    def __call__(self, data):
        return data.num_nodes <= 70

class MyPreTransform(object):
    def __call__(self, data):
        data.x = degree(data.edge_index[0], data.num_nodes, dtype=torch.long)
        data.x = F.one_hot(data.x, num_classes=69).to(torch.float)
        return data 

In [3]:
# Download data 
path = osp.join(osp.dirname(osp.realpath("./")), 'data', f'IMDB-BINARY')

dataset = TUDataset(
    path, 
    name = "IMDB-BINARY", 
    pre_transform = MyPreTransform(), 
    pre_filter = MyFilter()
)

In [4]:
print(dataset)

IMDB-BINARY(996)


Use the IMDB-Multi dataset

In [5]:
# Download data 
path = osp.join(osp.dirname(osp.realpath("./")), 'data', f'IMDB-MULTI')

dataset2 = TUDataset(
    path, 
    name = "IMDB-MULTI", 
    pre_transform = MyPreTransform(), 
    pre_filter = MyFilter()
)

In [6]:
print(dataset2)

IMDB-MULTI(1498)


## DropGNN + Contrastive Learning Model

DropGNN Model with Contrastive Learning Methods using SimCLR Contrastive Learning Framework

In [7]:
class DropGNN_Contrastive(nn.Module):
    def __init__(self, num_features, num_reps, num_classes, hidden_units, use_aux_loss=True):
        super(DropGNN_Contrastive, self).__init__()

        # Set starting parameters for model 
        self.num_features = num_features   # Number of initial features 
        self.num_reps = num_reps           # Number of features in representation vector 
        self.num_classes = num_classes     # Number of different classes
        self.dim = hidden_units            # Number of units for hidden layers
        self.use_aux_loss = use_aux_loss   # Whether to include aux loss to total loss

        # Number of layers in model
        self.num_layers = 4

        self.convs = nn.ModuleList()                     # Made of num_layers GINConv (linear -> batchnorm1d -> relu -> linear)
        self.bns = nn.ModuleList()                       # Made of num_layers BatchNorm1d 
        self.reps = nn.ModuleList() # nn.Linear(self.dim, self.num_reps)   # Layer between base model and contrastive learning representation
        self.fcs = nn.ModuleList()                       # Made of num_layers + 1 Linear layers mapping from num_features or dim to num_reps

        # Add initial layer from num_features to dim 
        self.convs.append(GINConv(nn.Sequential(nn.Linear(self.num_features, self.dim), nn.BatchNorm1d(self.dim), nn.ReLU(), nn.Linear(self.dim, self.dim))))
        self.bns.append(nn.BatchNorm1d(self.dim))
        self.reps.append(nn.Linear(self.num_features, self.num_reps))
        self.reps.append(nn.Linear(self.dim, self.num_reps))
        self.fcs.append(nn.Linear(self.num_features, self.num_classes))
        self.fcs.append(nn.Linear(self.dim, self.num_classes))

        # Add additional layers from dim to dim 
        for i in range(self.num_layers-1):
            self.convs.append(GINConv(nn.Sequential(nn.Linear(self.dim, self.dim), nn.BatchNorm1d(self.dim), nn.ReLU(), nn.Linear(self.dim, self.dim))))
            self.bns.append(nn.BatchNorm1d(self.dim))
            self.reps.append(nn.Linear(self.dim, self.num_reps))
            self.fcs.append(nn.Linear(self.dim, self.num_classes))

        # Use aux_loss for dropGNN: made of num_layers + 1 linear layers 
        # Adds new module list of linear layers 
        if self.use_aux_loss:
            self.aux_fcs = nn.ModuleList()
            self.aux_fcs.append(nn.Linear(self.num_features, self.num_classes))
            for i in range(self.num_layers):
                self.aux_fcs.append(nn.Linear(self.dim, self.num_classes))
        
    def reset_parameters(self):
        # Resets parameters for Linear, GINConv, and BatchNorm1d layers
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.reset_parameters()
            elif isinstance(m, GINConv):
                m.reset_parameters()
            elif isinstance(m, nn.BatchNorm1d):
                m.reset_parameters()
                
    def forward(self, data, mode="test", p=None, dropout=None, num_runs=20):
        # Runs different modes based on whether running contrastive loss or making predictions
        if mode == 'contrastive':
            return self.contrastive(data, p, num_runs)
        else:
            return self.prediction(data, dropout, num_runs)
        
    def contrastive(self, data, p, num_runs):
        # Trains contrastive model and representation vector model 
        
        # Note: num_runs in DropGNN is average number of nodes in each graph in dataset
        # Note: p is 2 * 1 / (1 + gamma), but for this project, p is selected to create augmented views 
        
        self.p = p
        self.num_runs = num_runs
        
        # Store all graphs in sampled batch as one large graph with separate components
        x = data.x                     # All nodes and their features (# nodes x # node features)
        edge_index = data.edge_index   # All edge index pairs from large single graph
        batch = data.batch             # Batch numbers that group nodes within the same graph with same batch number
        
        # Do runs in parallel by repeating nodes and creating num_runs different views
        x = x.unsqueeze(0).expand(self.num_runs, -1, -1).clone()   # Creates num_runs copy of node features
        drop = torch.bernoulli(torch.ones([x.size(0), x.size(1)], device=x.device) * self.p).bool()   #  Randomly determine whether node is dropped within each copy of num_runs
        x[drop] = torch.zeros([drop.sum().long().item(), x.size(-1)], device=x.device)  # Drop nodes from graphs  
        del drop
        
        # Allow gradients to update base model 
        if self.training:
            for layer in self.convs: 
                for p in layer.parameters():
                    p.requires_grad = True

            for layer in self.bns:
                for p in layer.parameters():
                    p.requires_grad = True
        
        # Run augmented subgraph through model 
        outs = [x]  # Used to store n-hop neighborhood representations, after running through model n times
        x = x.view(-1, x.size(-1))  # Concat all num_run copies of nodes 
        run_edge_index = edge_index.repeat(1, self.num_runs) + torch.arange(self.num_runs, device=edge_index.device).repeat_interleave(edge_index.size(1)) * (edge_index.max() + 1) # Transform edge_index to correspond to the same nodes in concatenated form  
        for i in range(self.num_layers):
            x = self.convs[i](x, run_edge_index)  # Run node features and edge indices through CONV layer 
            x = self.bns[i](x)  # Run resulting values through BatchNorm1d
            x = F.relu(x)   # Run final values through RELU
            outs.append(x.view(self.num_runs, -1, x.size(-1)))  # Return x back to original stacked form 
        del run_edge_index
        
        # Aggregates results of runs by taking mean of each run and summing results of runs
        out = None
        for i, x in enumerate(outs):
            x = x.mean(dim=0)                  # Take average of all node features of same nodes 
            x = global_add_pool(x, batch)      # Take the sum of all node features for nodes in same graph 
            x = self.reps[i](x)                # Run graph features into linear layer to get contrastive representation
            if out is None:
                out = x
            else:
                out += x
                
        # Returns all contrastive graph embeddings in batch 
        return out
    
    def prediction(self, data, dropout, num_runs):
        self.dropout = dropout
        self.num_runs = num_runs
        
        # Create intermediate representations 
        x = data.x 
        edge_index = data.edge_index
        batch = data.batch 
        
        # Do runs in parallel, by repeating the graphs in the batch
        x = x.unsqueeze(0).expand(self.num_runs, -1, -1).clone()   # Flattens features and creates num_runs copy of them 
        drop = torch.bernoulli(torch.ones([x.size(0), x.size(1)], device=x.device) * self.p).bool()   #  Returns a tensor of randomly dropped nodes based on p (p = probability of dropping) 
        x[drop] = torch.zeros([drop.sum().long().item(), x.size(-1)], device=x.device)  # Drop nodes from data  
        del drop
        
        # Stop gradients from updating base model 
        for layer in self.convs:
            for p in layer.parameters():
                p.requires_grad = False
                
        for layer in self.bns:
            for p in layer.parameters():
                p.requires_grad = False
        
        # Run augmented subgraph through model 
        outs = [x]  # Used to store view of x after each layer 
        x = x.view(-1, x.size(-1))  # Swap dimensions of data features 
        run_edge_index = edge_index.repeat(1, self.num_runs) + torch.arange(self.num_runs, device=edge_index.device).repeat_interleave(edge_index.size(1)) * (edge_index.max() + 1) # Expand edge_index and augment values
        for i in range(self.num_layers):
            x = self.convs[i](x, run_edge_index)  # Run node features and edge indices through CONV layer 
            x = self.bns[i](x)  # Run resulting values through BatchNorm1d
            x = F.relu(x)   # Run final values through RELU
            outs.append(x.view(self.num_runs, -1, x.size(-1)))    # Rearrange dimensions and append to outs 
        del run_edge_index
        
        # Aggregates results of runs by summing mean and applying random dropout (not dropping out nodes)
        out = None
        for i, x in enumerate(outs):
            x = x.mean(dim=0)
            x = global_add_pool(x, batch)
            x = F.dropout(self.fcs[i](x), p=self.dropout, training=self.training)
            if out is None:
                out = x
            else:
                out += x
        
        # Returns the likelihood of each outcome class
        return F.log_softmax(out, dim=-1)  # Swap this with code block below to allow aux loss, removed for simplicity
    
#         if self.use_aux_loss:
#             # Use predictions from individual runs instead of aggregating results of all runs (no mean(x))
#             aux_out = torch.zeros(self.num_runs, out.size(0), out.size(1), device=out.device)
#             run_batch = batch.repeat(self.num_runs) + torch.arange(self.num_runs, device=edge_index.device).repeat_interleave(batch.size(0)) * (batch.max() + 1) # Assign updated batch numbers such that each individual run is counted separately
#             for i, x in enumerate(outs):
#                 x = x.view(-1, x.size(-1))
#                 x = global_add_pool(x, run_batch)
#                 x = x.view(self.num_runs, -1, x.size(-1))
#                 x = F.dropout(self.aux_fcs[i](x), p=self.dropout, training=self.training)
#                 aux_out += x

#             # Returns probabilities of each class based on aggregated results 
#             return F.log_softmax(out, dim=-1), F.log_softmax(aux_out, dim=-1)
#         else:
#             return F.log_softmax(out, dim=-1), 0

## Training Modules

Training Functions to run Contrastive Learning and Downstream Task Prediction

### Contrastive Learning Training Modules

In [11]:
# Used to train contrastive model 
def train_contrastive(model, loader, optimizer, loss_fn, p1 = 0.1, p2 = 0.2):
    # Set model to training
    model.train()
    
#     # Create self-supervised loss function
#     loss_func = losses.SelfSupervisedLoss(losses.TripletMarginLoss())
    
    # Run data through model and update model
    loss_all = 0
    n = 0 
    for data in loader: 
        data = data.to(device)
        optimizer.zero_grad()
        embeddings_1 = model(data, mode = "contrastive", p = p1)
        embeddings_2 = model(data, mode = "contrastive", p = p2)
        
        # Used as loss(embeddings, labels)
        loss = loss_fn(embeddings_1, embeddings_2)
        loss.backward()
        optimizer.step() 
        
        loss_all += data.num_graphs * loss.item()
        n += data.num_graphs
    return loss_all / n

In [12]:
# Used to validate contrastive model 
def valid_contrastive(model, loader, loss_fn, p1 = 0.1, p2 = 0.2):
    # Set model to eval
    model.eval()
    
#     # Create self-supervised loss function
#     loss_func = losses.SelfSupervisedLoss(losses.TripletMarginLoss())
    
    with torch.no_grad():
        loss_all = 0
        n = 0
        for data in loader: 
            data = data.to(device)
            embeddings_1 = model(data, mode = "contrastive", p = p1)
            embeddings_2 = model(data, mode = "contrastive", p = p2)
            loss = loss_fn(embeddings_1, embeddings_2)
            
            loss_all += data.num_graphs * loss.item()
            n += data.num_graphs
    return loss_all / n

### Prediction Training Modules

In [13]:
# Used to train prediction model AFTER contrastive learning 
def train_prediction(model, loader, optimizer, dropout = 0.5):
    # Set model to training
    model.train()
    
    # Run data through model and update model 
    loss_all = 0
    n = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        results = model(data, mode = "prediction", dropout = dropout)
        loss = F.nll_loss(results, data.y)
        
#         if model.use_aux_loss:
#             aux_loss = F.nll_loss(aux_logs.view(-1, aux_logs.size(-1)), data.y.unsqueeze(0).expand(aux_logs.size(0), -1).clone().view(-1))
#             loss = 0.75 * loss + 0.25 * aux_loss 
    
        loss.backward()
        loss_all += data.num_graphs * loss.item()
        n += len(data.y)
        optimizer.step()

    return loss_all / n

In [14]:
# Used to validate prediction model AFTER contrastive learning, returns loss 
def valid_prediction(model, loader, dropout = 0.5):
    # Set model to eval
    model.eval()
    
    # Run data through model
    with torch.no_grad():
        loss_all = 0
        n = 0
        for data in loader:
            data = data.to(device)
            results = model(data, mode = "prediction", dropout = dropout)
            loss = F.nll_loss(results, data.y)

#             if model.use_aux_loss:
#                 aux_loss = F.nll_loss(aux_logs.view(-1, aux_logs.size(-1)), data.y.unsqueeze(0).expand(aux_logs.size(0), -1).clone().view(-1))
#                 loss = 0.75 * loss + 0.25 * aux_loss 
                
            loss_all += data.num_graphs * loss.item()
            n += len(data.y)

    return loss_all / n

In [15]:
# Used to test prediction model AFTER contrastive learning, returns accuracy
def test_prediction(model, loader, dropout = 0.5):
    # Set model to eval
    model.eval() 
    
    # Run data through model and make predictions
    with torch.no_grad():
        correct = 0
        for data in loader: 
            data = data.to(device)
            results = model(data, mode = "prediction", dropout = dropout)
            pred = results.max(1)[1]
            correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)

## Split Dataset into K-Folds

In [16]:
def separate_data(dataset_len, seed=0):
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
    idx_list = []
    for idx in folds.split(np.zeros(dataset_len), np.zeros(dataset_len)):
        idx_list.append(idx)
    return idx_list

## Training and Evaluation Loop

In [17]:
def training_loop(model, dataset, train_idx, test_idx, batch_size, epochs, p1, p2, dropout, device, lr=0.001, seed=0, m=10):
    """
    Runs a single training loop based on given training and testing indices 
    """
    # Set random seeds 
    torch.manual_seed(seed)
    np.random.seed(seed)
    
    # Set batch size and number of epochs 
    BATCH = batch_size
    NUM_EPOCHS = epochs 
    
    # Create training and testing datasets
    train_dataset = dataset[train_idx.tolist()]
    test_dataset = dataset[test_idx.tolist()]
    train_loader = torch.utils.data.DataLoader(train_dataset, sampler=torch.utils.data.RandomSampler(train_dataset, replacement=True, num_samples=int(len(train_dataset)*50/(len(train_dataset)/BATCH))), batch_size=BATCH, drop_last=False, collate_fn=Collater(follow_batch=[],exclude_keys=[]))
    test_loader = DataLoader(test_dataset, batch_size=BATCH)
    
    # Set up for contrastive learning
    loss_func = losses.SelfSupervisedLoss(losses.NTXentLoss())   # Specify contrastive loss function to use 
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)      # Optimizer for model to use 
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5) # Used to adjust learning rate while training 
    
    # CONTRASTIVE LEARNING: Train model on contrastive representation 
    contrastive_losses = []
    print("STARTING CONTRASTIVE LEARNING")
    for epoch in range(NUM_EPOCHS):
        if epoch % m == 0:
            start = time.time()

        lr = scheduler.optimizer.param_groups[0]['lr']
        train_loss = train_contrastive(model, train_loader, optimizer, loss_func, p1=p1, p2=p2)
        scheduler.step()
        test_loss = valid_contrastive(model, test_loader, loss_func, p1=p1, p2=p2)
        contrastive_losses.append(test_loss)

        if epoch % m == 0:
            print('Epoch: {:03d}, LR: {:7f}, Train Loss: {:.7f}, '
                'Val Loss: {:.7f}, Time: {:7f}'.format(
                    epoch, lr, train_loss, test_loss, time.time() - start), flush=True)
            
    # Set up for prediction 
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)      # Optimizer for model to use 
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5) # Used to adjust learning rate while training 
    
    # PREDICTION: Train model using contrastive representations to make predictions 
    prediction_losses = []
    print("\nSTARTING PREDICTION LEARNING")
    for epoch in range(NUM_EPOCHS):
        if epoch % m == 0:
            start = time.time()

        lr = scheduler.optimizer.param_groups[0]['lr']
        train_loss = train_prediction(model, train_loader, optimizer, dropout=dropout)
        scheduler.step()
        test_loss = valid_prediction(model, test_loader, dropout=dropout)
        prediction_losses.append(test_loss)

        if epoch % m == 0:
            print('Epoch: {:03d}, LR: {:7f}, Train Loss: {:.7f}, '
                'Val Loss: {:.7f}, Time: {:7f}'.format(
                    epoch, lr, train_loss, test_loss, time.time() - start), flush=True)
            
    # Test final accuracy of final model 
    test_acc = test_prediction(model, test_loader, dropout=dropout)
    print(f"\nFinal Prediction Accuracy: {test_acc}\n")
    
    return contrastive_losses, prediction_losses, test_acc

In [18]:
def evaluation_loop(model, dataset, splits, batch_size, epochs, p1, p2, dropout, device, lr=0.001, seed=0, m=10):
    # Train model on different splits, meant to evaluate model, not save best model
    contrastive_loss = []
    prediction_loss = []
    test_accuracies = []
    
    # Train a new model on every fold for evaluation
    for i, (train_idx, test_idx) in enumerate(splits): 
        print(f"Running Split {i}")
        model.reset_parameters()    # Resets upon every new fold 
        c_loss, p_loss, t_acc = training_loop(model, dataset, train_idx, test_idx, batch_size, epochs, p1, p2, dropout, device, lr, seed, m)
        contrastive_loss.append(torch.tensor(c_loss))
        prediction_loss.append(torch.tensor(p_loss))
        test_accuracies.append(torch.tensor(t_acc))
        
    # Calculate average contrastive loss and return best epoch for contrastive loss
    contrastive_loss = torch.stack(contrastive_loss, dim=0)
    contrastive_loss_mean = contrastive_loss.mean(dim=0)
    best_contrastive_epoch = contrastive_loss_mean.argmin().item()

    # Calculate average prediction loss and return best epoch for predictions 
    prediction_loss = torch.stack(prediction_loss, dim=0)
    prediction_loss_mean = prediction_loss.mean(dim=0)
    best_prediction_epoch = prediction_loss_mean.argmin().item()
    
    return (contrastive_loss, contrastive_loss_mean, best_contrastive_epoch), (prediction_loss, prediction_loss_mean, best_prediction_epoch), test_accuracies

## Run DropGNN + Contrastive Learning Model

### Train Model on IMDB-Binary

In [26]:
"""
SET UP PARAMETERS FOR TRAINING
"""

# Set random seeds
torch.manual_seed(0)
np.random.seed(0)

# Set batch size
BATCH = 32    # Default batch size in DropGNN

# Set number of epochs
NUM_EPOCHS = 100

# Set size of contrastive representation
NUM_REPS = 32

# Set node dropout probabilities 
p1 = 0.1
p2 = 0.2

# Set embedding dropout probabilities 
dropout = 0.5

# Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Device: {device}")

Device: cuda


In [27]:
"""
CREATE MODEL AND CREATE DATA SPLITS
"""

# Create model
model = DropGNN_Contrastive(num_features=dataset.num_features, num_reps=NUM_REPS, num_classes=dataset.num_classes, hidden_units=64).to(device)

# Split dataset
n = len(dataset)
splits = separate_data(n, seed=2)

In [19]:
# Run one test of training a single split 
con_loss, pred_loss, test_acc = training_loop(model, dataset, train_idx=splits[0][0], test_idx=splits[0][0], batch_size=BATCH, epochs=NUM_EPOCHS, p1=p1, p2=p2, dropout=dropout, device=device, lr=0.001, seed=0, m=10)

STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.001000, Train Loss: 0.2182636, Val Loss: 0.2284868, Time: 2.268660
Epoch: 010, LR: 0.001000, Train Loss: 0.1397976, Val Loss: 0.1764665, Time: 1.740617
Epoch: 020, LR: 0.001000, Train Loss: 0.1519121, Val Loss: 0.1772696, Time: 1.676606
Epoch: 030, LR: 0.001000, Train Loss: 0.1576299, Val Loss: 0.1751140, Time: 1.913610
Epoch: 040, LR: 0.001000, Train Loss: 0.1557956, Val Loss: 0.1684278, Time: 1.787359
Epoch: 050, LR: 0.000500, Train Loss: 0.1466705, Val Loss: 0.1639943, Time: 1.690199
Epoch: 060, LR: 0.000500, Train Loss: 0.1393766, Val Loss: 0.1641586, Time: 1.700798
Epoch: 070, LR: 0.000500, Train Loss: 0.1576357, Val Loss: 0.1643149, Time: 1.966390
Epoch: 080, LR: 0.000500, Train Loss: 0.1486617, Val Loss: 0.1641138, Time: 1.697750
Epoch: 090, LR: 0.000500, Train Loss: 0.1440432, Val Loss: 0.1634369, Time: 1.685822

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 5.1914301, Val Loss: 1.6985630, Time: 0.693039
Epoch

In [32]:
# Evaluate model performance over several different splits
eval_results = evaluation_loop(model, dataset, splits[:2], batch_size=BATCH, epochs=NUM_EPOCHS, p1=p1, p2=p2, dropout=dropout, device=device, lr=0.001, seed=0, m=10)

Running Split 0
STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.001000, Train Loss: 0.8577212, Val Loss: 0.7581988, Time: 1.481414
Epoch: 010, LR: 0.001000, Train Loss: 0.7217305, Val Loss: 0.6846188, Time: 1.481297
Epoch: 020, LR: 0.001000, Train Loss: 0.7343596, Val Loss: 0.6867907, Time: 1.486436
Epoch: 030, LR: 0.001000, Train Loss: 0.7185254, Val Loss: 0.6825860, Time: 1.572199
Epoch: 040, LR: 0.001000, Train Loss: 0.7567702, Val Loss: 0.6926438, Time: 1.653671
Epoch: 050, LR: 0.000500, Train Loss: 0.7936304, Val Loss: 0.6865512, Time: 1.413609
Epoch: 060, LR: 0.000500, Train Loss: 0.7132576, Val Loss: 0.6893845, Time: 1.486974
Epoch: 070, LR: 0.000500, Train Loss: 0.6701815, Val Loss: 0.6822991, Time: 1.385326
Epoch: 080, LR: 0.000500, Train Loss: 0.7677331, Val Loss: 0.6827128, Time: 1.464864
Epoch: 090, LR: 0.000500, Train Loss: 0.7038253, Val Loss: 0.6834503, Time: 1.410454

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 4.7479936, Val Loss: 2.4119128, Time

### Train Model on IMDB-Multi

In [22]:
"""
SET UP PARAMETERS FOR TRAINING
"""

# Set random seeds
torch.manual_seed(0)
np.random.seed(0)

# Set batch size
BATCH = 32    # Default batch size in DropGNN

# Set number of epochs
NUM_EPOCHS = 100

# Set size of contrastive representation 
NUM_REPS = 64

# Set node dropout probabilities 
p1 = 0.1
p2 = 0.2

# Set embedding dropout probabilities 
dropout = 0.5

# Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Device: {device}")

Device: cuda


In [23]:
"""
CREATE MODEL AND CREATE DATA SPLITS
"""

# Create model
model2 = DropGNN_Contrastive(num_features=dataset2.num_features, num_reps=NUM_REPS, num_classes=dataset2.num_classes, hidden_units=64).to(device)

# Split dataset
n = len(dataset2)
splits = separate_data(n, seed=2)

In [21]:
# Run one test of training a single split 
con_loss2, pred_loss2, test_acc2 = training_loop(model2, dataset2, train_idx=splits[0][0], test_idx=splits[0][0], batch_size=BATCH, epochs=NUM_EPOCHS, p1=p1, p2=p2, dropout=dropout, device=device, lr=0.001, seed=0, m=10)

STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.001000, Train Loss: 0.8356679, Val Loss: 0.7995140, Time: 2.505042
Epoch: 010, LR: 0.001000, Train Loss: 0.7122133, Val Loss: 0.7579982, Time: 1.750991
Epoch: 020, LR: 0.001000, Train Loss: 0.6926598, Val Loss: 0.7495884, Time: 1.773495
Epoch: 030, LR: 0.001000, Train Loss: 0.7395013, Val Loss: 0.7509951, Time: 1.826457
Epoch: 040, LR: 0.001000, Train Loss: 0.7283752, Val Loss: 0.7460425, Time: 1.827289
Epoch: 050, LR: 0.000500, Train Loss: 0.6993063, Val Loss: 0.7454823, Time: 1.727684
Epoch: 060, LR: 0.000500, Train Loss: 0.7110714, Val Loss: 0.7454571, Time: 1.723190
Epoch: 070, LR: 0.000500, Train Loss: 0.7526240, Val Loss: 0.7442305, Time: 1.766463
Epoch: 080, LR: 0.000500, Train Loss: 0.7469680, Val Loss: 0.7427226, Time: 1.820799
Epoch: 090, LR: 0.000500, Train Loss: 0.7023168, Val Loss: 0.7445388, Time: 1.789928

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 7.6022769, Val Loss: 6.3104228, Time: 0.722889
Epoch