# DropGNN + Contrastive Learning Prototyping

Work based off of DropGNN

https://arxiv.org/pdf/2111.06283.pdf 

https://github.com/KarolisMart/DropGNN 

In [1]:
import os.path as osp
import numpy as np
import networkx as nx
import time
import random
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
from sklearn.model_selection import StratifiedKFold


# Torch Geometric 
try: 
    from torch_geometric.data import DataLoader, Data
    from torch_geometric.data.dataloader import Collater
    from torch_geometric.datasets import TUDataset
    from torch_geometric.utils import degree
    from torch_geometric.utils.convert import from_networkx
    from torch_geometric.nn import GINConv, GINEConv, global_add_pool
except ModuleNotFoundError: 
    !pip install torch_geometric
    from torch_geometric.data import DataLoader, Data
    from torch_geometric.data.dataloader import Collater
    from torch_geometric.datasets import TUDataset
    from torch_geometric.utils import degree
    from torch_geometric.utils.convert import from_networkx
    from torch_geometric.nn import GINConv, GINEConv, global_add_pool
    
# Pytorch Metric Learning
try: 
    from pytorch_metric_learning import losses
except ModuleNotFoundError:
    !pip install pytorch-metric-learning
    from pytorch_metric_learning import losses

  from .autonotebook import tqdm as notebook_tqdm


## Import Dataset

Use the IMDB-Binary dataset

In [2]:
class MyFilter(object):
    def __call__(self, data):
        return data.num_nodes <= 70

class MyPreTransform(object):
    def __call__(self, data):
        data.x = degree(data.edge_index[0], data.num_nodes, dtype=torch.long)
        data.x = F.one_hot(data.x, num_classes=69).to(torch.float)
        return data 

In [3]:
# Download data 
path = osp.join(osp.dirname(osp.realpath("./")), 'data', f'IMDB-BINARY')

dataset = TUDataset(
    path, 
    name = "IMDB-BINARY", 
    pre_transform = MyPreTransform(), 
    pre_filter = MyFilter()
)

In [4]:
print(dataset)

IMDB-BINARY(996)


Use the IMDB-Multi dataset

In [5]:
# Download data 
path = osp.join(osp.dirname(osp.realpath("./")), 'data', f'IMDB-MULTI')

dataset2 = TUDataset(
    path, 
    name = "IMDB-MULTI", 
    pre_transform = MyPreTransform(), 
    pre_filter = MyFilter()
)

In [6]:
print(dataset2)

IMDB-MULTI(1498)


## DropGNN + Contrastive Learning Model

DropGNN Model with Contrastive Learning Methods using SimCLR Contrastive Learning Framework

In [7]:
class DropGNN_Contrastive(nn.Module):
    def __init__(self, num_features, num_reps, num_classes, hidden_units, use_aux_loss=True):
        super(DropGNN_Contrastive, self).__init__()

        # Set starting parameters for model 
        self.num_features = num_features   # Number of initial features 
        self.num_reps = num_reps           # Number of features in representation vector 
        self.num_classes = num_classes     # Number of different classes
        self.dim = hidden_units            # Number of units for hidden layers
        self.use_aux_loss = use_aux_loss   # Whether to include aux loss to total loss
        
        self.device = device

        # Number of layers in model
        self.num_layers = 4

        self.convs = nn.ModuleList()        # Made of num_layers GINConv (linear -> batchnorm1d -> relu -> linear)
        self.bns = nn.ModuleList()          # Made of num_layers BatchNorm1d 
        self.reps = nn.ModuleList()         # Layer between base model and contrastive learning representation
        self.fcs = nn.ModuleList()          # Made of num_layers + 1 Linear layers mapping from num_features or dim to num_reps

        # Add initial layer from num_features to dim 
        self.convs.append(GINConv(nn.Sequential(nn.Linear(self.num_features, self.dim), nn.BatchNorm1d(self.dim), nn.ReLU(), nn.Linear(self.dim, self.dim))))
        self.bns.append(nn.BatchNorm1d(self.dim))
        self.reps.append(nn.Linear(self.num_features, self.num_reps))
        self.reps.append(nn.Linear(self.dim, self.num_reps))
        self.fcs.append(nn.Linear(self.num_features, self.num_classes))
        self.fcs.append(nn.Linear(self.dim, self.num_classes))

        # Add additional layers from dim to dim 
        for i in range(self.num_layers-1):
            self.convs.append(GINConv(nn.Sequential(nn.Linear(self.dim, self.dim), nn.BatchNorm1d(self.dim), nn.ReLU(), nn.Linear(self.dim, self.dim))))
            self.bns.append(nn.BatchNorm1d(self.dim))
            self.reps.append(nn.Linear(self.dim, self.num_reps))
            self.fcs.append(nn.Linear(self.dim, self.num_classes))

        # Use aux_loss for dropGNN: made of num_layers + 1 linear layers 
        # Adds new module list of linear layers 
        if self.use_aux_loss:
            self.aux_fcs = nn.ModuleList()
            self.aux_fcs.append(nn.Linear(self.num_features, self.num_classes))
            for i in range(self.num_layers):
                self.aux_fcs.append(nn.Linear(self.dim, self.num_classes))
        
    def reset_parameters(self):
        # Resets parameters for Linear, GINConv, and BatchNorm1d layers
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.reset_parameters()
            elif isinstance(m, GINConv):
                m.reset_parameters()
            elif isinstance(m, nn.BatchNorm1d):
                m.reset_parameters()
                
    def forward(self, data, mode="test", p=None, dropout=None, num_runs=20):
        # Runs different modes based on whether running contrastive loss or making predictions
        if mode == 'contrastive':
            return self.contrastive(data, p, num_runs)
        else:
            return self.prediction(data, p, dropout, num_runs)
        
    def contrastive(self, data, p, num_runs):
        # Trains contrastive model and representation vector model 
        
        # Note: num_runs in DropGNN is average number of nodes in each graph in dataset
        # Note: p is 2 * 1 / (1 + gamma), but for this project, p is selected to create augmented views 
        
        self.p = p
        self.num_runs = num_runs
        
        # Store all graphs in sampled batch as one large graph with separate components
        x = data.x                     # All nodes and their features (# nodes x # node features)
        edge_index = data.edge_index   # All edge index pairs from large single graph
        batch = data.batch             # Batch numbers that group nodes within the same graph with same batch number
        
        # Do runs in parallel by repeating nodes and creating num_runs different views
        x = x.unsqueeze(0).expand(self.num_runs, -1, -1).clone()   # Creates num_runs copy of node features
        drop = torch.bernoulli(torch.ones([x.size(0), x.size(1)], device=x.device) * self.p).bool()   #  Randomly determine whether node is dropped within each copy of num_runs
        x[drop] = torch.zeros([drop.sum().long().item(), x.size(-1)], device=x.device)  # Drop nodes from graphs  
        del drop
        
        # Allow gradients to update base model 
        if self.training:
            for layer in self.convs: 
                for p in layer.parameters():
                    p.requires_grad = True

            for layer in self.bns:
                for p in layer.parameters():
                    p.requires_grad = True
        
        # Run augmented subgraph through model 
        outs = [x]  # Used to store n-hop neighborhood representations, after running through model n times
        x = x.view(-1, x.size(-1))  # Concat all num_run copies of nodes 
        run_edge_index = edge_index.repeat(1, self.num_runs) + torch.arange(self.num_runs, device=edge_index.device).repeat_interleave(edge_index.size(1)) * (edge_index.max() + 1) # Transform edge_index to correspond to the same nodes in concatenated form  
        for i in range(self.num_layers):
            x = self.convs[i](x, run_edge_index)  # Run node features and edge indices through CONV layer 
            x = self.bns[i](x)  # Run resulting values through BatchNorm1d
            x = F.relu(x)   # Run final values through RELU
            outs.append(x.view(self.num_runs, -1, x.size(-1)))  # Return x back to original stacked form 
        del run_edge_index
        
        # Aggregates results of runs by taking mean of each run and summing results of runs
        out = None
        for i, x in enumerate(outs):
            x = x.mean(dim=0)                  # Take average of all node features of same nodes 
            x = global_add_pool(x, batch)      # Take the sum of all node features for nodes in same graph 
            x = self.reps[i](x)                # Run graph features into linear layer to get contrastive representation
            if out is None:
                out = x
            else:
                out += x
                
        # Returns all contrastive graph embeddings in batch 
        return out
    
    def prediction(self, data, p, dropout, num_runs):
        self.p = p
        self.dropout = dropout
        self.num_runs = num_runs
        
        # Create intermediate representations 
        x = data.x 
        edge_index = data.edge_index
        batch = data.batch 
        
        # Do runs in parallel, by repeating the graphs in the batch
        x = x.unsqueeze(0).expand(self.num_runs, -1, -1).clone()   # Flattens features and creates num_runs copy of them 
        drop = torch.bernoulli(torch.ones([x.size(0), x.size(1)], device=x.device) * self.p).bool()   #  Returns a tensor of randomly dropped nodes based on p (p = probability of dropping) 
        x[drop] = torch.zeros([drop.sum().long().item(), x.size(-1)], device=x.device)  # Drop nodes from data  
        del drop
        
        # Stop gradients from updating base model 
        for layer in self.convs:
            for p in layer.parameters():
                p.requires_grad = False
                
        for layer in self.bns:
            for p in layer.parameters():
                p.requires_grad = False
        
        # Run augmented subgraph through model 
        outs = [x]  # Used to store view of x after each layer 
        x = x.view(-1, x.size(-1))  # Swap dimensions of data features 
        run_edge_index = edge_index.repeat(1, self.num_runs) + torch.arange(self.num_runs, device=edge_index.device).repeat_interleave(edge_index.size(1)) * (edge_index.max() + 1) # Expand edge_index and augment values
        for i in range(self.num_layers):
            x = self.convs[i](x, run_edge_index)  # Run node features and edge indices through CONV layer 
            x = self.bns[i](x)  # Run resulting values through BatchNorm1d
            x = F.relu(x)   # Run final values through RELU
            outs.append(x.view(self.num_runs, -1, x.size(-1)))    # Rearrange dimensions and append to outs 
        del run_edge_index
        
        # Aggregates results of runs by summing mean and applying random dropout (not dropping out nodes)
        out = None
        for i, x in enumerate(outs):
            x = x.mean(dim=0)
            x = global_add_pool(x, batch)
            x = F.dropout(self.fcs[i](x), p=self.dropout, training=self.training)
            if out is None:
                out = x
            else:
                out += x
        
        # Returns the likelihood of each outcome class
        return F.log_softmax(out, dim=-1) 

## Training Modules

Training Functions to run Contrastive Learning and Downstream Task Prediction

### Contrastive Learning Training Modules

In [8]:
# Used to train contrastive model 
def train_contrastive(model, loader, optimizer, loss_fn, p1=0.1, p2=0.2, device=None):
    # Set model to training
    model.train()
    
    # Run data through model and update model
    loss_all = 0
    n = 0 
    for data in loader: 
        data = data.to(device)
        optimizer.zero_grad()
        embeddings_1 = model(data, mode = "contrastive", p = p1)
        embeddings_2 = model(data, mode = "contrastive", p = p2)
        
        # Used as loss(embeddings, labels)
        loss = loss_fn(embeddings_1, embeddings_2)
        loss.backward()
        optimizer.step() 
        
        loss_all += data.num_graphs * loss.item()
        n += data.num_graphs
    return loss_all / n

In [9]:
# Used to validate contrastive model 
def valid_contrastive(model, loader, loss_fn, p1=0.1, p2=0.2, device=None):
    # Set model to eval
    model.eval()
    
    with torch.no_grad():
        loss_all = 0
        n = 0
        for data in loader: 
            data = data.to(device)
            embeddings_1 = model(data, mode = "contrastive", p = p1)
            embeddings_2 = model(data, mode = "contrastive", p = p2)
            loss = loss_fn(embeddings_1, embeddings_2)
            
            loss_all += data.num_graphs * loss.item()
            n += data.num_graphs
    return loss_all / n

### Prediction Training Modules

In [10]:
# Used to train prediction model AFTER contrastive learning 
def train_prediction(model, loader, optimizer, p=0.1, dropout=0.5, device=None):
    # Set model to training
    model.train()
    
    # Run data through model and update model 
    loss_all = 0
    n = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        results = model(data, mode = "prediction", p = p, dropout = dropout)
        loss = F.nll_loss(results, data.y)
    
        loss.backward()
        loss_all += data.num_graphs * loss.item()
        n += len(data.y)
        optimizer.step()

    return loss_all / n

In [11]:
# Used to validate prediction model AFTER contrastive learning, returns loss 
def valid_prediction(model, loader, p=0.1, dropout=0.5, device=None):
    # Set model to eval
    model.eval()
    
    # Run data through model
    with torch.no_grad():
        loss_all = 0
        n = 0
        for data in loader:
            data = data.to(device)
            results = model(data, mode = "prediction", p = p, dropout = dropout)
            loss = F.nll_loss(results, data.y)
                
            loss_all += data.num_graphs * loss.item()
            n += len(data.y)

    return loss_all / n

In [12]:
# Used to test prediction model AFTER contrastive learning, returns accuracy
def test_prediction(model, loader, p=0.1, dropout=0.5, device=None):
    # Set model to eval
    model.eval() 
    
    # Run data through model and make predictions
    with torch.no_grad():
        correct = 0
        for data in loader: 
            data = data.to(device)
            results = model(data, mode = "prediction", p = p, dropout = dropout)
            pred = results.max(1)[1]
            correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)

## Split Dataset into K-Folds

In [13]:
def separate_data(dataset_len, seed=0, n_splits=10):
    folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    idx_list = []
    for idx in folds.split(np.zeros(dataset_len), np.zeros(dataset_len)):
        idx_list.append(idx)
    return idx_list

## Training and Evaluation Loop

In [14]:
with open("hello.txt", "a") as f: 
    print("Hello", file=f)

In [15]:
def training_loop(model, dataset, train_idx, test_idx, batch_size, epochs, p1, p2, p, dropout, device, lr=0.001, seed=0, m=10, filename=None):
    """
    Runs a single training loop based on given training and testing indices 
    """
    # Set random seeds 
    torch.manual_seed(seed)
    np.random.seed(seed)
    
    # Set batch size and number of epochs 
    BATCH = batch_size
    NUM_EPOCHS = epochs 
    LR = lr
    
    # Create training and testing datasets
    train_dataset = dataset[train_idx.tolist()]
    test_dataset = dataset[test_idx.tolist()]
    train_loader = torch.utils.data.DataLoader(train_dataset, sampler=torch.utils.data.RandomSampler(train_dataset, replacement=True, num_samples=int(len(train_dataset)*50/(len(train_dataset)/BATCH))), batch_size=BATCH, drop_last=False, collate_fn=Collater(follow_batch=[],exclude_keys=[]))
    test_loader = DataLoader(test_dataset, batch_size=BATCH)
    
    # Set up for contrastive learning
    loss_func = losses.SelfSupervisedLoss(losses.NTXentLoss())   # Specify contrastive loss function to use 
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)      # Optimizer for model to use 
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5) # Used to adjust learning rate while training 
    
    # CONTRASTIVE LEARNING: Train model on contrastive representation 
    print("STARTING CONTRASTIVE LEARNING")
    if filename != None:
        with open(filename, "a") as f: 
            print("STARTING CONTRASTIVE LEARNING", file=f)
    
    contrastive_losses = []
    for epoch in range(NUM_EPOCHS):
        if epoch % m == 0:
            start = time.time()

        lr = scheduler.optimizer.param_groups[0]['lr']
        train_loss = train_contrastive(model, train_loader, optimizer, loss_func, p1=p1, p2=p2, device=device)
        scheduler.step()
        test_loss = valid_contrastive(model, test_loader, loss_func, p1=p1, p2=p2, device=device)
        contrastive_losses.append(test_loss)

        if epoch % m == 0:
            print('Epoch: {:03d}, LR: {:7f}, Train Loss: {:.7f}, '
                'Val Loss: {:.7f}, Time: {:7f}'.format(
                    epoch, lr, train_loss, test_loss, time.time() - start), flush=True)
            if filename != None:
                with open(filename, "a") as f: 
                    print('Epoch: {:03d}, LR: {:7f}, Train Loss: {:.7f}, '
                        'Val Loss: {:.7f}, Time: {:7f}'.format(
                            epoch, lr, train_loss, test_loss, time.time() - start), flush=True, file=f)
            
    # Set up for prediction 
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)      # Optimizer for model to use 
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5) # Used to adjust learning rate while training 
    
    # PREDICTION: Train model using contrastive representations to make predictions 
    print("\nSTARTING PREDICTION LEARNING")
    if filename != None: 
        with open(filename, "a") as f: 
            print("\nSTARTING PREDICTION LEARNING", file=f)
    
    prediction_losses = []
    for epoch in range(NUM_EPOCHS):
        if epoch % m == 0:
            start = time.time()

        lr = scheduler.optimizer.param_groups[0]['lr']
        train_loss = train_prediction(model, train_loader, optimizer, p=p, dropout=dropout, device=device)
        scheduler.step()
        test_loss = valid_prediction(model, test_loader, p=p, dropout=dropout, device=device)
        prediction_losses.append(test_loss)

        if epoch % m == 0:
            print('Epoch: {:03d}, LR: {:7f}, Train Loss: {:.7f}, '
                'Val Loss: {:.7f}, Time: {:7f}'.format(
                    epoch, lr, train_loss, test_loss, time.time() - start), flush=True)
            if filename != None:
                with open(filename, "a") as f: 
                    print('Epoch: {:03d}, LR: {:7f}, Train Loss: {:.7f}, '
                        'Val Loss: {:.7f}, Time: {:7f}'.format(
                            epoch, lr, train_loss, test_loss, time.time() - start), flush=True, file=f)
            
    # Test final accuracy of final model 
    test_acc = test_prediction(model, test_loader, dropout=dropout, device=device)
    print(f"\nFinal Prediction Accuracy: {test_acc}\n")
    if filename != None: 
        with open(filename, "a") as f: 
            print(f"\nFinal Prediction Accuracy: {test_acc}\n", file=f)
    
    return contrastive_losses, prediction_losses, test_acc

In [16]:
def evaluation_loop(model, dataset, splits, batch_size, epochs, p1, p2, p, dropout, device, lr=0.001, seed=0, m=10, filename=None):
    # Train model on different splits, meant to evaluate model, not save best model
    contrastive_loss = []
    prediction_loss = []
    test_accuracies = []
    
    # Train a new model on every fold for evaluation
    for i, (train_idx, test_idx) in enumerate(splits): 
        print(f"Running Split {i}")
        if filename != None: 
            with open(filename, "a") as f: 
                print(f"Running Split {i}", file=f)
        
        model.reset_parameters()    # Resets upon every new fold 
        c_loss, p_loss, t_acc = training_loop(model, dataset, train_idx, test_idx, batch_size, epochs, p1, p2, p, dropout, device, lr, seed, m, filename)
        contrastive_loss.append(torch.tensor(c_loss))
        prediction_loss.append(torch.tensor(p_loss))
        test_accuracies.append(t_acc)
        
    # Calculate average contrastive loss and return best epoch for contrastive loss
    contrastive_loss = torch.stack(contrastive_loss, dim=0)
    contrastive_loss_mean = contrastive_loss.mean(dim=0)
    best_contrastive_epoch = contrastive_loss_mean.argmin().item()

    # Calculate average prediction loss and return best epoch for predictions 
    prediction_loss = torch.stack(prediction_loss, dim=0)
    prediction_loss_mean = prediction_loss.mean(dim=0)
    best_prediction_epoch = prediction_loss_mean.argmin().item()
    
    # Print average final prediction accuracy
    test_accuracies = torch.tensor(test_accuracies)
    print(f"Average Test Accuracy: {test_accuracies.mean()}")
    if filename != None: 
        with open(filename, "a") as f: 
            print(f"Average Test Accuracy: {test_accuracies.mean()}", file=f)
    
    return (contrastive_loss, contrastive_loss_mean, best_contrastive_epoch), (prediction_loss, prediction_loss_mean, best_prediction_epoch), test_accuracies

## Run DropGNN + Contrastive Learning Model

In [17]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [18]:
num_reps = [16, 32, 64]
p1 = [0.1, 0.4, 0.7]
p2 = [0.25, 0.55, 0.85]
p = [0.0, 0.1, 0.5]

In [None]:
i = 1
for n in num_reps:
    for p_1 in p1: 
        for p_2 in p2: 
            for p_i in p:
                filename = f"results/test_{i}.txt"
                
                with open(filename, "a") as f: 
                    print(f"num_reps: {n}\np1: {p_1}\np2: {p_2}\np: {p_i}\n", file=f)
                
                model_cfg = {
                    "num_features": dataset.num_features, 
                    "num_classes": dataset.num_classes, 
                    "num_reps": n, 
                    "hidden_units": 64
                }
                
                train_cfg = {
                    "model": DropGNN_Contrastive(**model_cfg).to(device),
                    "dataset": dataset, 
                    "splits": separate_data(len(dataset), seed=0, n_splits=5),
                    "batch_size": 32,
                    "epochs": 101, 
                    "p1": p_1, 
                    "p2": p_2, 
                    "p": p_i,
                    "dropout": 0.1, 
                    "device": device, 
                    "lr": 0.001, 
                    "seed": 0, 
                    "m": 10, 
                    "filename": filename
                }
                
                eval_results = evaluation_loop(**train_cfg)
                
                result_filename = f"results/test_{i}_metrics.txt"
                with open(result_filename, "a") as f: 
                    print(eval_results, file=f)
                
                i += 1

### Config Version of Running Evaluation Loop

In [23]:
# Configurations
model_cfg = {
    "num_features": dataset.num_features, 
    "num_classes": dataset.num_classes, 
    "num_reps": 32, 
    "hidden_units": 64
}

train_cfg = {
    "model": DropGNN_Contrastive(**model_cfg).to(device),
    "dataset": dataset, 
    "splits": separate_data(len(dataset), seed=0, n_splits=5),
    "batch_size": 32,
    "epochs": 101, 
    "p1": 0.1, 
    "p2": 0.2, 
    "p": 0.0,
    "dropout": 0.1, 
    "device": device, 
    "lr": 0.001, 
    "seed": 0, 
    "m": 10, 
    "filename": "test_1_results.txt"
}

In [24]:
eval_results = evaluation_loop(**train_cfg)

Running Split 0
STARTING CONTRASTIVE LEARNING


KeyboardInterrupt: 

### Train Model on IMDB-Binary

In [26]:
"""
SET UP PARAMETERS FOR TRAINING
"""

# Set random seeds
torch.manual_seed(0)
np.random.seed(0)

# Set batch size
BATCH = 32    # Default batch size in DropGNN

# Set number of epochs
NUM_EPOCHS = 100

# Set size of contrastive representation
NUM_REPS = 32

# Set node dropout probabilities 
p1 = 0.1
p2 = 0.2

# Set embedding dropout probabilities 
dropout = 0.5

# Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Device: {device}")

Device: cuda


In [27]:
"""
CREATE MODEL AND CREATE DATA SPLITS
"""

# Create model
model = DropGNN_Contrastive(num_features=dataset.num_features, num_reps=NUM_REPS, num_classes=dataset.num_classes, hidden_units=64).to(device)

# Split dataset
n = len(dataset)
splits = separate_data(n, seed=2)

In [19]:
# Run one test of training a single split 
con_loss, pred_loss, test_acc = training_loop(model, dataset, train_idx=splits[0][0], test_idx=splits[0][0], batch_size=BATCH, epochs=NUM_EPOCHS, p1=p1, p2=p2, dropout=dropout, device=device, lr=0.001, seed=0, m=10)

STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.001000, Train Loss: 0.2182636, Val Loss: 0.2284868, Time: 2.268660
Epoch: 010, LR: 0.001000, Train Loss: 0.1397976, Val Loss: 0.1764665, Time: 1.740617
Epoch: 020, LR: 0.001000, Train Loss: 0.1519121, Val Loss: 0.1772696, Time: 1.676606
Epoch: 030, LR: 0.001000, Train Loss: 0.1576299, Val Loss: 0.1751140, Time: 1.913610
Epoch: 040, LR: 0.001000, Train Loss: 0.1557956, Val Loss: 0.1684278, Time: 1.787359
Epoch: 050, LR: 0.000500, Train Loss: 0.1466705, Val Loss: 0.1639943, Time: 1.690199
Epoch: 060, LR: 0.000500, Train Loss: 0.1393766, Val Loss: 0.1641586, Time: 1.700798
Epoch: 070, LR: 0.000500, Train Loss: 0.1576357, Val Loss: 0.1643149, Time: 1.966390
Epoch: 080, LR: 0.000500, Train Loss: 0.1486617, Val Loss: 0.1641138, Time: 1.697750
Epoch: 090, LR: 0.000500, Train Loss: 0.1440432, Val Loss: 0.1634369, Time: 1.685822

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 5.1914301, Val Loss: 1.6985630, Time: 0.693039
Epoch

In [32]:
# Evaluate model performance over several different splits
eval_results = evaluation_loop(model, dataset, splits[:2], batch_size=BATCH, epochs=NUM_EPOCHS, p1=p1, p2=p2, dropout=dropout, device=device, lr=0.001, seed=0, m=10)

Running Split 0
STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.001000, Train Loss: 0.8577212, Val Loss: 0.7581988, Time: 1.481414
Epoch: 010, LR: 0.001000, Train Loss: 0.7217305, Val Loss: 0.6846188, Time: 1.481297
Epoch: 020, LR: 0.001000, Train Loss: 0.7343596, Val Loss: 0.6867907, Time: 1.486436
Epoch: 030, LR: 0.001000, Train Loss: 0.7185254, Val Loss: 0.6825860, Time: 1.572199
Epoch: 040, LR: 0.001000, Train Loss: 0.7567702, Val Loss: 0.6926438, Time: 1.653671
Epoch: 050, LR: 0.000500, Train Loss: 0.7936304, Val Loss: 0.6865512, Time: 1.413609
Epoch: 060, LR: 0.000500, Train Loss: 0.7132576, Val Loss: 0.6893845, Time: 1.486974
Epoch: 070, LR: 0.000500, Train Loss: 0.6701815, Val Loss: 0.6822991, Time: 1.385326
Epoch: 080, LR: 0.000500, Train Loss: 0.7677331, Val Loss: 0.6827128, Time: 1.464864
Epoch: 090, LR: 0.000500, Train Loss: 0.7038253, Val Loss: 0.6834503, Time: 1.410454

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 4.7479936, Val Loss: 2.4119128, Time

### Train Model on IMDB-Multi

#### Setup 1

In [32]:
"""
SET UP PARAMETERS FOR TRAINING
"""

# Set random seeds
torch.manual_seed(0)
np.random.seed(0)

# Set batch size
BATCH = 32 

# Set number of epochs
NUM_EPOCHS = 100

# Set size of contrastive representation 
NUM_REPS = 16

# Set node dropout probabilities 
p1 = 0.1
p2 = 0.2

# Set embedding dropout probabilities 
dropout = 0.1

# Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Device: {device}")

Device: cuda


In [33]:
"""
CREATE MODEL AND CREATE DATA SPLITS
"""

# Create model
model2 = DropGNN_Contrastive(num_features=dataset2.num_features, num_reps=NUM_REPS, num_classes=dataset2.num_classes, hidden_units=64).to(device)

# Split dataset
n = len(dataset2)
splits = separate_data(n, seed=2)

In [34]:
# Run one test of training a single split 
con_loss2, pred_loss2, test_acc2 = training_loop(model3, dataset2, train_idx=splits[0][0], test_idx=splits[0][0], batch_size=BATCH, epochs=NUM_EPOCHS, p1=p1, p2=p2, dropout=dropout, device=device, lr=0.0001, seed=0, m=10)

STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.000100, Train Loss: 0.7584886, Val Loss: 0.7487564, Time: 1.957749
Epoch: 010, LR: 0.000100, Train Loss: 0.7066865, Val Loss: 0.7465795, Time: 1.852912
Epoch: 020, LR: 0.000100, Train Loss: 0.6911866, Val Loss: 0.7458557, Time: 1.865959
Epoch: 030, LR: 0.000100, Train Loss: 0.7368738, Val Loss: 0.7445558, Time: 1.787293
Epoch: 040, LR: 0.000100, Train Loss: 0.7276498, Val Loss: 0.7442093, Time: 1.820059
Epoch: 050, LR: 0.000050, Train Loss: 0.6991768, Val Loss: 0.7453978, Time: 1.811658
Epoch: 060, LR: 0.000050, Train Loss: 0.7113082, Val Loss: 0.7463256, Time: 1.921042
Epoch: 070, LR: 0.000050, Train Loss: 0.7526180, Val Loss: 0.7451442, Time: 1.835454
Epoch: 080, LR: 0.000050, Train Loss: 0.7475584, Val Loss: 0.7442139, Time: 1.859795
Epoch: 090, LR: 0.000050, Train Loss: 0.7021159, Val Loss: 0.7449162, Time: 1.901287

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000050, Train Loss: 8.4479933, Val Loss: 12.6193943, Time: 0.735506
Epoc

In [35]:
# Evaluate model performance over several different splits
eval_results2 = evaluation_loop(model2, dataset2, splits, batch_size=BATCH, epochs=NUM_EPOCHS, p1=p1, p2=p2, dropout=dropout, device=device, lr=0.001, seed=0, m=10)

Running Split 0
STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.001000, Train Loss: 0.8516927, Val Loss: 1.0220096, Time: 1.504766
Epoch: 010, LR: 0.001000, Train Loss: 0.7176018, Val Loss: 0.9342155, Time: 1.523161
Epoch: 020, LR: 0.001000, Train Loss: 0.6961899, Val Loss: 0.9312773, Time: 1.456449
Epoch: 030, LR: 0.001000, Train Loss: 0.7388766, Val Loss: 0.9270426, Time: 1.490390
Epoch: 040, LR: 0.001000, Train Loss: 0.7304915, Val Loss: 0.9297729, Time: 1.499190
Epoch: 050, LR: 0.000500, Train Loss: 0.7004330, Val Loss: 0.9276716, Time: 1.487565
Epoch: 060, LR: 0.000500, Train Loss: 0.7128414, Val Loss: 0.9277818, Time: 1.422486
Epoch: 070, LR: 0.000500, Train Loss: 0.7548429, Val Loss: 0.9268764, Time: 1.515224
Epoch: 080, LR: 0.000500, Train Loss: 0.7491760, Val Loss: 0.9295364, Time: 1.448396
Epoch: 090, LR: 0.000500, Train Loss: 0.7024541, Val Loss: 0.9300131, Time: 1.444923

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 6.0981954, Val Loss: 2.5190493, Time


STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 4.4439562, Val Loss: 3.6416358, Time: 0.560070
Epoch: 010, LR: 0.000500, Train Loss: 1.7960813, Val Loss: 1.4366892, Time: 0.493342
Epoch: 020, LR: 0.000500, Train Loss: 1.4452968, Val Loss: 1.3967404, Time: 0.485430
Epoch: 030, LR: 0.000500, Train Loss: 1.3525715, Val Loss: 1.1188027, Time: 0.486752
Epoch: 040, LR: 0.000500, Train Loss: 1.2217340, Val Loss: 1.2162709, Time: 0.505911
Epoch: 050, LR: 0.000250, Train Loss: 1.1521142, Val Loss: 1.1418914, Time: 0.492802
Epoch: 060, LR: 0.000250, Train Loss: 1.1448525, Val Loss: 1.0148370, Time: 0.526661
Epoch: 070, LR: 0.000250, Train Loss: 1.0717741, Val Loss: 1.0999534, Time: 0.488104
Epoch: 080, LR: 0.000250, Train Loss: 1.0682999, Val Loss: 1.0423398, Time: 0.504081
Epoch: 090, LR: 0.000250, Train Loss: 1.0523413, Val Loss: 1.0168620, Time: 0.495064

Final Prediction Accuracy: 0.5

Running Split 5
STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.001000, Train Loss: 0.8

Epoch: 000, LR: 0.001000, Train Loss: 0.8281127, Val Loss: 0.7741972, Time: 1.711579
Epoch: 010, LR: 0.001000, Train Loss: 0.7314454, Val Loss: 0.7060843, Time: 1.421336
Epoch: 020, LR: 0.001000, Train Loss: 0.6992294, Val Loss: 0.6987880, Time: 1.534797
Epoch: 030, LR: 0.001000, Train Loss: 0.7761878, Val Loss: 0.6999398, Time: 1.552006
Epoch: 040, LR: 0.001000, Train Loss: 0.7344553, Val Loss: 0.6937445, Time: 1.438837
Epoch: 050, LR: 0.000500, Train Loss: 0.7263060, Val Loss: 0.6869210, Time: 1.409423
Epoch: 060, LR: 0.000500, Train Loss: 0.7926921, Val Loss: 0.6839669, Time: 1.555288
Epoch: 070, LR: 0.000500, Train Loss: 0.7174751, Val Loss: 0.6887956, Time: 1.487142
Epoch: 080, LR: 0.000500, Train Loss: 0.7208485, Val Loss: 0.6832994, Time: 1.479109
Epoch: 090, LR: 0.000500, Train Loss: 0.7644253, Val Loss: 0.6905379, Time: 1.415560

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 5.1084681, Val Loss: 7.1146407, Time: 0.487374
Epoch: 010, LR: 0.000500, Train Los

#### Setup 2

In [22]:
"""
SET UP PARAMETERS FOR TRAINING
"""

# Set random seeds
torch.manual_seed(0)
np.random.seed(0)

# Set batch size
BATCH = 32 

# Set number of epochs
NUM_EPOCHS = 100

# Set size of contrastive representation 
NUM_REPS = 64

# Set node dropout probabilities 
p1 = 0.1
p2 = 0.2

# Set embedding dropout probabilities 
dropout = 0.5

# Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Device: {device}")

Device: cuda


In [23]:
"""
CREATE MODEL AND CREATE DATA SPLITS
"""

# Create model
model3 = DropGNN_Contrastive(num_features=dataset2.num_features, num_reps=NUM_REPS, num_classes=dataset2.num_classes, hidden_units=64).to(device)

# Split dataset
n = len(dataset2)
splits = separate_data(n, seed=2)

In [21]:
# Run one test of training a single split 
con_loss3, pred_loss3, test_acc3 = training_loop(model3, dataset2, train_idx=splits[0][0], test_idx=splits[0][0], batch_size=BATCH, epochs=NUM_EPOCHS, p1=p1, p2=p2, dropout=dropout, device=device, lr=0.001, seed=0, m=10)

STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.001000, Train Loss: 0.8356679, Val Loss: 0.7995140, Time: 2.505042
Epoch: 010, LR: 0.001000, Train Loss: 0.7122133, Val Loss: 0.7579982, Time: 1.750991
Epoch: 020, LR: 0.001000, Train Loss: 0.6926598, Val Loss: 0.7495884, Time: 1.773495
Epoch: 030, LR: 0.001000, Train Loss: 0.7395013, Val Loss: 0.7509951, Time: 1.826457
Epoch: 040, LR: 0.001000, Train Loss: 0.7283752, Val Loss: 0.7460425, Time: 1.827289
Epoch: 050, LR: 0.000500, Train Loss: 0.6993063, Val Loss: 0.7454823, Time: 1.727684
Epoch: 060, LR: 0.000500, Train Loss: 0.7110714, Val Loss: 0.7454571, Time: 1.723190
Epoch: 070, LR: 0.000500, Train Loss: 0.7526240, Val Loss: 0.7442305, Time: 1.766463
Epoch: 080, LR: 0.000500, Train Loss: 0.7469680, Val Loss: 0.7427226, Time: 1.820799
Epoch: 090, LR: 0.000500, Train Loss: 0.7023168, Val Loss: 0.7445388, Time: 1.789928

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 7.6022769, Val Loss: 6.3104228, Time: 0.722889
Epoch

In [25]:
# Evaluate model performance over several different splits
eval_results3 = evaluation_loop(model3, dataset2, splits, batch_size=BATCH, epochs=NUM_EPOCHS, p1=p1, p2=p2, dropout=dropout, device=device, lr=0.001, seed=0, m=10)

Running Split 0
STARTING CONTRASTIVE LEARNING
Epoch: 000, LR: 0.001000, Train Loss: 0.8550695, Val Loss: 1.0046302, Time: 1.738496
Epoch: 010, LR: 0.001000, Train Loss: 0.7147704, Val Loss: 0.9486681, Time: 1.495854
Epoch: 020, LR: 0.001000, Train Loss: 0.6942847, Val Loss: 0.9317470, Time: 1.673090
Epoch: 030, LR: 0.001000, Train Loss: 0.7363416, Val Loss: 0.9277910, Time: 1.394256
Epoch: 040, LR: 0.001000, Train Loss: 0.7297152, Val Loss: 0.9269102, Time: 1.454574
Epoch: 050, LR: 0.000500, Train Loss: 0.6986513, Val Loss: 0.9283576, Time: 1.497100
Epoch: 060, LR: 0.000500, Train Loss: 0.7117866, Val Loss: 0.9265751, Time: 1.444127
Epoch: 070, LR: 0.000500, Train Loss: 0.7534909, Val Loss: 0.9275482, Time: 1.470955
Epoch: 080, LR: 0.000500, Train Loss: 0.7473867, Val Loss: 0.9258637, Time: 1.499874
Epoch: 090, LR: 0.000500, Train Loss: 0.7008366, Val Loss: 0.9258947, Time: 1.551216

STARTING PREDICTION LEARNING
Epoch: 000, LR: 0.000500, Train Loss: 8.0150059, Val Loss: 3.9858280, Time