In [1]:
import numpy as np
import pandas as pd
from random import shuffle
import torch.nn as nn
from models.gcn import GCNNet
from utils import *
import itertools
import csv


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def train(model, device, train_loader, optimizer, epoch, file_name):
  print('Training on {} samples...'.format(len(train_loader.dataset)))
  model.train()
  losses=[]
  for batch_idx, data in enumerate(train_loader):
    data = data.to(device)
    optimizer.zero_grad()
    output = model(data)
    loss = loss_fn(output, data.y.view(-1, 1).float().to(device))
    loss.backward()
    optimizer.step()
    if batch_idx % LOG_INTERVAL == 0:
      print('Train epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch,
                                                                      batch_idx * len(data.x),
                                                                      len(train_loader.dataset),
                                                                      100. * batch_idx / len(train_loader),
                                                                      loss.item()))
    losses.append(loss.item())
    # Write the loss values to a CSV file
  
    with open(file_name, 'a') as f:
      writer = csv.writer(f)
      writer.writerow([epoch] + losses)

In [3]:
def predicting(model, device, loader):
    model.eval()
    total_preds = torch.Tensor()
    total_labels = torch.Tensor()
    print('Make prediction for {} samples...'.format(len(loader.dataset)))
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = model(data)
            total_preds = torch.cat((total_preds, output.cpu()), 0)
            total_labels = torch.cat((total_labels, data.y.view(-1, 1).cpu()), 0)
    return total_labels.numpy().flatten(),total_preds.numpy().flatten()

## Hyperparameter Search 

In [4]:
# Define a range of hyperparameters to search over
learning_rates = [0.0005]
batch_sizes = [1024]


In [5]:
# Create a list of all possible hyperparameter combinations
hyperparameter_grid = list(itertools.product(
    learning_rates, batch_sizes))

In [6]:
# Loop over all hyperparameter combinations and train models
for hyperparameters in hyperparameter_grid:
    # Unpack the hyperparameters
    learning_rate, batch_size = hyperparameters

In [7]:
LOG_INTERVAL = 20
NUM_EPOCHS = 1000

In [8]:
# Select the dataset of interest
dataset = 'davis'

In [9]:
# access to the processed training data file
processed_data_file_train = 'data/processed/' + dataset + '_train.pt'
# access to the processed test data file
processed_data_file_test = 'data/processed/' + dataset + '_test.pt'
# train / test data
train_data = TestbedDataset(root='data', dataset=dataset+'_train')
test_data = TestbedDataset(root='data', dataset=dataset+'_test')

Pre-processed data found: data/processed/davis_train.pt, loading ...
Pre-processed data found: data/processed/davis_test.pt, loading ...


In [10]:
for hyperparameters in hyperparameter_grid:
    # Unpack the hyperparameters
    learning_rate, batch_size= hyperparameters
    print(learning_rate, batch_size)

    # Define the model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model =GCNNet().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Define your loss function and data loader
    loss_fn = nn.MSELoss()
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    # best mse, ci and epoch
    best_mse = 1000
    best_ci = 0
    best_epoch = -1

    # Set the files name
    model_file_name = 'model_' + str(learning_rate) + '_' + str(batch_size )+ '_'  +  '.model'
    best_result_file_name = 'best_result_' + str(learning_rate) + '_' + str(batch_size ) + '_' +  '.csv'
    result_file_name = 'result_' + str(learning_rate) + '_' + str(batch_size ) + '_' +  '.csv'
    losses_file_name = 'losses_' + str(learning_rate) + '_' + str(batch_size ) + '_' +  '.csv'

    # create a CSV file and write the header row
    with open(result_file_name, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['RMSE', 'MSE', 'Pearson', 'Spearman', 'CI', 'Learning rate', 'Batch size', 'Epoch'])
        
    for epoch in range(NUM_EPOCHS):
        train(model, device, train_loader, optimizer, epoch+1, losses_file_name)
        G,P = predicting(model, device, test_loader)
        ret = [rmse(G,P),mse(G,P),pearson(G,P),spearman(G,P),ci(G,P), learning_rate, batch_size, epoch]

        # append ret to CSV file
        with open(result_file_name, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(ret)
        
        # save the best results to CSV File
        if ret[1]<best_mse:
            torch.save(model.state_dict(), model_file_name)
            with open(best_result_file_name,'w') as f:
                f.write(','.join(map(str,ret)))
            best_epoch = epoch+1
            best_mse = ret[1]
            best_ci = ret[-1]
            print('rmse improved at epoch ', best_epoch, '; best_mse,best_ci:', best_mse,best_ci,dataset)        
        else:
            print(ret[1],'No improvement since epoch ', best_epoch, '; best_mse,best_ci:', best_mse,best_ci,dataset)

0.0005 1024
Training on 25046 samples...




Make prediction for 5010 samples...
rmse improved at epoch  1 ; best_mse,best_ci: 1.0649043 0 davis
Training on 25046 samples...
Make prediction for 5010 samples...
rmse improved at epoch  2 ; best_mse,best_ci: 0.748785 1 davis
Training on 25046 samples...
Make prediction for 5010 samples...
rmse improved at epoch  3 ; best_mse,best_ci: 0.7243942 2 davis
Training on 25046 samples...
Make prediction for 5010 samples...
rmse improved at epoch  4 ; best_mse,best_ci: 0.7187799 3 davis
Training on 25046 samples...
Make prediction for 5010 samples...
rmse improved at epoch  5 ; best_mse,best_ci: 0.67489153 4 davis
Training on 25046 samples...
Make prediction for 5010 samples...
0.7256512 No improvement since epoch  5 ; best_mse,best_ci: 0.67489153 4 davis
Training on 25046 samples...
Make prediction for 5010 samples...
rmse improved at epoch  7 ; best_mse,best_ci: 0.59316397 6 davis
Training on 25046 samples...
Make prediction for 5010 samples...
rmse improved at epoch  8 ; best_mse,best_ci: