## Notebook to evaluate molecule graph regression models (individual, fused or ensemble)

In [20]:
"""
    IMPORTING LIBS
"""
import dgl

import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import DataLoader

from tensorboardX import SummaryWriter
from tqdm import tqdm

import yaml

In [33]:
import os

# Switch the working directory to the project folder so that the relative
# imports and the relative "out/..." paths used further down resolve.
# The guard makes the cell safe to re-run: os.chdir() with a relative path
# would otherwise try to descend into "gnn_benchmarking" a second time and fail.
_PROJECT_DIR = "gnn_benchmarking"
if os.path.basename(os.getcwd()) != _PROJECT_DIR:
    os.chdir(_PROJECT_DIR)
print(os.getcwd())

/home/ehoskovec/Workspace/ETH/master/sem3/deepLearning/Project/gnn_fusion/gnn_benchmarking


In [22]:
"""
    IMPORTING CUSTOM MODULES/METHODS
"""
from nets.molecules_graph_regression.load_net import gnn_model # import all GNNS
from data.data import LoadData # import dataset
import train.train_molecules_graph_regression as train

In [23]:
# """
#     AUTORELOAD IPYTHON EXTENSION FOR RELOADING IMPORTED MODULES
# """

def in_ipynb():
    """Report whether the code is executing inside an IPython/Jupyter session.

    The check probes the ``get_ipython`` name: when it is not defined the
    lookup raises ``NameError`` and the function answers ``False``.

    Returns:
        bool: ``True`` if ``get_ipython().config`` could be accessed,
        ``False`` if ``get_ipython`` is not defined.
    """
    try:
        # Only the successful lookup matters; the config object itself is not used.
        _ = get_ipython().config
    except NameError:
        return False
    else:
        return True
    
notebook_mode = in_ipynb()
print(notebook_mode)

if notebook_mode == True:
    %load_ext autoreload
    %autoreload 2

True
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
"""
    GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """Configure CUDA device visibility and pick the torch device to run on.

    Args:
        use_gpu: whether a GPU should be used when one is available.
        gpu_id: id written (as a string) to ``CUDA_VISIBLE_DEVICES``.

    Returns:
        torch.device: ``cuda`` when CUDA is available and ``use_gpu`` is
        truthy, otherwise ``cpu``.
    """
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": str(gpu_id),
    })

    cuda_available = torch.cuda.is_available()
    if not (cuda_available and use_gpu):
        # Same message is printed whether CUDA is missing or the GPU was not requested.
        print('cuda not available')
        return torch.device("cpu")

    # Index 0 is queried; presumably this is the device selected through
    # CUDA_VISIBLE_DEVICES above -- confirm on a multi-GPU machine.
    print('cuda available with GPU:',torch.cuda.get_device_name(0))
    return torch.device("cuda")


Copy of the parameters, kept here for testing purposes. To do: read them from the text file written to the output directory instead.

In [25]:
"""
     Loading variables from config file
"""
def loadConfigFile(config_file):
    """Read a run configuration from a YAML file.

    Args:
        config_file: path of the YAML file to open.

    Returns:
        tuple: ``(dataset_name, model_name, net_params, params)`` taken from
        the ``Dataset``, ``Model``, ``net_params`` and ``params`` keys of the
        file. ``net_params`` additionally gets a ``device`` entry.

    Raises:
        KeyError: if one of the keys above, or ``net_params['gpu_id']``, is
            missing.
    """
    with open(config_file, 'r') as stream:
        config = yaml.safe_load(stream)

    dataset_name, model_name = config['Dataset'], config['Model']
    net_params, params = config['net_params'], config['params']

    # The device is rebuilt from gpu_id; a gpu_id of -1 means "do not use the GPU".
    # Presumably the device object is not stored in the YAML file -- confirm
    # against the code that writes the config.
    gpu_id = net_params['gpu_id']
    net_params['device'] = gpu_setup(gpu_id != -1, gpu_id)

    return dataset_name, model_name, net_params, params

In [26]:
"""
    Load data set
"""
def LoadAndSplitDataset(DATASET_NAME, net_params, params, MODEL_NAME=None):
    """Load a dataset and wrap its train/val/test splits in ``DataLoader``s.

    Args:
        DATASET_NAME: dataset name forwarded to ``LoadData``.
        net_params: network parameters. Reads ``self_loop`` for GCN/GAT,
            ``pos_enc`` and ``pos_enc_dim`` for GatedGCN, and ``edge_feat``
            for RingGNN/3WLGNN.
        params: training parameters. Reads ``batch_size`` for every model
            other than RingGNN/3WLGNN.
        MODEL_NAME: name of the model the loaders are built for. When left as
            ``None`` the module-level ``MODEL_NAME`` is used, which keeps the
            previous three-argument call working.

    Returns:
        tuple: ``(train_loader, val_loader, test_loader)``.

    Raises:
        NameError: if no model name was passed and no module-level
            ``MODEL_NAME`` exists.
    """
    if MODEL_NAME is None:
        # Backward compatibility: earlier versions read a global of this name.
        try:
            MODEL_NAME = globals()["MODEL_NAME"]
        except KeyError:
            raise NameError(
                "MODEL_NAME was not passed to LoadAndSplitDataset and is not "
                "defined at module level"
            ) from None

    dataset = LoadData(DATASET_NAME)

    if MODEL_NAME in ('GCN', 'GAT') and net_params['self_loop']:
        print("[!] Adding graph self-loops for GCN/GAT models (central node trick).")
        dataset._add_self_loops()

    if MODEL_NAME == 'GatedGCN' and net_params['pos_enc']:
        print("[!] Adding graph positional encoding.")
        # Start the timer here so the elapsed time printed below is defined.
        t0 = time.time()
        dataset._add_positional_encodings(net_params['pos_enc_dim'])
        print('Time PE:',time.time()-t0)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test

    if MODEL_NAME in ('RingGNN', '3WLGNN'):
        # Dense models use a dedicated collate function that needs edge_feat;
        # no batch_size is passed, so DataLoader's default applies.
        from functools import partial

        loader_kwargs = {
            'collate_fn': partial(dataset.collate_dense_gnn, edge_feat=net_params['edge_feat']),
        }
    else:
        loader_kwargs = {
            'batch_size': params['batch_size'],
            # batching exception for DiffPool
            'drop_last': MODEL_NAME == 'DiffPool',
            'collate_fn': dataset.collate,
        }

    train_loader = DataLoader(trainset, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(valset, shuffle=False, **loader_kwargs)
    test_loader = DataLoader(testset, shuffle=False, **loader_kwargs)

    # Returned for both branches; previously the dense branch fell through
    # and returned None.
    return train_loader, val_loader, test_loader

In [27]:
"""
    IMPORTING MODELS
"""
def evalModel(model_file, MODEL_NAME, net_params, train_loader, val_loader, test_loader):
    """Restore a trained model from disk and report its MAE on the test set.

    Args:
        model_file: path of the saved ``state_dict``.
        MODEL_NAME: model name passed to ``gnn_model``; also selects the dense
            evaluation routine for RingGNN/3WLGNN.
        net_params: network parameters; ``net_params["device"]`` is the device
            used for loading and evaluation.
        train_loader: accepted for interface compatibility; not used here.
        val_loader: accepted for interface compatibility; not used here.
        test_loader: loader the model is evaluated on.

    Returns:
        The test MAE returned by the evaluation routine.
    """
    device = net_params["device"]

    model = gnn_model(MODEL_NAME, net_params)
    # map_location allows a checkpoint saved on a GPU to be restored on a
    # machine where only the CPU is available.
    model.load_state_dict(torch.load(model_file, map_location=device))
    model = model.to(device)
    model.eval()

    # Dense models (RingGNN/3WLGNN) have their own evaluation routine.
    if MODEL_NAME in ('RingGNN', '3WLGNN'):
        evaluate_network = train.evaluate_network_dense
    else:
        evaluate_network = train.evaluate_network_sparse

    # The last argument is presumably the epoch index -- confirm against train module.
    _, test_mae = evaluate_network(model, device, test_loader, 0)
    # This value comes from the test loader, so it is labelled as such.
    print("Test MAE: {:.4f}".format(test_mae))
    return test_mae

In [36]:
def main():
    """Evaluate the saved model of one run directory on its test set.

    Reads ``config.yaml`` from the run directory, builds the data loaders and
    evaluates the weights stored in ``final.pkl``.
    """
    # LoadAndSplitDataset is called without a model name and resolves
    # MODEL_NAME at module scope, so it has to be bound as a global here
    # rather than as a local of main().
    global MODEL_NAME

    modelPath = "out/molecules_graph_regression/GCN_Test_0"
    DATASET_NAME, MODEL_NAME, net_params, params = loadConfigFile(os.path.join(modelPath, "config.yaml"))
    train_loader, val_loader, test_loader = LoadAndSplitDataset(DATASET_NAME, net_params, params)

    evalModel(os.path.join(modelPath, "final.pkl"), MODEL_NAME, net_params, train_loader, val_loader, test_loader)


main()

cuda not available
[I] Loading dataset ZINC...
train, test, val sizes : 10000 1000 1000
[I] Finished loading.
[I] Data load time: 18.0037s
Train MAE: 0.6608
