In [1]:
import torch
import pandas as pd
from utils import load_model, makedir, set_random_seed
from utils.data import load_data
from trainer import Trainer

# Reload imported modules automatically before each cell executes, so edits to
# the local `utils` / `trainer` code take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Seed once, up front, through the project helper so runs are repeatable.
# NOTE(review): which RNGs set_random_seed covers is not visible here — confirm.
set_random_seed(22)

# Pretraining Source Tasks

## Configuration

In [2]:
# Source-task (pretraining) configuration: data is read from '../datasets/qm9/10000/'.
dataset = 'QM9'
# The training cell fits one single-task model per property listed here.
tasks = ["mu","alpha","homo","lumo","gap","r2","zpve","u0","u298","h298","g298","cv"]
data_path = '../datasets/qm9/10000/'
# NOTE(review): model_type only feeds model_path; the training cell calls
# load_model() without it — confirm load_model's default architecture matches.
model_type = 'GCN'
# Derived from dataset/model_type so the checkpoint directory cannot drift from them.
model_path = f"../saved_models/{dataset}/{model_type}/10000/"
makedir(model_path)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Column-oriented accumulator: 'task' plus one list per metric, filled by the training cell.
results_dict = {'task':[]}

## Training

In [3]:
# Train one single-task model per property and record the metrics returned by
# trainer.fit for each of them.
#
# Start from an empty table so that re-running this cell on its own does not
# append a second copy of every task to the results that get saved afterwards.
results_dict = {'task': []}
for task in tasks:
    print(task)
    train_loader, val_loader, test_loader, data_args = load_data(
        dataset=dataset,
        data_path=data_path,
        tasks=[task],
        device=device,
    )
    # A fresh model and trainer per task: nothing is shared between tasks here.
    model = load_model(n_tasks=1, device=device)
    trainer = Trainer(
        device=device,
        tasks=[task],
        data_args=data_args,
        model_path=model_path,
    )
    model, task_results_dict = trainer.fit(
        model, train_loader, val_loader, test_loader
    )
    results_dict['task'].append(task)
    for metric in data_args['metrics']:
        # Create the metric column on first sight, then append this task's value.
        results_dict.setdefault(metric, []).append(task_results_dict[metric][task])

mu
preprocessing data ...
8000 loaded!
preprocessing data ...
1000 loaded!
preprocessing data ...
1000 loaded!
[0] training loss:0.7203775395154953
val r2:0.39005630975442973
val mae:0.883161723613739
[20] training loss:0.40729767811298373
val r2:0.507736787682456
val mae:0.77555912733078
test r2:0.47638131819015084
test mae:0.7637039422988892
alpha
8000 loaded!
1000 loaded!
1000 loaded!
[0] training loss:0.474027630507946
val r2:0.5506941211694147
val mae:3.7718985080718994
[20] training loss:0.09711525401473045
val r2:0.817594793723914
val mae:2.8775346279144287
[40] training loss:0.06573056454211473
val r2:0.9092255853305639
val mae:1.9472790956497192
[60] training loss:0.04892896731197834
val r2:0.9188388181331558
val mae:1.8094267845153809
[80] training loss:0.04367459122091532
val r2:0.9206542642094493
val mae:1.8040122985839844
[100] training loss:0.03405665999650955
val r2:0.9153574269416989
val mae:1.8788572549819946
test r2:0.9030606229688243
test mae:1.7866597175598145
homo


In [4]:
# Mirror the checkpoint directory layout under 'results' and write the
# collected metrics there as a CSV, formatting floats with '%.3f'.
result_path = model_path.replace('saved_models','results')
makedir(result_path)
results_frame = pd.DataFrame(results_dict)
results_frame.to_csv(
    result_path+'results.csv',
    index=False,
    float_format='%.3f',
)
print(f"Results have been saved to {result_path+'results.csv'}")

Results have been saved to ../results/QM9/GCN/10000/results.csv


# Training Target Tasks

## Configuration

In [5]:
# Target-task configuration: data is read from '../datasets/qm9/1000/'.
dataset = 'QM9'
# The training cell fits one single-task model per property listed here.
tasks = ["mu","alpha","homo","lumo","gap","r2","zpve","u0","u298","h298","g298","cv"]
data_path = '../datasets/qm9/1000/'
# NOTE(review): model_type only feeds model_path; the training cell calls
# load_model() without it — confirm load_model's default architecture matches.
model_type = 'GCN'
# Derived from dataset/model_type so the checkpoint directory cannot drift from them.
model_path = f"../saved_models/{dataset}/{model_type}/1000/"
makedir(model_path)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Column-oriented accumulator: 'task' plus one list per metric, filled by the training cell.
results_dict = {'task':[]}

## Training

In [6]:
# Train one single-task model per property and record the metrics returned by
# trainer.fit for each of them.
#
# Start from an empty table so that re-running this cell on its own does not
# append a second copy of every task to the results that get saved afterwards.
results_dict = {'task': []}
for task in tasks:
    print(task)
    train_loader, val_loader, test_loader, data_args = load_data(
        dataset=dataset,
        data_path=data_path,
        tasks=[task],
        device=device,
    )
    # A fresh model and trainer per task: nothing is shared between tasks here.
    model = load_model(n_tasks=1, device=device)
    trainer = Trainer(
        device=device,
        tasks=[task],
        data_args=data_args,
        model_path=model_path,
    )
    model, task_results_dict = trainer.fit(
        model, train_loader, val_loader, test_loader
    )
    results_dict['task'].append(task)
    for metric in data_args['metrics']:
        # Create the metric column on first sight, then append this task's value.
        results_dict.setdefault(metric, []).append(task_results_dict[metric][task])

mu
preprocessing data ...
800 loaded!
preprocessing data ...
100 loaded!
preprocessing data ...
1000 loaded!
[0] training loss:0.9282799148559571
val r2:0.18148887140579828
val mae:1.101701021194458
[20] training loss:0.3716514217853546
val r2:0.410117742243692
val mae:0.8942062258720398
test r2:0.3739219767889158
test mae:0.8489102125167847
alpha
800 loaded!
100 loaded!
1000 loaded!
[0] training loss:0.6377165973186493
val r2:0.46205235072804696
val mae:4.55453634262085
[20] training loss:0.22394257724285127
val r2:0.45978760018849885
val mae:3.9462292194366455
[40] training loss:0.13393680319190027
val r2:0.6002905675731194
val mae:3.772871732711792
test r2:0.5356036541517397
test mae:3.7989706993103027
homo
800 loaded!
100 loaded!
1000 loaded!
[0] training loss:0.9544387662410736
val r2:0.17597092671703107
val mae:0.015206742100417614
[20] training loss:0.35568213984370234
val r2:0.5340390614778645
val mae:0.012023153714835644
test r2:0.3831952288851873
test mae:0.012830333784222603

In [7]:
# Mirror the checkpoint directory layout under 'results' and write the
# collected metrics there as a CSV, formatting floats with '%.3f'.
result_path = model_path.replace('saved_models','results')
makedir(result_path)
results_frame = pd.DataFrame(results_dict)
results_frame.to_csv(
    result_path+'results.csv',
    index=False,
    float_format='%.3f',
)
print(f"Results have been saved to {result_path+'results.csv'}")

Results have been saved to ../results/QM9/GCN/1000/results.csv
