# Train Pretrained Vertical Model for oral bioavailability prediction

1. This notebook uses the VerticalGNN model pretrained on the high-similarity solubility dataset.
2. Ensure that the notebook, the data folder, config.py, engine.py, model.py and utils.py are in the same directory.
3. The whole process can be repeated by running the cells in order.
4. Otherwise, download the saved models from Google Drive as described in README.md and comment out the run_training call to reproduce the results in the paper.

In [1]:
# import all required materials

import torch
import numpy as np
from torch_geometric.loader import DataLoader
from sklearn.model_selection import KFold
import os

from model import VerticalGNN
from config import NUM_FEATURES, NUM_TARGET, EDGE_DIM, DEVICE, SEED_NO, PATIENCE, EPOCHS, NUM_GRAPHS_PER_BATCH, N_SPLITS, best_params_vertical
from engine import EngineSol, EngineHOB
from utils import seed_everything, LoadHOBDataset, LoadSolDataset

First, we define a train and test function that can aid us in transfer learning. Then, we evaluate how different factors can affect the results of transfer learning. 

In [11]:
def run_training(method_tf, train_loader, valid_loader, params, es_trigger, path_to_pretrained_model, path_to_save_trained_model):
    '''
    Train a pretrained VerticalGNN with the chosen transfer-learning strategy.

    The pretrained weights are loaded, an Adam optimizer is configured
    according to ``method_tf``, and the model is trained for at most EPOCHS
    epochs. Once more than ``es_trigger`` epochs have run, the weights are
    saved whenever the validation loss improves, and training stops when the
    number of consecutive non-improving epochs exceeds PATIENCE.

    Args:
    method_tf (str): freeze --> freeze parameters of feature extraction block (gin_model and graph_trans_model),
                    fine_tune -> fine tune the feature extraction block at the base learning rate,
                    fine_tune_5x -> fine tune at 5x slower learning rate,
                    fine_tune_10x -> fine tune at 10x slower learning rate
    train_loader: DataLoader class from pytorch geometric containing train data
    valid_loader: DataLoader class from pytorch geometric containing validation data
    params (dict): dictionary containing the hyperparameters
    es_trigger (int): number of initial epochs that are always trained before
                    checkpointing and early stopping are taken into account
    path_to_pretrained_model (str): path to load the pretrained models
    path_to_save_trained_model: path to save the trained models

    Return:
    best loss: best validation loss seen after es_trigger epochs
               (np.inf if no epoch was ever checkpointed)

    Raises:
    ValueError: if method_tf is not one of the supported strategies
    '''

    # Divisor applied to the base learning rate of the feature-extraction
    # blocks for each fine-tuning strategy.
    lr_divisors = {'fine_tune': 1, 'fine_tune_5x': 5, 'fine_tune_10x': 10}

    # Fail fast on a typo instead of hitting an unbound `optimizer` later.
    if method_tf != 'freeze' and method_tf not in lr_divisors:
        raise ValueError(
            f"Unknown method_tf {method_tf!r}; expected 'freeze', 'fine_tune', "
            f"'fine_tune_5x' or 'fine_tune_10x'"
        )

    model = VerticalGNN(
            num_features=NUM_FEATURES,
            num_targets=NUM_TARGET,
            num_gin_layers=params["num_gin_layers"],
            num_graph_trans_layers=params["num_graph_trans_layers"],
            hidden_size=params["hidden_size"],
            n_heads=params["n_heads"],
            dropout=params["dropout"],
            edge_dim=EDGE_DIM,
        )

    # map_location lets a checkpoint saved on another device (e.g. GPU) be
    # loaded on the device used here.
    model.load_state_dict(torch.load(path_to_pretrained_model, map_location=DEVICE))
    model.to(DEVICE)

    base_lr = params['learning_rate']
    if method_tf == 'freeze':
        for block in (model.gin_model, model.graph_trans_model):
            for param in block.parameters():
                param.requires_grad = False

        # Only parameters that still require gradients are optimized.
        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()), lr=base_lr
        )
    else:
        block_lr = base_lr / lr_divisors[method_tf]
        optimizer = torch.optim.Adam([
            {'params': model.gin_model.parameters(), 'lr': block_lr},
            {'params': model.graph_trans_model.parameters(), 'lr': block_lr},
            {'params': model.ro.parameters()},  # uses the default lr below
        ], lr=base_lr)

    eng = EngineHOB(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping_iter = PATIENCE
    early_stopping_counter = 0

    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        # Only the loss drives checkpointing; the other metrics are ignored here.
        valid_loss, _, _, _ = eng.validate(valid_loader)
        print(
            f"Epoch: {epoch+1}/{EPOCHS}, train loss : {train_loss}, validation loss : {valid_loss}"
        )
        if epoch + 1 > es_trigger:
            if valid_loss < best_loss:
                best_loss = valid_loss
                early_stopping_counter = 0  # reset counter
                print("Saving model...")
                torch.save(model.state_dict(), path_to_save_trained_model)
            else:
                early_stopping_counter += 1

            if early_stopping_counter > early_stopping_iter:
                print("Early stopping...")
                break
            print(f"Early stop counter: {early_stopping_counter}")

    return best_loss



In [3]:
def run_validation(method_tf, valid_loader, params, path_to_trained_model):
    '''
    Evaluate a saved trained model on a validation loader.

    Args:
    method_tf (str): transfer learning strategy the model was trained with:
                    freeze, fine_tune, fine_tune_5x or fine_tune_10x
    valid_loader: DataLoader class from pytorch geometric containing validation data
    params (dict): dictionary containing the hyperparameters
    path_to_trained_model (str): path to load the saved trained model

    Return:
    bce, acc, f1, roc_auc: the four values returned by EngineHOB.validate

    Raises:
    ValueError: if method_tf is not one of the supported strategies
    '''

    # Divisor applied to the base learning rate of the feature-extraction
    # blocks for each fine-tuning strategy.
    lr_divisors = {'fine_tune': 1, 'fine_tune_5x': 5, 'fine_tune_10x': 10}

    # Fail fast on a typo instead of hitting an unbound `optimizer` later.
    if method_tf != 'freeze' and method_tf not in lr_divisors:
        raise ValueError(
            f"Unknown method_tf {method_tf!r}; expected 'freeze', 'fine_tune', "
            f"'fine_tune_5x' or 'fine_tune_10x'"
        )

    model = VerticalGNN(
            num_features=NUM_FEATURES,
            num_targets=NUM_TARGET,
            num_gin_layers=params["num_gin_layers"],
            num_graph_trans_layers=params["num_graph_trans_layers"],
            hidden_size=params["hidden_size"],
            n_heads=params["n_heads"],
            dropout=params["dropout"],
            edge_dim=EDGE_DIM,
        )

    # map_location lets a checkpoint saved on another device (e.g. GPU) be
    # loaded on the device used here.
    model.load_state_dict(torch.load(path_to_trained_model, map_location=DEVICE))
    model.to(DEVICE)

    # EngineHOB is constructed with an optimizer, so one is built exactly as
    # for training even though this function only calls eng.validate.
    base_lr = params['learning_rate']
    if method_tf == 'freeze':
        for block in (model.gin_model, model.graph_trans_model):
            for param in block.parameters():
                param.requires_grad = False

        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()), lr=base_lr
        )
    else:
        block_lr = base_lr / lr_divisors[method_tf]
        optimizer = torch.optim.Adam([
            {'params': model.gin_model.parameters(), 'lr': block_lr},
            {'params': model.graph_trans_model.parameters(), 'lr': block_lr},
            {'params': model.ro.parameters()},  # uses the default lr below
        ], lr=base_lr)

    eng = EngineHOB(model, optimizer, device=DEVICE)
    bce, acc, f1, roc_auc = eng.validate(valid_loader)
    print(f"bce:{bce}, acc :{acc}, f1: {f1}, roc_auc: {roc_auc}")
    return bce, acc, f1, roc_auc

In [4]:
def run_testing(method_tf, test_loader, params, path_to_trained_model):
    '''
    Define a function to wrap testing

    Args:
    method_tf (str): freeze --> freeze parameters of feature extraction block,
                    fine_tune -> fine tune at the base learning rate,
                    fine_tune_5x -> fine tune at 5x slower learning rate,
                    fine_tune_10x -> fine tune at 10x slower learning rate
    test_loader: DataLoader class from pytorch geometric containing test data
    params (dict): dictionary containing the hyperparameters
    path_to_trained_model (str): path to load the saved trained models

    Return:
    bce: logloss from pytorch geometric
    acc: accuracy score
    f1: f1 score
    roc_auc: roc auc score

    Raises:
    ValueError: if method_tf is not one of the supported strategies
    '''

    # Divisor applied to the base learning rate of the feature-extraction
    # blocks for each fine-tuning strategy.
    lr_divisors = {'fine_tune': 1, 'fine_tune_5x': 5, 'fine_tune_10x': 10}

    # Fail fast on a typo instead of hitting an unbound `optimizer` later.
    if method_tf != 'freeze' and method_tf not in lr_divisors:
        raise ValueError(
            f"Unknown method_tf {method_tf!r}; expected 'freeze', 'fine_tune', "
            f"'fine_tune_5x' or 'fine_tune_10x'"
        )

    model = VerticalGNN(
            num_features=NUM_FEATURES,
            num_targets=NUM_TARGET,
            num_gin_layers=params["num_gin_layers"],
            num_graph_trans_layers=params["num_graph_trans_layers"],
            hidden_size=params["hidden_size"],
            n_heads=params["n_heads"],
            dropout=params["dropout"],
            edge_dim=EDGE_DIM,
        )

    # map_location lets a checkpoint saved on another device (e.g. GPU) be
    # loaded on the device used here.
    model.load_state_dict(torch.load(path_to_trained_model, map_location=DEVICE))
    model.to(DEVICE)

    # EngineHOB is constructed with an optimizer, so one is built exactly as
    # for training even though this function only calls eng.test.
    base_lr = params['learning_rate']
    if method_tf == 'freeze':
        for block in (model.gin_model, model.graph_trans_model):
            for param in block.parameters():
                param.requires_grad = False

        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()), lr=base_lr
        )
    else:
        block_lr = base_lr / lr_divisors[method_tf]
        optimizer = torch.optim.Adam([
            {'params': model.gin_model.parameters(), 'lr': block_lr},
            {'params': model.graph_trans_model.parameters(), 'lr': block_lr},
            {'params': model.ro.parameters()},  # uses the default lr below
        ], lr=base_lr)

    eng = EngineHOB(model, optimizer, device=DEVICE)
    bce, acc, f1, roc_auc = eng.test(test_loader)
    print(f"bce:{bce}, acc :{acc}, f1: {f1}, roc_auc: {roc_auc}")
    return bce, acc, f1, roc_auc

# Effect of number of pre-training epochs to transfer learning prediction performance
1. The weights of the feature extraction block are first frozen.
2. The models are then trained, and only the weights of the classifier block are allowed to be updated.
3. The goal is to evaluate how the number of pre-training epochs affects the transfer learning prediction performance.

### Note on path
1. path_to_pretrained_model = path to the model pretrained with the solubility dataset
2. path_to_save_trained_model = path where the pretrained model is saved after being trained with the oral bioavailability dataset
3. path_to_trained_model = path from which that trained model is loaded for validation and testing

In [4]:
# Repeated k-fold evaluation of the 'freeze' strategy, starting from the
# solubility model pretrained for 20 epochs.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'freeze'
params = best_params_vertical
es_trigger = 0
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_20/'
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_20/'


def _report_mean_sd(label, values):
    """Print ``label`` followed by mean±sd of ``values``; return (array, mean, sd)."""
    arr = np.array(values)
    mean, sd = np.mean(arr), np.std(arr)
    print(f'{label}{mean:.3f}±{sd:.3f}')
    return arr, mean, sd


# Per-fold metrics on the validation split ...
val_bce_list, val_acc_list, val_f1_list, val_roc_auc_list = [], [], [], []
# ... and on the held-out test set.
bce_list, acc_list, f1_list, roc_auc_list = [], [], [], []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or fold, so build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Per-molecule graph files live under <train root>/processed/.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        # NOTE(review): the same seed is applied for every repetition and fold
        # and KFold is not shuffled, so every repetition sees identical
        # splits; confirm this is intended.
        seed_everything(SEED_NO)

        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        model_filename = f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt"

        # Training is disabled so that the saved models are evaluated as-is.
        # Uncomment the call below to retrain from the pretrained weights.
        # run_training(
        #     method_tf, train_loader, valid_loader, params, es_trigger,
        #     os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_20_epoch.pt'),
        #     os.path.join(path_to_save_trained_model, model_filename),
        # )

        trained_model_file = os.path.join(path_to_trained_model, model_filename)
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, trained_model_file
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, trained_model_file
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation over all repetitions and folds.
val_bce_arr, val_mean_bce, val_sd_bce = _report_mean_sd('validation bce:', val_bce_list)
val_acc_arr, val_acc_mean, val_acc_sd = _report_mean_sd('validation acc:', val_acc_list)
val_f1_arr, val_f1_mean, val_f1_sd = _report_mean_sd('validation f1: ', val_f1_list)
val_roc_auc_arr, val_roc_auc_mean, val_roc_auc_sd = _report_mean_sd('validation roc_auc: ', val_roc_auc_list)

bce_arr, mean_bce, sd_bce = _report_mean_sd('bce:', bce_list)
acc_arr, acc_mean, acc_sd = _report_mean_sd('acc:', acc_list)
f1_arr, f1_mean, f1_sd = _report_mean_sd('f1: ', f1_list)
roc_auc_arr, roc_auc_mean, roc_auc_sd = _report_mean_sd('roc_auc: ', roc_auc_list)

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.5906937718391418, acc :0.6939655172413793, f1: 0.8022284122562675, roc_auc: 0.6828609986504723
bce:0.5707521140575409, acc :0.6968060661764706, f1: 0.7883715622503709, roc_auc: 0.781543219043219
Rep no 0, Fold no 1
bce:0.6356474757194519, acc :0.6508620689655172, f1: 0.7839999999999999, roc_auc: 0.6134453781512605
bce:0.5464897751808167, acc :0.7232306985294117, f1: 0.8041850869437077, roc_auc: 0.8015584265584266
Rep no 0, Fold no 2
bce:0.6616944670677185, acc :0.5800865800865801, f1: 0.616600790513834, roc_auc: 0.6313184340782961
bce:0.518362894654274, acc :0.7877987132352942, f1: 0.8298730834128091, roc_auc: 0.8063492750992751
Rep no 0, Fold no 3
bce:0.6838578581809998, acc :0.5281385281385281, f1: 0.5112107623318385, roc_auc: 0.5695760211212921
bce:0.5375787913799286, acc :0.7594209558823529, f1: 0.8157467532467533, roc_auc: 0.8098056848056848
Rep no 0, Fold no 4
bce:0.6146137714385986, acc :0.70995670995671, f1: 0.6968325791855203, roc_auc: 0.7313813813813

In [5]:
# Repeated k-fold evaluation of the 'freeze' strategy, starting from the
# solubility model pretrained for 40 epochs.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'freeze'
params = best_params_vertical
es_trigger = 0
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_40/'
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_40/'


def _report_mean_sd(label, values):
    """Print ``label`` followed by mean±sd of ``values``; return (array, mean, sd)."""
    arr = np.array(values)
    mean, sd = np.mean(arr), np.std(arr)
    print(f'{label}{mean:.3f}±{sd:.3f}')
    return arr, mean, sd


# Per-fold metrics on the validation split ...
val_bce_list, val_acc_list, val_f1_list, val_roc_auc_list = [], [], [], []
# ... and on the held-out test set.
bce_list, acc_list, f1_list, roc_auc_list = [], [], [], []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or fold, so build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Per-molecule graph files live under <train root>/processed/.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        # NOTE(review): the same seed is applied for every repetition and fold
        # and KFold is not shuffled, so every repetition sees identical
        # splits; confirm this is intended.
        seed_everything(SEED_NO)

        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        model_filename = f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt"

        # Training is disabled so that the saved models are evaluated as-is.
        # Uncomment the call below to retrain from the pretrained weights.
        # run_training(
        #     method_tf, train_loader, valid_loader, params, es_trigger,
        #     os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_40_epoch.pt'),
        #     os.path.join(path_to_save_trained_model, model_filename),
        # )

        trained_model_file = os.path.join(path_to_trained_model, model_filename)
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, trained_model_file
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, trained_model_file
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation over all repetitions and folds.
val_bce_arr, val_mean_bce, val_sd_bce = _report_mean_sd('validation bce:', val_bce_list)
val_acc_arr, val_acc_mean, val_acc_sd = _report_mean_sd('validation acc:', val_acc_list)
val_f1_arr, val_f1_mean, val_f1_sd = _report_mean_sd('validation f1: ', val_f1_list)
val_roc_auc_arr, val_roc_auc_mean, val_roc_auc_sd = _report_mean_sd('validation roc_auc: ', val_roc_auc_list)

bce_arr, mean_bce, sd_bce = _report_mean_sd('bce:', bce_list)
acc_arr, acc_mean, acc_sd = _report_mean_sd('acc:', acc_list)
f1_arr, f1_mean, f1_sd = _report_mean_sd('f1: ', f1_list)
roc_auc_arr, roc_auc_mean, roc_auc_sd = _report_mean_sd('roc_auc: ', roc_auc_list)

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.6155871748924255, acc :0.6982758620689655, f1: 0.8148148148148149, roc_auc: 0.5791160593792173
bce:0.6866039335727692, acc :0.5900735294117647, f1: 0.7311348781937017, roc_auc: 0.5156488906488906
Rep no 0, Fold no 1
bce:0.6172770261764526, acc :0.6551724137931034, f1: 0.7647058823529411, roc_auc: 0.660984393757503
bce:0.4844960421323776, acc :0.7877987132352942, f1: 0.8378378378378378, roc_auc: 0.844056969056969
Rep no 0, Fold no 2
bce:0.6713252663612366, acc :0.5800865800865801, f1: 0.6196078431372549, roc_auc: 0.5964451777411129
bce:0.5272093862295151, acc :0.7711397058823529, f1: 0.8156891495601173, roc_auc: 0.8168377855877855
Rep no 0, Fold no 3
bce:0.686718761920929, acc :0.5670995670995671, f1: 0.5283018867924528, roc_auc: 0.5704301910234508
bce:0.5336512625217438, acc :0.7760799632352942, f1: 0.8233989266547406, roc_auc: 0.7967239217239217
Rep no 0, Fold no 4
bce:0.6102309226989746, acc :0.683982683982684, f1: 0.6696832579185521, roc_auc: 0.723948948948

In [6]:
# Repeated k-fold evaluation of the 'freeze' strategy, starting from the
# solubility model pretrained for 60 epochs.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'freeze'
params = best_params_vertical
es_trigger = 0
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'


def _report_mean_sd(label, values):
    """Print ``label`` followed by mean±sd of ``values``; return (array, mean, sd)."""
    arr = np.array(values)
    mean, sd = np.mean(arr), np.std(arr)
    print(f'{label}{mean:.3f}±{sd:.3f}')
    return arr, mean, sd


# Per-fold metrics on the validation split ...
val_bce_list, val_acc_list, val_f1_list, val_roc_auc_list = [], [], [], []
# ... and on the held-out test set.
bce_list, acc_list, f1_list, roc_auc_list = [], [], [], []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or fold, so build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Per-molecule graph files live under <train root>/processed/.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        # NOTE(review): the same seed is applied for every repetition and fold
        # and KFold is not shuffled, so every repetition sees identical
        # splits; confirm this is intended.
        seed_everything(SEED_NO)

        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        model_filename = f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt"

        # Training is disabled so that the saved models are evaluated as-is.
        # Uncomment the call below to retrain from the pretrained weights.
        # run_training(
        #     method_tf, train_loader, valid_loader, params, es_trigger,
        #     os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
        #     os.path.join(path_to_save_trained_model, model_filename),
        # )

        trained_model_file = os.path.join(path_to_trained_model, model_filename)
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, trained_model_file
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, trained_model_file
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation over all repetitions and folds.
val_bce_arr, val_mean_bce, val_sd_bce = _report_mean_sd('validation bce:', val_bce_list)
val_acc_arr, val_acc_mean, val_acc_sd = _report_mean_sd('validation acc:', val_acc_list)
val_f1_arr, val_f1_mean, val_f1_sd = _report_mean_sd('validation f1: ', val_f1_list)
val_roc_auc_arr, val_roc_auc_mean, val_roc_auc_sd = _report_mean_sd('validation roc_auc: ', val_roc_auc_list)

bce_arr, mean_bce, sd_bce = _report_mean_sd('bce:', bce_list)
acc_arr, acc_mean, acc_sd = _report_mean_sd('acc:', acc_list)
f1_arr, f1_mean, f1_sd = _report_mean_sd('f1: ', f1_list)
roc_auc_arr, roc_auc_mean, roc_auc_sd = _report_mean_sd('roc_auc: ', roc_auc_list)

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.6056687831878662, acc :0.7155172413793104, f1: 0.8124999999999999, roc_auc: 0.6624493927125507
bce:0.5857762098312378, acc :0.7163373161764706, f1: 0.7822835185947578, roc_auc: 0.7545443795443795
Rep no 0, Fold no 1
bce:0.6111372709274292, acc :0.6767241379310345, f1: 0.7800586510263929, roc_auc: 0.6579431772709085
bce:0.4837230294942856, acc :0.7466681985294117, f1: 0.8099162992780014, roc_auc: 0.8240699490699491
Rep no 0, Fold no 2
bce:0.6527214050292969, acc :0.5714285714285714, f1: 0.6147859922178989, roc_auc: 0.6508924553772312
bce:0.5387207865715027, acc :0.7613740808823529, f1: 0.8047554755475548, roc_auc: 0.8031334906334906
Rep no 0, Fold no 3
bce:0.677701473236084, acc :0.5497835497835498, f1: 0.4851485148514852, roc_auc: 0.5503960242273644
bce:0.5138731300830841, acc :0.7721737132352942, f1: 0.8221534227726178, roc_auc: 0.813232531982532
Rep no 0, Fold no 4
bce:0.6508514285087585, acc :0.6796536796536796, f1: 0.6636363636363636, roc_auc: 0.7064564564

## Next, we evaluated other methods for transfer learning
1. We used the best-performing model, i.e. the one pretrained for 60 epochs on the high-similarity data.
2. We investigated the prediction performance when using different learning rates for the feature extraction block.

## Fine tuning at 5x slower learning rate

In [5]:
# Repeated k-fold evaluation of the 'fine_tune_5x' strategy, starting from
# the solubility model pretrained for 60 epochs.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_5x'
params = best_params_vertical
es_trigger = 0
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'


def _report_mean_sd(label, values):
    """Print ``label`` followed by mean±sd of ``values``; return (array, mean, sd)."""
    arr = np.array(values)
    mean, sd = np.mean(arr), np.std(arr)
    print(f'{label}{mean:.3f}±{sd:.3f}')
    return arr, mean, sd


# Per-fold metrics on the validation split ...
val_bce_list, val_acc_list, val_f1_list, val_roc_auc_list = [], [], [], []
# ... and on the held-out test set.
bce_list, acc_list, f1_list, roc_auc_list = [], [], [], []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or fold, so build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Per-molecule graph files live under <train root>/processed/.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        # NOTE(review): the same seed is applied for every repetition and fold
        # and KFold is not shuffled, so every repetition sees identical
        # splits; confirm this is intended.
        seed_everything(SEED_NO)

        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        model_filename = f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt"

        # Training is disabled so that the saved models are evaluated as-is.
        # Uncomment the call below to retrain from the pretrained weights.
        # run_training(
        #     method_tf, train_loader, valid_loader, params, es_trigger,
        #     os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
        #     os.path.join(path_to_save_trained_model, model_filename),
        # )

        trained_model_file = os.path.join(path_to_trained_model, model_filename)
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, trained_model_file
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, trained_model_file
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation over all repetitions and folds.
val_bce_arr, val_mean_bce, val_sd_bce = _report_mean_sd('validation bce:', val_bce_list)
val_acc_arr, val_acc_mean, val_acc_sd = _report_mean_sd('validation acc:', val_acc_list)
val_f1_arr, val_f1_mean, val_f1_sd = _report_mean_sd('validation f1: ', val_f1_list)
val_roc_auc_arr, val_roc_auc_mean, val_roc_auc_sd = _report_mean_sd('validation roc_auc: ', val_roc_auc_list)

bce_arr, mean_bce, sd_bce = _report_mean_sd('bce:', bce_list)
acc_arr, acc_mean, acc_sd = _report_mean_sd('acc:', acc_list)
f1_arr, f1_mean, f1_sd = _report_mean_sd('f1: ', f1_list)
roc_auc_arr, roc_auc_mean, roc_auc_sd = _report_mean_sd('roc_auc: ', roc_auc_list)

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.5807152986526489, acc :0.6724137931034483, f1: 0.7696969696969699, roc_auc: 0.7156713900134953
bce:0.45497073233127594, acc :0.7995174632352942, f1: 0.8412698412698413, roc_auc: 0.8720691845691846
Rep no 0, Fold no 1
bce:0.6174641251564026, acc :0.6508620689655172, f1: 0.7552870090634441, roc_auc: 0.6572228891556624
bce:0.4905737042427063, acc :0.8025045955882353, f1: 0.841635007738849, roc_auc: 0.8588451088451088
Rep no 0, Fold no 2
bce:0.6593272089958191, acc :0.5930735930735931, f1: 0.6209677419354839, roc_auc: 0.6376181190940453
bce:0.4854661375284195, acc :0.7760799632352942, f1: 0.8206502322257949, roc_auc: 0.8537947287947288
Rep no 0, Fold no 3
bce:0.6443107724189758, acc :0.6320346320346321, f1: 0.5142857142857143, roc_auc: 0.6347258891132164
bce:0.4665004014968872, acc :0.7936580882352942, f1: 0.8303504847129008, roc_auc: 0.8652890527890529
Rep no 0, Fold no 4
bce:0.6248038411140442, acc :0.6926406926406926, f1: 0.7053941908713692, roc_auc: 0.73340840

## Fine tuning at 10x slower learning rate

In [6]:
# Experiment: fine-tune the pretrained vertical model at a 10x slower learning
# rate (method_tf='fine_tune_10x') with no forced training (es_trigger=0).
# For every repetition and cross-validation fold the fine-tuned checkpoint is
# scored on the validation fold and on the test set; the mean ± standard
# deviation of each metric is printed at the end.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_10x'
params = best_params_vertical
es_trigger = 0
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
# NOTE: not read in this cell; checkpoints are both written to and loaded from
# path_to_save_trained_model. Kept so the name stays defined.
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# False: skip training and evaluate the checkpoints already on disk.
# True: call run_training for every fold before evaluating it.
retrain_models = False

# Per-fold metrics on the validation fold.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold metrics on the test set.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is not shuffled and the seed is reset to SEED_NO for
# every fold, so each repetition presumably reproduces the same splits and the
# same training run -- confirm this is intended.
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Derive the processed-graph folder from train_data_root_path so the graphs
# that are loaded always belong to the dataset that was split.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # NOTE(review): torch.load unpickles these files; newer PyTorch
        # releases default to weights_only=True -- confirm against the
        # installed version.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # One checkpoint per (method, repetition, fold, es_trigger) combination.
        model_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )
        if retrain_models:
            run_training(
                method_tf, train_loader, valid_loader, params, es_trigger,
                os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
                model_path,
            )
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, model_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, model_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation (np.std default, ddof=0) over all collected folds.
val_bce_arr = np.array(val_bce_list)
val_mean_bce, val_sd_bce = np.mean(val_bce_arr), np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean, val_acc_sd = np.mean(val_acc_arr), np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean, val_f1_sd = np.mean(val_f1_arr), np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean, val_roc_auc_sd = np.mean(val_roc_auc_arr), np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce, sd_bce = np.mean(bce_arr), np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean, acc_sd = np.mean(acc_arr), np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean, f1_sd = np.mean(f1_arr), np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean, roc_auc_sd = np.mean(roc_auc_arr), np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.574695885181427, acc :0.6724137931034483, f1: 0.7696969696969699, roc_auc: 0.7223346828609987
bce:0.4711271822452545, acc :0.7995174632352942, f1: 0.8431983385254413, roc_auc: 0.8682982432982433
Rep no 0, Fold no 1
bce:0.6116788387298584, acc :0.6982758620689655, f1: 0.7852760736196319, roc_auc: 0.6743497398959583
bce:0.4717002958059311, acc :0.7897518382352942, f1: 0.8315412186379929, roc_auc: 0.8591376403876405
Rep no 0, Fold no 2
bce:0.6412062644958496, acc :0.5844155844155844, f1: 0.5752212389380531, roc_auc: 0.6643917804109795
bce:0.45823684334754944, acc :0.8044577205882353, f1: 0.8406276993953354, roc_auc: 0.8719471531971532
Rep no 0, Fold no 3
bce:0.6643053293228149, acc :0.5930735930735931, f1: 0.5, roc_auc: 0.6106538282341979
bce:0.48230066895484924, acc :0.7780330882352942, f1: 0.8249475890985325, roc_auc: 0.8587027962027962
Rep no 0, Fold no 4
bce:0.6141497492790222, acc :0.7056277056277056, f1: 0.690909090909091, roc_auc: 0.7237987987987987
bce:0.

## Fine tuning at normal learning rate

In [7]:
# Experiment: fine-tune the pretrained vertical model at the normal learning
# rate (method_tf='fine_tune') with no forced training (es_trigger=0).
# For every repetition and cross-validation fold the fine-tuned checkpoint is
# scored on the validation fold and on the test set; the mean ± standard
# deviation of each metric is printed at the end.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune'
params = best_params_vertical
es_trigger = 0
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
# NOTE: not read in this cell; checkpoints are both written to and loaded from
# path_to_save_trained_model. Kept so the name stays defined.
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# False: skip training and evaluate the checkpoints already on disk.
# True: call run_training for every fold before evaluating it.
retrain_models = False

# Per-fold metrics on the validation fold.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold metrics on the test set.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is not shuffled and the seed is reset to SEED_NO for
# every fold, so each repetition presumably reproduces the same splits and the
# same training run -- confirm this is intended.
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Derive the processed-graph folder from train_data_root_path so the graphs
# that are loaded always belong to the dataset that was split.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # NOTE(review): torch.load unpickles these files; newer PyTorch
        # releases default to weights_only=True -- confirm against the
        # installed version.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # One checkpoint per (method, repetition, fold, es_trigger) combination.
        model_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )
        if retrain_models:
            run_training(
                method_tf, train_loader, valid_loader, params, es_trigger,
                os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
                model_path,
            )
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, model_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, model_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation (np.std default, ddof=0) over all collected folds.
val_bce_arr = np.array(val_bce_list)
val_mean_bce, val_sd_bce = np.mean(val_bce_arr), np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean, val_acc_sd = np.mean(val_acc_arr), np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean, val_f1_sd = np.mean(val_f1_arr), np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean, val_roc_auc_sd = np.mean(val_roc_auc_arr), np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce, sd_bce = np.mean(bce_arr), np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean, acc_sd = np.mean(acc_arr), np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean, f1_sd = np.mean(f1_arr), np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean, roc_auc_sd = np.mean(roc_auc_arr), np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.5660783052444458, acc :0.7112068965517241, f1: 0.809116809116809, roc_auc: 0.7068994601889339
bce:0.4877791404724121, acc :0.7927389705882353, f1: 0.8440957036274762, roc_auc: 0.8363555863555863
Rep no 0, Fold no 1
bce:0.6468179821968079, acc :0.646551724137931, f1: 0.7616279069767442, roc_auc: 0.5793517406962785
bce:0.5063085556030273, acc :0.7849264705882353, f1: 0.8291332040985876, roc_auc: 0.8213264775764776
Rep no 0, Fold no 2
bce:0.6615083813667297, acc :0.6060606060606061, f1: 0.6459143968871596, roc_auc: 0.6379181040947952
bce:0.4838125556707382, acc :0.7721737132352942, f1: 0.8232418464976605, roc_auc: 0.8354999917499917
Rep no 0, Fold no 3
bce:0.6493971943855286, acc :0.5670995670995671, f1: 0.5327102803738318, roc_auc: 0.6433452399440907
bce:0.5150824189186096, acc :0.7143841911764706, f1: 0.785625, roc_auc: 0.8207916020416022
Rep no 0, Fold no 4
bce:0.6272428631782532, acc :0.7056277056277056, f1: 0.7166666666666666, roc_auc: 0.7141141141141141
bce

# Evaluating the use of a slower learning rate combined with force training of the model
1. Force training for 10, 15, 20, 25 and 30 epochs (`es_trigger`) before early stopping can be triggered

In [24]:
# Experiment: fine-tune the pretrained vertical model at a 5x slower learning
# rate (method_tf='fine_tune_5x') with es_trigger=10, the number used to force
# training before early stopping can trigger.
# For every repetition and cross-validation fold the fine-tuned checkpoint is
# scored on the validation fold and on the test set; the mean ± standard
# deviation of each metric is printed at the end.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_5x'
params = best_params_vertical
es_trigger = 10
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
# NOTE: not read in this cell; checkpoints are both written to and loaded from
# path_to_save_trained_model. Kept so the name stays defined.
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# False: skip training and evaluate the checkpoints already on disk.
# True: call run_training for every fold before evaluating it.
retrain_models = False

# Per-fold metrics on the validation fold.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold metrics on the test set.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is not shuffled and the seed is reset to SEED_NO for
# every fold, so each repetition presumably reproduces the same splits and the
# same training run -- confirm this is intended.
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Derive the processed-graph folder from train_data_root_path so the graphs
# that are loaded always belong to the dataset that was split.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # NOTE(review): torch.load unpickles these files; newer PyTorch
        # releases default to weights_only=True -- confirm against the
        # installed version.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # One checkpoint per (method, repetition, fold, es_trigger) combination.
        model_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )
        if retrain_models:
            run_training(
                method_tf, train_loader, valid_loader, params, es_trigger,
                os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
                model_path,
            )
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, model_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, model_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation (np.std default, ddof=0) over all collected folds.
val_bce_arr = np.array(val_bce_list)
val_mean_bce, val_sd_bce = np.mean(val_bce_arr), np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean, val_acc_sd = np.mean(val_acc_arr), np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean, val_f1_sd = np.mean(val_f1_arr), np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean, val_roc_auc_sd = np.mean(val_roc_auc_arr), np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce, sd_bce = np.mean(bce_arr), np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean, acc_sd = np.mean(acc_arr), np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean, f1_sd = np.mean(f1_arr), np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean, roc_auc_sd = np.mean(roc_auc_arr), np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.5815671682357788, acc :0.6681034482758621, f1: 0.7673716012084593, roc_auc: 0.7169365721997301
bce:0.45119190216064453, acc :0.7995174632352942, f1: 0.8412698412698413, roc_auc: 0.8759026884026884
Rep no 0, Fold no 1
bce:0.6222043633460999, acc :0.6508620689655172, f1: 0.7492260061919505, roc_auc: 0.6472989195678271
bce:0.475962296128273, acc :0.7995174632352942, f1: 0.835016835016835, roc_auc: 0.8651013651013651
Rep no 0, Fold no 2
bce:0.659091055393219, acc :0.5930735930735931, f1: 0.6209677419354839, roc_auc: 0.637543122843858
bce:0.48530712723731995, acc :0.7780330882352942, f1: 0.8218637992831541, roc_auc: 0.8538885726385727
Rep no 0, Fold no 3
bce:0.6443515419960022, acc :0.6320346320346321, f1: 0.5142857142857143, roc_auc: 0.6345705854946421
bce:0.46644918620586395, acc :0.7936580882352942, f1: 0.8303504847129008, roc_auc: 0.8653203340703342
Rep no 0, Fold no 4
bce:0.6256555914878845, acc :0.6926406926406926, f1: 0.7053941908713692, roc_auc: 0.733108108

In [16]:
# Experiment: fine-tune the pretrained vertical model at a 5x slower learning
# rate (method_tf='fine_tune_5x') with es_trigger=15, the number used to force
# training before early stopping can trigger.
# For every repetition and cross-validation fold the fine-tuned checkpoint is
# scored on the validation fold and on the test set; the mean ± standard
# deviation of each metric is printed at the end.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_5x'
params = best_params_vertical
es_trigger = 15
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
# NOTE: not read in this cell; checkpoints are both written to and loaded from
# path_to_save_trained_model. Kept so the name stays defined.
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# False: skip training and evaluate the checkpoints already on disk.
# True: call run_training for every fold before evaluating it.
retrain_models = False

# Per-fold metrics on the validation fold.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold metrics on the test set.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is not shuffled and the seed is reset to SEED_NO for
# every fold, so each repetition presumably reproduces the same splits and the
# same training run -- confirm this is intended.
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Derive the processed-graph folder from train_data_root_path so the graphs
# that are loaded always belong to the dataset that was split.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # NOTE(review): torch.load unpickles these files; newer PyTorch
        # releases default to weights_only=True -- confirm against the
        # installed version.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # One checkpoint per (method, repetition, fold, es_trigger) combination.
        model_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )
        if retrain_models:
            run_training(
                method_tf, train_loader, valid_loader, params, es_trigger,
                os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
                model_path,
            )
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, model_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, model_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation (np.std default, ddof=0) over all collected folds.
val_bce_arr = np.array(val_bce_list)
val_mean_bce, val_sd_bce = np.mean(val_bce_arr), np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean, val_acc_sd = np.mean(val_acc_arr), np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean, val_f1_sd = np.mean(val_f1_arr), np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean, val_roc_auc_sd = np.mean(val_roc_auc_arr), np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce, sd_bce = np.mean(bce_arr), np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean, acc_sd = np.mean(acc_arr), np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean, f1_sd = np.mean(f1_arr), np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean, roc_auc_sd = np.mean(roc_auc_arr), np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.580559253692627, acc :0.6681034482758621, f1: 0.7673716012084593, roc_auc: 0.7139844804318489
bce:0.4556100368499756, acc :0.7995174632352942, f1: 0.8412698412698413, roc_auc: 0.8675131175131175
Rep no 0, Fold no 1
bce:0.6257521510124207, acc :0.6810344827586207, f1: 0.7672955974842767, roc_auc: 0.6517006802721088
bce:0.4457445442676544, acc :0.8083639705882353, f1: 0.8475473801560758, roc_auc: 0.8695261195261195
Rep no 0, Fold no 2
bce:0.6616900563240051, acc :0.5670995670995671, f1: 0.5614035087719298, roc_auc: 0.64076796160192
bce:0.4612022638320923, acc :0.8014705882352942, f1: 0.8425336164189667, roc_auc: 0.8504352566852567
Rep no 0, Fold no 3
bce:0.6624472737312317, acc :0.5757575757575758, f1: 0.4302325581395348, roc_auc: 0.6218356887715484
bce:0.4452606290578842, acc :0.8200827205882353, f1: 0.8505186089078705, roc_auc: 0.8666795229295229
Rep no 0, Fold no 4
bce:0.649895191192627, acc :0.7012987012987013, f1: 0.7112970711297071, roc_auc: 0.737987987987

In [17]:
# Experiment: fine-tune the pretrained vertical model at a 5x slower learning
# rate (method_tf='fine_tune_5x') with es_trigger=20, the number used to force
# training before early stopping can trigger.
# For every repetition and cross-validation fold the fine-tuned checkpoint is
# scored on the validation fold and on the test set; the mean ± standard
# deviation of each metric is printed at the end.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_5x'
params = best_params_vertical
es_trigger = 20
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
# NOTE: not read in this cell; checkpoints are both written to and loaded from
# path_to_save_trained_model. Kept so the name stays defined.
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# False: skip training and evaluate the checkpoints already on disk.
# True: call run_training for every fold before evaluating it.
retrain_models = False

# Per-fold metrics on the validation fold.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold metrics on the test set.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is not shuffled and the seed is reset to SEED_NO for
# every fold, so each repetition presumably reproduces the same splits and the
# same training run -- confirm this is intended.
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Derive the processed-graph folder from train_data_root_path so the graphs
# that are loaded always belong to the dataset that was split.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # NOTE(review): torch.load unpickles these files; newer PyTorch
        # releases default to weights_only=True -- confirm against the
        # installed version.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # One checkpoint per (method, repetition, fold, es_trigger) combination.
        model_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )
        if retrain_models:
            run_training(
                method_tf, train_loader, valid_loader, params, es_trigger,
                os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
                model_path,
            )
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, model_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, model_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation (np.std default, ddof=0) over all collected folds.
val_bce_arr = np.array(val_bce_list)
val_mean_bce, val_sd_bce = np.mean(val_bce_arr), np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean, val_acc_sd = np.mean(val_acc_arr), np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean, val_f1_sd = np.mean(val_f1_arr), np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean, val_roc_auc_sd = np.mean(val_roc_auc_arr), np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce, sd_bce = np.mean(bce_arr), np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean, acc_sd = np.mean(acc_arr), np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean, f1_sd = np.mean(f1_arr), np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean, roc_auc_sd = np.mean(roc_auc_arr), np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.5835076570510864, acc :0.6896551724137931, f1: 0.7791411042944786, roc_auc: 0.710357624831309
bce:0.43635958433151245, acc :0.8103170955882353, f1: 0.8459606830248243, roc_auc: 0.8820338195338195
Rep no 0, Fold no 1
bce:0.6529975533485413, acc :0.6336206896551724, f1: 0.7301587301587302, roc_auc: 0.6368947579031614
bce:0.43248628079891205, acc :0.8269761029411764, f1: 0.8602150537634409, roc_auc: 0.8587763587763588
Rep no 0, Fold no 2
bce:0.6707444190979004, acc :0.5800865800865801, f1: 0.5610859728506787, roc_auc: 0.6424178791060448
bce:0.4682886451482773, acc :0.7995174632352942, f1: 0.8359972729338687, roc_auc: 0.8396339333839333
Rep no 0, Fold no 3
bce:0.6685404777526855, acc :0.5887445887445888, f1: 0.45714285714285713, roc_auc: 0.6347258891132164
bce:0.4401945322751999, acc :0.8220358455882353, f1: 0.8525387655822437, roc_auc: 0.8699186824186824
Rep no 0, Fold no 4
bce:0.6743557453155518, acc :0.6883116883116883, f1: 0.7024793388429752, roc_auc: 0.739114

In [21]:
# Experiment: fine-tune the pretrained vertical model at a 5x slower learning
# rate (method_tf='fine_tune_5x') with es_trigger=25, the number used to force
# training before early stopping can trigger.
# For every repetition and cross-validation fold the model is trained, then
# scored on the validation fold and on the test set; the mean ± standard
# deviation of each metric is printed at the end.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_5x'
params = best_params_vertical
es_trigger = 25
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
# NOTE: not read in this cell; checkpoints are both written to and loaded from
# path_to_save_trained_model. Kept so the name stays defined.
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# True: call run_training for every fold before evaluating it (this cell's
# current behaviour). False: skip training and evaluate checkpoints on disk.
retrain_models = True

# Per-fold metrics on the validation fold.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold metrics on the test set.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is not shuffled and the seed is reset to SEED_NO for
# every fold, so each repetition presumably reproduces the same splits and the
# same training run -- confirm this is intended.
kf = KFold(n_splits=N_SPLITS)

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

# Derive the processed-graph folder from train_data_root_path so the graphs
# that are loaded always belong to the dataset that was split.
processed_dir = os.path.join(train_data_root_path, 'processed')

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # NOTE(review): torch.load unpickles these files; newer PyTorch
        # releases default to weights_only=True -- confirm against the
        # installed version.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # One checkpoint per (method, repetition, fold, es_trigger) combination.
        model_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )
        if retrain_models:
            run_training(
                method_tf, train_loader, valid_loader, params, es_trigger,
                os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
                model_path,
            )
        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, model_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, model_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Mean ± standard deviation (np.std default, ddof=0) over all collected folds.
val_bce_arr = np.array(val_bce_list)
val_mean_bce, val_sd_bce = np.mean(val_bce_arr), np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean, val_acc_sd = np.mean(val_acc_arr), np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean, val_f1_sd = np.mean(val_f1_arr), np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean, val_roc_auc_sd = np.mean(val_roc_auc_arr), np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce, sd_bce = np.mean(bce_arr), np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean, acc_sd = np.mean(acc_arr), np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean, f1_sd = np.mean(f1_arr), np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean, roc_auc_sd = np.mean(roc_auc_arr), np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

Rep no 0, Fold no 0
Epoch: 1/300, train loss : 1.3573159873485565, validation loss : 0.6672883033752441
Epoch: 2/300, train loss : 0.7439092099666595, validation loss : 0.6624502539634705
Epoch: 3/300, train loss : 0.7037086635828018, validation loss : 0.7322502732276917
Epoch: 4/300, train loss : 0.6863243877887726, validation loss : 0.6494139432907104
Epoch: 5/300, train loss : 0.662983849644661, validation loss : 0.6637433767318726
Epoch: 6/300, train loss : 0.6509369611740112, validation loss : 0.6498183012008667
Epoch: 7/300, train loss : 0.6347012966871262, validation loss : 0.6043581366539001
Epoch: 8/300, train loss : 0.6238026469945908, validation loss : 0.6012547612190247
Epoch: 9/300, train loss : 0.6111105382442474, validation loss : 0.6027132272720337
Epoch: 10/300, train loss : 0.6114585101604462, validation loss : 0.5991734266281128
Epoch: 11/300, train loss : 0.5966641157865524, validation loss : 0.606228768825531
Epoch: 12/300, train loss : 0.5986240655183792, validati

In [None]:
# Evaluate the 'fine_tune_5x' transfer-learning checkpoints (es_trigger = 30)
# with repeated K-fold cross-validation: the checkpoint saved for each
# (repeat, fold) pair is scored on that fold's validation split and on the
# held-out test set, then every metric is summarised as mean ± SD.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_5x'
params = best_params_vertical
es_trigger = 30
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# Per-fold validation metrics.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold test metrics.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is created without shuffling and the seed is reset to
# SEED_NO on every fold, so each repetition iterates over identical splits;
# repetitions differ only in which saved checkpoint is loaded.
kf = KFold(n_splits=N_SPLITS)

# Per-molecule graph files live under the training root, so derive the
# directory from it instead of repeating the literal path.
processed_dir = os.path.join(train_data_root_path, 'processed')

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # Load the pre-processed graph of every molecule in this fold.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # Checkpoint belonging to this (method, repeat, fold, es_trigger) run.
        checkpoint_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )

        # Training is disabled so that the saved checkpoints are evaluated
        # as-is. Uncomment the call below to retrain from the pretrained model.
        # run_training(
        #     method_tf, train_loader, valid_loader, params, es_trigger,
        #     os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
        #     checkpoint_path,
        # )

        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, checkpoint_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, checkpoint_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Summary over all repetitions and folds: mean ± SD (np.std default, i.e.
# population standard deviation).
val_bce_arr = np.array(val_bce_list)
val_mean_bce = np.mean(val_bce_arr)
val_sd_bce = np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean = np.mean(val_acc_arr)
val_acc_sd = np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean = np.mean(val_f1_arr)
val_f1_sd = np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean = np.mean(val_roc_auc_arr)
val_roc_auc_sd = np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce = np.mean(bce_arr)
sd_bce = np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean = np.mean(acc_arr)
acc_sd = np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean = np.mean(f1_arr)
f1_sd = np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean = np.mean(roc_auc_arr)
roc_auc_sd = np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

### Fine-tune at a 10x slower learning rate, with forced training before early stopping

In [18]:
# Evaluate the 'fine_tune_10x' transfer-learning checkpoints (es_trigger = 10)
# with repeated K-fold cross-validation: the checkpoint saved for each
# (repeat, fold) pair is scored on that fold's validation split and on the
# held-out test set, then every metric is summarised as mean ± SD.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_10x'
params = best_params_vertical
es_trigger = 10
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# Per-fold validation metrics.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold test metrics.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is created without shuffling and the seed is reset to
# SEED_NO on every fold, so each repetition iterates over identical splits;
# repetitions differ only in which saved checkpoint is loaded.
kf = KFold(n_splits=N_SPLITS)

# Per-molecule graph files live under the training root, so derive the
# directory from it instead of repeating the literal path.
processed_dir = os.path.join(train_data_root_path, 'processed')

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # Load the pre-processed graph of every molecule in this fold.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # Checkpoint belonging to this (method, repeat, fold, es_trigger) run.
        checkpoint_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )

        # Training is disabled so that the saved checkpoints are evaluated
        # as-is. Uncomment the call below to retrain from the pretrained model.
        # run_training(
        #     method_tf, train_loader, valid_loader, params, es_trigger,
        #     os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
        #     checkpoint_path,
        # )

        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, checkpoint_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, checkpoint_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Summary over all repetitions and folds: mean ± SD (np.std default, i.e.
# population standard deviation).
val_bce_arr = np.array(val_bce_list)
val_mean_bce = np.mean(val_bce_arr)
val_sd_bce = np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean = np.mean(val_acc_arr)
val_acc_sd = np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean = np.mean(val_f1_arr)
val_f1_sd = np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean = np.mean(val_roc_auc_arr)
val_roc_auc_sd = np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce = np.mean(bce_arr)
sd_bce = np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean = np.mean(acc_arr)
acc_sd = np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean = np.mean(f1_arr)
f1_sd = np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean = np.mean(roc_auc_arr)
roc_auc_sd = np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.5746744275093079, acc :0.6810344827586207, f1: 0.7757575757575758, roc_auc: 0.722165991902834
bce:0.47143182158470154, acc :0.7995174632352942, f1: 0.8431983385254413, roc_auc: 0.8689551502051502
Rep no 0, Fold no 1
bce:0.6113265156745911, acc :0.6982758620689655, f1: 0.7852760736196319, roc_auc: 0.675390156062425
bce:0.4705562889575958, acc :0.7897518382352942, f1: 0.8315412186379929, roc_auc: 0.8604999229999231
Rep no 0, Fold no 2
bce:0.6408312916755676, acc :0.6017316017316018, f1: 0.6166666666666667, roc_auc: 0.6661166941652918
bce:0.4608095586299896, acc :0.8005514705882353, f1: 0.8388746803069054, roc_auc: 0.8693649006149007
Rep no 0, Fold no 3
bce:0.664361834526062, acc :0.5930735930735931, f1: 0.5, roc_auc: 0.6106538282341979
bce:0.48236191272735596, acc :0.7780330882352942, f1: 0.8249475890985325, roc_auc: 0.85879664004664
Rep no 0, Fold no 4
bce:0.6160662770271301, acc :0.7056277056277056, f1: 0.6991150442477877, roc_auc: 0.7243243243243244
bce:0.504

In [19]:
# Evaluate the 'fine_tune_10x' transfer-learning checkpoints (es_trigger = 15)
# with repeated K-fold cross-validation: the checkpoint saved for each
# (repeat, fold) pair is scored on that fold's validation split and on the
# held-out test set, then every metric is summarised as mean ± SD.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_10x'
params = best_params_vertical
es_trigger = 15
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# Per-fold validation metrics.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold test metrics.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is created without shuffling and the seed is reset to
# SEED_NO on every fold, so each repetition iterates over identical splits;
# repetitions differ only in which saved checkpoint is loaded.
kf = KFold(n_splits=N_SPLITS)

# Per-molecule graph files live under the training root, so derive the
# directory from it instead of repeating the literal path.
processed_dir = os.path.join(train_data_root_path, 'processed')

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # Load the pre-processed graph of every molecule in this fold.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # Checkpoint belonging to this (method, repeat, fold, es_trigger) run.
        checkpoint_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )

        # Training is disabled so that the saved checkpoints are evaluated
        # as-is. Uncomment the call below to retrain from the pretrained model.
        # run_training(
        #     method_tf, train_loader, valid_loader, params, es_trigger,
        #     os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
        #     checkpoint_path,
        # )

        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, checkpoint_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, checkpoint_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Summary over all repetitions and folds: mean ± SD (np.std default, i.e.
# population standard deviation).
val_bce_arr = np.array(val_bce_list)
val_mean_bce = np.mean(val_bce_arr)
val_sd_bce = np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean = np.mean(val_acc_arr)
val_acc_sd = np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean = np.mean(val_f1_arr)
val_f1_sd = np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean = np.mean(val_roc_auc_arr)
val_roc_auc_sd = np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce = np.mean(bce_arr)
sd_bce = np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean = np.mean(acc_arr)
acc_sd = np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean = np.mean(f1_arr)
f1_sd = np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean = np.mean(roc_auc_arr)
roc_auc_sd = np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.5755579471588135, acc :0.6810344827586207, f1: 0.7757575757575758, roc_auc: 0.7209851551956814
bce:0.4701726883649826, acc :0.8014705882352942, f1: 0.8444444444444446, roc_auc: 0.8687987437987438
Rep no 0, Fold no 1
bce:0.6114091277122498, acc :0.7068965517241379, f1: 0.7926829268292683, roc_auc: 0.67859143657463
bce:0.474857896566391, acc :0.7897518382352942, f1: 0.8322649572649572, roc_auc: 0.8656644281644281
Rep no 0, Fold no 2
bce:0.6407521963119507, acc :0.5887445887445888, f1: 0.5777777777777777, roc_auc: 0.6650667466626669
bce:0.4570174068212509, acc :0.8044577205882353, f1: 0.8406276993953354, roc_auc: 0.8720722783222783
Rep no 0, Fold no 3
bce:0.6759536266326904, acc :0.5800865800865801, f1: 0.5076142131979696, roc_auc: 0.6108867836620593
bce:0.46198470890522003, acc :0.8044577205882353, f1: 0.8491597759402507, roc_auc: 0.8671549296549297
Rep no 0, Fold no 4
bce:0.6232619285583496, acc :0.7056277056277056, f1: 0.706896551724138, roc_auc: 0.73228228228

In [20]:
# Evaluate the 'fine_tune_10x' transfer-learning checkpoints (es_trigger = 20)
# with repeated K-fold cross-validation: the checkpoint saved for each
# (repeat, fold) pair is scored on that fold's validation split and on the
# held-out test set, then every metric is summarised as mean ± SD.
train_data_root_path = './data/graph_data/data_oral_avail_train/'
train_data_raw_filename = 'data_oral_avail_train_50.csv'
test_data_root_path = './data/graph_data/data_oral_avail_test'
test_data_raw_filename = 'data_oral_avail_test_1_50.csv'
n_repetitions = 5
method_tf = 'fine_tune_10x'
params = best_params_vertical
es_trigger = 20
path_to_pretrained_model = './trf_learning_models/pretrained_models/vertical/high/'
path_to_save_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'
path_to_trained_model = './trf_learning_models/trained_models/vertical/high/pretrained_60/'

# Per-fold validation metrics.
val_bce_list = []
val_acc_list = []
val_f1_list = []
val_roc_auc_list = []

# Per-fold test metrics.
bce_list = []
acc_list = []
f1_list = []
roc_auc_list = []

dataset_for_cv = LoadHOBDataset(train_data_root_path, train_data_raw_filename)
# NOTE(review): KFold is created without shuffling and the seed is reset to
# SEED_NO on every fold, so each repetition iterates over identical splits;
# repetitions differ only in which saved checkpoint is loaded.
kf = KFold(n_splits=N_SPLITS)

# Per-molecule graph files live under the training root, so derive the
# directory from it instead of repeating the literal path.
processed_dir = os.path.join(train_data_root_path, 'processed')

# The test set does not depend on the repetition or the fold: build it once.
test_dataset = LoadHOBDataset(test_data_root_path, test_data_raw_filename)
test_loader = DataLoader(
    test_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
)

for repeat in range(n_repetitions):
    for fold_no, (train_idx, valid_idx) in enumerate(kf.split(dataset_for_cv)):
        seed_everything(SEED_NO)

        # Load the pre-processed graph of every molecule in this fold.
        train_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{t_idx}.pt"))
            for t_idx in train_idx
        ]
        valid_dataset = [
            torch.load(os.path.join(processed_dir, f"molecule_{v_idx}.pt"))
            for v_idx in valid_idx
        ]

        train_loader = DataLoader(
            train_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=True
        )
        valid_loader = DataLoader(
            valid_dataset, batch_size=NUM_GRAPHS_PER_BATCH, shuffle=False
        )
        print(f'Rep no {repeat}, Fold no {fold_no}')

        # Checkpoint belonging to this (method, repeat, fold, es_trigger) run.
        checkpoint_path = os.path.join(
            path_to_save_trained_model,
            f"trained_vertical_model_{method_tf}_repeat_{repeat}_fold_{fold_no}_{es_trigger}_es_trigger.pt",
        )

        # Training is disabled so that the saved checkpoints are evaluated
        # as-is. Uncomment the call below to retrain from the pretrained model.
        # run_training(
        #     method_tf, train_loader, valid_loader, params, es_trigger,
        #     os.path.join(path_to_pretrained_model, 'pretrained_vertical_model_60_epoch.pt'),
        #     checkpoint_path,
        # )

        val_bce, val_acc, val_f1, val_roc_auc = run_validation(
            method_tf, valid_loader, params, checkpoint_path
        )
        bce, acc, f1, roc_auc = run_testing(
            method_tf, test_loader, params, checkpoint_path
        )

        val_bce_list.append(val_bce)
        val_acc_list.append(val_acc)
        val_f1_list.append(val_f1)
        val_roc_auc_list.append(val_roc_auc)

        bce_list.append(bce)
        acc_list.append(acc)
        f1_list.append(f1)
        roc_auc_list.append(roc_auc)

# Summary over all repetitions and folds: mean ± SD (np.std default, i.e.
# population standard deviation).
val_bce_arr = np.array(val_bce_list)
val_mean_bce = np.mean(val_bce_arr)
val_sd_bce = np.std(val_bce_arr)
print(f'validation bce:{val_mean_bce:.3f}±{val_sd_bce:.3f}')

val_acc_arr = np.array(val_acc_list)
val_acc_mean = np.mean(val_acc_arr)
val_acc_sd = np.std(val_acc_arr)
print(f'validation acc:{val_acc_mean:.3f}±{val_acc_sd:.3f}')

val_f1_arr = np.array(val_f1_list)
val_f1_mean = np.mean(val_f1_arr)
val_f1_sd = np.std(val_f1_arr)
print(f'validation f1: {val_f1_mean:.3f}±{val_f1_sd:.3f}')

val_roc_auc_arr = np.array(val_roc_auc_list)
val_roc_auc_mean = np.mean(val_roc_auc_arr)
val_roc_auc_sd = np.std(val_roc_auc_arr)
print(f'validation roc_auc: {val_roc_auc_mean:.3f}±{val_roc_auc_sd:.3f}')

bce_arr = np.array(bce_list)
mean_bce = np.mean(bce_arr)
sd_bce = np.std(bce_arr)
print(f'bce:{mean_bce:.3f}±{sd_bce:.3f}')

acc_arr = np.array(acc_list)
acc_mean = np.mean(acc_arr)
acc_sd = np.std(acc_arr)
print(f'acc:{acc_mean:.3f}±{acc_sd:.3f}')

f1_arr = np.array(f1_list)
f1_mean = np.mean(f1_arr)
f1_sd = np.std(f1_arr)
print(f'f1: {f1_mean:.3f}±{f1_sd:.3f}')

roc_auc_arr = np.array(roc_auc_list)
roc_auc_mean = np.mean(roc_auc_arr)
roc_auc_sd = np.std(roc_auc_arr)
print(f'roc_auc: {roc_auc_mean:.3f}±{roc_auc_sd:.3f}')

print("Training Completed!")

Rep no 0, Fold no 0
bce:0.574384868144989, acc :0.6767241379310345, f1: 0.7734138972809668, roc_auc: 0.7231781376518218
bce:0.4712449908256531, acc :0.8014705882352942, f1: 0.8444444444444446, roc_auc: 0.8684233684233684
Rep no 0, Fold no 1
bce:0.6162497401237488, acc :0.6810344827586207, f1: 0.7743902439024389, roc_auc: 0.6736294517807123
bce:0.4568988233804703, acc :0.8034237132352942, f1: 0.8438054668086618, roc_auc: 0.8633949883949885
Rep no 0, Fold no 2
bce:0.6411864757537842, acc :0.5887445887445888, f1: 0.5777777777777777, roc_auc: 0.6649542522873856
bce:0.45768292248249054, acc :0.8044577205882353, f1: 0.8406276993953354, roc_auc: 0.8721035596035596
Rep no 0, Fold no 3
bce:0.6878967881202698, acc :0.5930735930735931, f1: 0.4777777777777778, roc_auc: 0.6041310762540767
bce:0.44944070279598236, acc :0.8025045955882353, f1: 0.8431032087789971, roc_auc: 0.8653859903859904
Rep no 0, Fold no 4
bce:0.625647783279419, acc :0.70995670995671, f1: 0.7124463519313305, roc_auc: 0.7343093093