In [None]:
import os

# Restrict CUDA to the device with index 1; this has to be set before any
# CUDA-backed library initialises the GPU in this kernel.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

## hyperparameter screening
`hyperparam_screening` returns the best parameter dictionary.
Users can also check the performance recorded in the log file and manually pick the best parameters.

In [None]:
# Names of the tasks the model is trained on.
task_names = ['T8', 'T9']

# Full configuration of the DNN model. Keyword order below is the key order
# of the resulting dictionary.
para_dict_DNN = dict(
    model_name='VISAR_pytorch_demo',  # user specific
    task_list=task_names,  # MUST BE A LIST!

    # --- input data related params ---
    dataset_file='./data/Kinase_tot_4deepchem_processed.csv',
    feature_type='Morgan',
    id_field='molregno',
    smiles_field='cano_smiles',
    add_features=None,
    frac_train=0.9,
    rand_seed=0,
    batch_size=100,
    normalize=True,

    # --- model architecture related parameters ---
    # two hidden widths followed by one output node per task
    layer_nodes=[256, 128, len(task_names)],
    dropouts=0.3,

    # --- model training related parameters ---
    learning_rate=0.001,
    GPU=True,
    epoch=100,  # training epoch of each round (saving model at the end of each round)
    epoch_num=5,  # how many rounds
    optimizer='Adam',

    # --- viz file processing related parameters ---
    model_architecture='ST',
    hidden_layer=1,
)

In [None]:
from visar.utils.pytorch_functions import hyperparam_screening
import os
# The notebook refers to its inputs and outputs through relative paths
# ('./data/...', './logs/...'), which resolve against the current working
# directory. Replace the placeholder below with your own project directory.
os.chdir('/working/directory/')  # user specified

import copy
from collections import OrderedDict
from visar.dataloader.pytorch_utils import compound_FP_loader
from visar.pytorch_regressor import pytorch_DNN_model

# Search space handed to hyperparam_screening: each key is an entry of
# para_dict_DNN and maps to the list of candidate values for that entry.
# NOTE(review): every candidate below ends in a single output node, whereas
# para_dict_DNN uses len(task_names) output nodes -- confirm which output
# width the screening is expected to use.
candidate_params_dict = OrderedDict([
    ('layer_nodes', [
        [256, 128, 1],
        [512, 64, 1],
        [512, 128, 1],
        [512, 256, 1],  # was 265, a transposition of 256
    ]),
    ('dropouts', [0.2, 0.4]),
    ('learning_rate', [0.01, 0.001]),
])

In [None]:
# Grid search over candidate_params_dict; the epoch / epoch_num values passed
# here are much smaller than the ones in para_dict_DNN (10 x 2 vs. 100 x 5).
# The returned value is kept as the best parameter dictionary.
best_param = hyperparam_screening(
    pytorch_DNN_model,
    para_dict_DNN,
    candidate_params_dict,
    mode='grid_search',
    epoch=10,
    epoch_num=2,
)

## Model training

In [None]:
from visar.pytorch_regressor import pytorch_DNN_model
from visar.dataloader.pytorch_utils import compound_FP_loader
# Build the loaders and data frames from the configuration. The loader also
# hands back a parameter dictionary, which replaces para_dict_DNN from here on.
split_outputs = compound_FP_loader(para_dict_DNN)
train_loader, test_loader, train_df, test_df, para_dict_DNN = split_outputs


In [None]:
# Create the model wrapper from the parameter dictionary and initialise it.
pyDNN_model = pytorch_DNN_model(para_dict_DNN)
pyDNN_model.model_init()
# Last expression of the cell: shows the wrapper's `model` attribute as the cell output.
pyDNN_model.model

In [None]:
# Fit the model, passing the train and test loaders returned by compound_FP_loader.
pyDNN_model.fit(train_loader, test_loader)

## generate viz files

In [None]:
from visar.dataloader.pytorch_utils import compound_FP_loader
from visar.pytorch_regressor import pytorch_DNN_model

# prepare custom dataloader
# Configuration for an extra, user-supplied compound file. Keyword order below
# is the key order of the resulting dictionary.
custom_para_dict = dict(
    task_list=['activity'],   # a dummy column of float
    # --- input data related params ---
    dataset_file='./data/binding_mode_notation.csv',
    feature_type='Morgan',
    id_field='cid',
    smiles_field='SMILES',
    model_flag='ST',
    add_features=None,
    frac_train=1,
    batch_size=100,
    normalize=False,
)

# With this configuration the loader result is unpacked into three values
# (loader, data frame, parameter dictionary); custom_para_dict is rebound to
# the returned dictionary.
custom_outputs = compound_FP_loader(custom_para_dict)
custom_loader, custom_df, custom_para_dict = custom_outputs

In [None]:
# load previous model
import json

# Read the parameters stored for the 'VISAR_pytorch_demo' run. The context
# manager closes the file handle as soon as the JSON has been parsed, instead
# of leaving it open until garbage collection.
with open('./logs/VISAR_pytorch_demo/train_parameters.json', 'r') as param_file:
    para_dict_DNN = json.load(param_file)

# Rebuild the model wrapper from those parameters, then load the saved model.
pyDNN_model = pytorch_DNN_model(para_dict_DNN)
pyDNN_model.model_init()
pyDNN_model.load_model()

In [None]:
# load training data
# Same loader as used for training, this time called with max_cutoff = 8000.
training_outputs = compound_FP_loader(para_dict_DNN, max_cutoff=8000)
train_loader, test_loader, train_df, test_df, para_dict_DNN = training_outputs

In [None]:
# Store the custom dataset's field names and the visualization settings on
# the model's parameter dictionary before generating the output files.
viz_params = pyDNN_model.para_dict
viz_params['custom_id_field'] = custom_para_dict['id_field']
viz_params['custom_smiles_field'] = custom_para_dict['smiles_field']
# NOTE(review): the configuration defined earlier in this notebook uses
# hidden_layer = 1 and model_architecture = 'ST'; confirm that the overrides
# below and the output name are the intended ones for this model.
viz_params['hidden_layer'] = 2
viz_params['model_architecture'] = 'RobustMT'

pyDNN_model.generate_viz_results(
    train_loader,
    train_df,
    'KLIFS_global750_MT_rep3',
    custom_loader=custom_loader,
    custom_df=custom_df,
)