In [None]:
import os

# Restrict this process to the GPU with index 1. This is done in the very
# first cell so the variable is already set when later cells import the
# model libraries.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

## Hyperparameter screening

In [None]:
from collections import OrderedDict

# Names of the target columns the multi-task model is trained on.
target_list = ['T8', 'T9']

# Parameter dict passed to prepare_dataset(), deepchem_robust_regressor() and
# RobustMT_model_hyperparam_screen() in the cells below.
para_dict_deepchem = {
    'model_name': 'VISAR_dc_demo',
    'task_list': target_list,
    'eval_type': 'regression',
    # input data related params:
    'dataset_file': './data/Kinase_tot_4deepchem_processed.csv',
    'feature_type': 'Circular_2048',
    'id_field': 'molregno',
    'smiles_field': 'cano_smiles',
    'model_flag': 'MT',
    'add_features': None,
    'frac_train': 0.9,
    'rand_seed': 0,
    'batch_size': 100,
    'normalize': True,
    # model architecture related parameters:
    'layer_sizes': [256, 64],
    'bypass_layer_sizes': [32],
    'dropouts': 0.5,
    'bypass_dropouts': 0.5,
    # model training related parameters:
    'learning_rate': 0.001,
    'GPU': True,
    'epoch': 5,  # training epoch of each round (saving model at the end of each round)
    'epoch_num': 10,  # how many rounds
    'optimizer': 'RMSprop',
    # viz file processing related parameters:
    'model_architecture': 'RobustMT',
    'hidden_layer': 2,
}

# Candidate grid for the hyperparameter screen: every key maps to the list of
# values to try for that setting.
candidate_params_dict = OrderedDict(
    # Derived from target_list so the grid cannot drift out of sync with the
    # task list (it was previously hard-coded to 4 while only 2 tasks are used).
    n_tasks = [len(target_list)],
    n_features = [2048],  ## need modification given FP types
    activation = ['relu'],
    momentum = [.9],
    batch_size = [128],
    init = ['glorot_uniform'],
    learning_rate = [0.001],
    decay = [1e-6],
    nb_epoch = [30],
    dropouts = [.2, .4],
    nb_layers = [1],
    batchnorm = [False],
    # NOTE(review): 265 in the second candidate may be a typo for 256 -- confirm
    # before relying on screening results; value left unchanged here.
    layer_sizes = [(1024, 512), (512, 265), (512, 128)],
    bypass_dropouts = [0.2, 0.4],
    bypass_layer_sizes = [[128], [64]]
)


In [None]:
# Switch to the project working directory so the relative './data/...' paths
# in the parameter dict resolve; '/working/directory/' is a placeholder path.
os.chdir('/working/directory/')

# NOTE(review): RobustMT_model_hyperparam_screen is not imported in any cell
# visible in this notebook -- confirm where it is defined before running.
log_out = RobustMT_model_hyperparam_screen(
    para_dict_deepchem,
    candidate_params_dict,
)

## Model training

In [None]:
from visar.deepchem_regressor import deepchem_robust_regressor
from visar.VISAR_model import visar_model
from visar.dataloader.deepchem_utils import prepare_dataset

In [None]:
# Build the train/test loaders and dataframes. prepare_dataset also returns a
# parameter dict, which replaces para_dict_deepchem from here on.
(
    train_loader,
    test_loader,
    train_df,
    test_df,
    para_dict_deepchem,
) = prepare_dataset(para_dict_deepchem)

In [None]:
# Create the regressor from the parameter dict and initialise its model.
dc_model = deepchem_robust_regressor(para_dict_deepchem)
dc_model.model_init()
# Bare expression on the last line so the notebook displays the model object.
dc_model.model

In [None]:
# Train the model; the test loader is passed in alongside the training loader
# (presumably for evaluation during training -- confirm against fit()).
dc_model.fit(train_loader, test_loader)

## Generate visualization files

In [None]:
# Loader settings for the custom compound set (prepared for compound_FP_loader).
custom_para_dict = dict(
    task_list=['IC50_acvalue'],  # a dummy column of float
    # input data related params:
    dataset_file='./data/FOR_predict.csv',
    feature_type='Morgan',
    id_field='CID',
    smiles_field='SMILES',
    model_flag='ST',
    add_features=None,
    frac_train=1,
    batch_size=100,
    normalize=False,
)

# Build the loader and dataframe for the custom compounds; the returned
# parameter dict replaces custom_para_dict.
# NOTE(review): compound_FP_loader is not imported in any cell visible in this
# notebook -- confirm where it is defined before running.
(
    custom_loader,
    custom_df,
    custom_para_dict,
) = compound_FP_loader(custom_para_dict)

In [None]:
# Load the training parameters saved for the previous model. This overwrites
# the in-memory para_dict_deepchem with the values stored on disk.
import json

# Context manager so the file handle is closed deterministically instead of
# being left open by a bare open() call.
with open('./logs/VISAR_dc_demo/train_parameters.json', 'r') as param_file:
    para_dict_deepchem = json.load(param_file)

In [None]:
# Rebuild the trained model from the reloaded parameters. The class used here
# is the one imported above and used to train 'VISAR_dc_demo'; the previous
# pytorch_DNN_model reference was never imported in this notebook and raised a
# NameError on a fresh kernel.
model = deepchem_robust_regressor(para_dict_deepchem)
model.model_init()
model.load_model()

# Column names of the custom compound table; these match the id_field and
# smiles_field given in custom_para_dict.
model.para_dict['custom_id_field'] = 'CID'
model.para_dict['custom_smiles_field'] = 'SMILES'

# Generate the viz results for the training set plus the custom compounds;
# 'test' is passed as the third positional argument (presumably the output
# name/prefix -- confirm against generate_viz_results).
model.generate_viz_results(
    train_loader,
    train_df,
    'test',
    custom_loader=custom_loader,
    custom_df=custom_df,
)

## Baseline model

In [None]:
# Parameter dict for the single-task baseline model trained below.
para_dict_baseline = {
    'model_name': 'KLIFS_global750_SVR',
    'task_list': ['T9'],
    'eval_type': 'regression',
    # input data related params:
    # A duplicate 'dataset_file' entry ('../data/MT_data_clean_June28.csv') was
    # removed: Python keeps only the last value of a repeated key, so that
    # entry was dead and only hid which file is actually used.
    'dataset_file': './data/Kinase_tot_4deepchem_processed.csv',
    'feature_type': 'Circular_2048',
    'id_field': 'molregno',
    'smiles_field': 'cano_smiles',
    'model_flag': 'ST',
    'add_features': None,
    'frac_train': 0.9,
    'rand_seed': 10000,
    'normalize': True,
    # model architecture related parameters:
    'baseline_type': 'SVR',  # 'RidgeCV'
    # viz file processing related parameters:
    'model_architecture': 'ST',
}

In [None]:
# Prepare the dataset for the baseline. Note that this rebinds train_loader,
# test_loader, train_df and test_df, replacing the values produced earlier for
# the deepchem model.
(
    train_loader,
    test_loader,
    train_df,
    test_df,
    para_dict_baseline,
) = prepare_dataset(para_dict_baseline)

# Build the baseline model and fit it on the training loader only.
baseline_model = visar_model(para_dict_baseline)
baseline_model.model_init()
baseline_model.fit(train_loader)

# Evaluate on both splits; evaluate() returns a pair per split (by the variable
# names presumably an RMSE-style error and a Pearson correlation -- confirm).
sqrt_train, pearsonr_train = baseline_model.evaluate(train_loader)
sqrt_test, pearsonr_test = baseline_model.evaluate(test_loader)

In [None]:
# Generate the viz results for the baseline from the training loader and
# dataframe; 'SVR_demo' is the third positional argument (presumably the
# output name/prefix -- confirm against generate_viz_results).
baseline_model.generate_viz_results(train_loader, train_df, 'SVR_demo')