In [10]:
import load_data_multitox as ld
import dataloaders_sigma as dl
from Model_train_test_regression import Net, EarlyStopping, train, test

import pandas as pd
import numpy as np

import torch
from torch.utils import data as td
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter

import sys 
import os
import glob

from sklearn.model_selection import train_test_split

from tensorboardX import SummaryWriter

import time
from sklearn.preprocessing import MinMaxScaler#StandardScaler

import json


# Number of conformers generated for every molecule.
NUM_CONFS = 100

# Number of chemical elements taken into account.
AMOUNT_OF_ELEM = 9

# Number of target values.
TARGET_NUM = 29

# Dataset root folder.
# DATASET_PATH = "~/Tox21-MultiTox/MultiTox"
DATASET_PATH = "./"

# Base directory for log files.
LOG_PATH = os.path.join(DATASET_PATH, "logs_sigma_right")

# Base directory for saved models.
MODEL_PATH = os.path.join(DATASET_PATH, "models_sigma_right")

In [11]:
# Identifier of the experiment; used to name the per-experiment sub-directories.
EXPERIMENT_NUM = 24

In [12]:
# Point LOG_PATH and MODEL_PATH at this experiment's own sub-directory
# ("exp_<EXPERIMENT_NUM>") and make sure both directories exist.
exp_dir_name = 'exp_' + str(EXPERIMENT_NUM)

# This cell rebinds LOG_PATH / MODEL_PATH, so re-running it used to nest the
# sub-directory again (".../exp_24/exp_24"). Only append the experiment folder
# when the path does not already end with it, which makes the cell idempotent.
if os.path.basename(os.path.normpath(LOG_PATH)) != exp_dir_name:
    LOG_PATH = os.path.join(LOG_PATH, exp_dir_name)
os.makedirs(LOG_PATH, exist_ok=True)

if os.path.basename(os.path.normpath(MODEL_PATH)) != exp_dir_name:
    MODEL_PATH = os.path.join(MODEL_PATH, exp_dir_name)
os.makedirs(MODEL_PATH, exist_ok=True)

# The original cell left `dir_path` bound to the model directory; keep that
# binding in case later cells read it.
dir_path = MODEL_PATH

In [13]:
# Root folder used when locating the saved experiment parameters.
path = "./"

In [14]:
# Load the hyperparameters saved for the current experiment.
# The experiment number comes from EXPERIMENT_NUM instead of a hard-coded 24,
# so changing EXPERIMENT_NUM cannot silently load another experiment's file.
params_file = os.path.join(
    path,
    "logs_sigma_right",
    'exp_' + str(EXPERIMENT_NUM),
    str(EXPERIMENT_NUM) + '_parameters.json',
)
with open(params_file, 'r') as f:
    args = json.load(f)

In [7]:
# args['NUM_EXP']=str(EXPERIMENT_NUM)
# args['BATCH_SIZE']=64

# args['TRANSF']='w'
# args['SIGMA_TRAIN']=False

In [15]:
# Display the loaded hyperparameter dictionary as this cell's output.
args

{'EPOCHS_NUM': 100,
 'PATIENCE': 25,
 'SIGMA': 1.4,
 'BATCH_SIZE': 128,
 'TRANSF': 'g',
 'NUM_EXP': '24',
 'VOXEL_DIM': 50,
 'LEARN_RATE': 1e-05,
 'SIGMA_TRAIN': False}

In [9]:
# conf_calc = ld.reading_sql_database(database_dir='./database/data/')

In [23]:
# Plain-text log of this experiment. Status messages below are printed to
# stdout and also appended to this file.
log_file_path = os.path.join(LOG_PATH, args['NUM_EXP'] + '_logs.txt')


def write_log(message):
    """Append ``message`` followed by a newline to the experiment log file."""
    with open(log_file_path, 'a') as f_log:
        f_log.write(message + '\n')


# Create / truncate the log file so every run of this cell starts with an empty log.
with open(log_file_path, 'w'):
    pass

start_time = time.time()
writer = SummaryWriter(LOG_PATH)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
write_log('Using device:' + str(device))
print()

# Additional info when using CUDA.
if device.type == 'cuda':
    # torch.cuda.memory_cached is deprecated in favour of memory_reserved;
    # use the new name when available and fall back on older PyTorch builds.
    cuda_reserved = getattr(torch.cuda, 'memory_reserved', None) or torch.cuda.memory_cached
    device_name = torch.cuda.get_device_name(0)
    allocated_gb = round(torch.cuda.memory_allocated(0) / 1024**3, 1)
    reserved_gb = round(cuda_reserved(0) / 1024**3, 1)

    print(device_name)
    print('Memory Usage:')
    print('Allocated:', allocated_gb, 'GB')
    print('Cached:   ', reserved_gb, 'GB')
    write_log(device_name + '\n' + 'Memory Usage:' + '\n'
              + 'Allocated:' + str(allocated_gb) + 'GB' + '\n'
              + 'Cached:   ' + str(reserved_gb) + 'GB')

print('Start loading dataset...')
write_log('Start loading dataset...')

# Read the table from CSV and min-max scale every column after the first one.
# NOTE(review): the scaler is fit on all rows, i.e. before the train/test split
# performed at the end of this cell.
data = pd.read_csv(os.path.join(DATASET_PATH, 'database', 'MultiTox.csv'))
props = list(data)[1:]
scaler = MinMaxScaler()
data[props] = scaler.fit_transform(data[props])

# Fixed element -> channel index mapping.
# (Alternative: elements = ld.create_element_dict(data, amount=AMOUNT_OF_ELEM+1))
elements = {'N': 0, 'C': 1, 'Cl': 2, 'I': 3, 'Br': 4, 'F': 5, 'O': 6, 'P': 7, 'S': 8}

# Read the conformer data from JSON.
# (Alternative: conf_calc = ld.reading_sql_database(database_dir='./dat/'))
with open(os.path.join(DATASET_PATH, 'many_elems.json'), 'r') as fp:
    conf_calc = json.load(fp)

keys = list(conf_calc.keys())
print ('Initial dataset size = ', len(keys))
write_log('Initial dataset size = ' + str(len(keys)))

# JSON object keys are strings, so convert the conformer numbers back to int.
# Every molecule is kept here, including ones with no conformers at all, so
# that each entry of `keys` can still be looked up in the filtering loop below
# (previously such a molecule raised KeyError there).
new_conf_calc = {
    smiles: {int(conf_num): conf_calc[smiles][conf_num] for conf_num in conf_calc[smiles]}
    for smiles in conf_calc
}
conf_calc = new_conf_calc

# Drop conformers lacking 'energy' or 'coordinates', then drop every molecule
# that does not have exactly the conformer numbers 0 .. NUM_CONFS-1. This also
# removes molecules left with no conformers. Element symbols seen in the
# coordinates of valid conformers are collected along the way.
expected_conformers = set(range(NUM_CONFS))
found_elems = set()
for key in keys:
    conformers = list(conf_calc[key].keys())
    for conformer in conformers:
        try:
            # The 'energy' lookup only validates that the field is present.
            energy = conf_calc[key][conformer]['energy']
            found_elems.update(conf_calc[key][conformer]['coordinates'].keys())
        except (KeyError, TypeError, AttributeError):
            # Missing field or malformed conformer record.
            del conf_calc[key][conformer]
    if set(conf_calc[key].keys()) != expected_conformers:
        del conf_calc[key]
elems = list(found_elems)

print ('Post-processed dataset size = ', len(list(conf_calc.keys())))
write_log('Post-processed dataset size = ' + str(len(list(conf_calc.keys()))))

# Create indexing and label_dict for iteration.
indexing, label_dict = ld.indexing_label_dict(data, conf_calc)
print('Dataset has been loaded, ', int(time.time()-start_time),' s')
write_log('Dataset has been loaded, ' + str(int(time.time()-start_time)) + ' s')

start_time = time.time()
print('Neural network initialization...')
write_log('Neural network initialization...')

# Split molecule positions 0 .. len(conf_calc)-1 into train (80%) and test (20%)
# index arrays. The split depends only on the number of samples and the seed,
# so passing the index array once yields the same partition as passing it twice.
train_indexes, test_indexes = train_test_split(np.arange(0, len(conf_calc.keys())),
                                               test_size=0.2,
                                               random_state=115)

Using device: cuda:0

GeForce GTX 1080 Ti
Memory Usage:
Allocated: 2.6 GB
Cached:    4.8 GB
Start loading dataset...
Initial dataset size =  13091
Post-processed dataset size =  13084
Dataset has been loaded,  163  s
Neural network initialization...


In [24]:
# Build the datasets and batch loaders for the train / test index splits.
train_set = dl.Cube_dataset(conf_calc, label_dict, elements, indexing, train_indexes, dim = args['VOXEL_DIM'])
train_generator = td.DataLoader(train_set, batch_size=args['BATCH_SIZE'], shuffle=True)

test_set = dl.Cube_dataset(conf_calc, label_dict, elements, indexing, test_indexes, dim = args['VOXEL_DIM'])
# NOTE(review): the test loader is shuffled as well; confirm this is intended.
test_generator = td.DataLoader(test_set, batch_size=args['BATCH_SIZE'], shuffle=True)

# `args` may lack the 'SIGMA_TRAIN' entry (this cell previously failed with
# KeyError: 'SIGMA_TRAIN'); fall back to a fixed, non-trainable sigma.
sigma_trainable = args.get('SIGMA_TRAIN', False)

# Instantiate the network imported from Model_train_test_regression.
model = Net(dim=args['VOXEL_DIM'], num_elems=AMOUNT_OF_ELEM, num_targets=TARGET_NUM, elements=elements, transformation=args['TRANSF'],device=device,sigma_0 = args['SIGMA'],sigma_trainable = sigma_trainable)

# Plain-text log of this experiment (same file the previous cell writes to).
log_file_path = os.path.join(LOG_PATH, args['NUM_EXP'] + '_logs.txt')

if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
    print ('Run in parallel!')
    with open(log_file_path, 'a') as f_log:
        f_log.write('Run in parallel!'+'\n')

model=model.to(device)

# Smoke test: push a single training batch through the model, then stop.
for (batch, target) in train_generator:
    batch = batch.to(device)
    target = target.to(device)
    with open(log_file_path, 'a') as f_log:
        f_log.write('Batch to device!'+'\n')
    print('Batch to device!')
    output = model(batch)
    with open(log_file_path, 'a') as f_log:
        f_log.write('Batch output!'+'\n')
    print('Batch output!')
    break

KeyError: 'SIGMA_TRAIN'