<a href="https://colab.research.google.com/github/mkbond777/DTI-meta-learning/blob/master/Modeular_code_V2_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow-addons

In [None]:
# Mount Google Drive at /content/drive; the data paths used throughout this
# notebook live under /content/drive/MyDrive/ML-DTI/.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np

import json

import re

import zipfile
import os

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,roc_auc_score
from sklearn.metrics import confusion_matrix, f1_score

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

import tensorflow as tf

import matplotlib.pyplot as plt

import seaborn as sns

from shutil import copyfile

import datetime

import tensorflow_addons as tfa

from tensorflow.keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

import time

import glob

import random


In [None]:
#chembl_id = 'CHEMBL286'

In [None]:
#! rm -r /content/model_init_CHEMBL286

In [None]:
def main(src_df, target_ids,cnn_model_name,metrics):
  """Run the per-target CNN + SVM pipeline for every id in ``target_ids``.

  The image archive on Drive is extracted once into ``/content/img_path``
  (skipped when that directory already exists). One CSV line per target is
  appended to a metrics file on Drive by ``target_specific_run``.

  Args:
    src_df: table with at least ``target_id`` and ``labels`` columns (plus the
      columns the downstream helpers read).
    target_ids: iterable of target ids to process, in order.
    cnn_model_name: backbone selector string (e.g. ``'inception'``). A
      sequence of names is also tolerated for the filename tag, in which case
      its first element is used.
    metrics: list of metric names; ``metrics[0]`` is used in the output
      filename.
  """
  curr_dt_time = datetime.datetime.now()

  # Unzip the entire image data set once per runtime.
  src_img_path = '/content/drive/MyDrive/ML-DTI/target_training_datasets.zip'
  dest_img_path = '/content/img_path'
  if not os.path.exists(dest_img_path):
    os.mkdir(dest_img_path)
    unzip_img_data(src_img_path,dest_img_path)

  # The notebook passes the model name as a plain string, so indexing it with
  # [0] would only keep its first character ('i' for 'inception').
  if isinstance(cnn_model_name, str):
    model_tag = cnn_model_name
  else:
    model_tag = cnn_model_name[0]

  out_file = '/content/drive/MyDrive/ML-DTI/metrics_' + model_tag + '_' + metrics[0] + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '.csv'

  for target in target_ids:
    # reset_index returns a new frame, so the column assignment below does not
    # write into a filtered view of src_df.
    target_df = target_specific_data(src_df,target).reset_index(drop=True)
    target_df['labels'] = target_df['labels'].astype(str)
    target_specific_run(target_df, target, out_file,cnn_model_name, metrics)



In [None]:
def target_specific_run(target_df, target_id, filename,cnn_model_name,metrics):
  """Train and evaluate the CNN and the SVMs for one target, then log one CSV row.

  The row appended to ``filename`` is:
  ``target_id, train_metrics[1], test_metrics[1], svm result, svm+cnn result``.
  The per-target checkpoint folder is deleted at the end.

  Args:
    target_df: rows for this target (labels already cast to str by the caller).
    target_id: target identifier; also names the image and checkpoint folders.
    filename: path of the CSV file to append to.
    cnn_model_name: backbone selector passed to ``pre_trained_cnn_model``.
    metrics: list of metric names shared by the CNN and SVM steps.
  """
  # Keras generators for the train / validation / test splits.
  train_gen, valid_gen, test_gen = prepare_train_valid_test_cnn_data(target_df, target_id)

  # Build, compile and fit the CNN (50 epochs at most; callbacks may stop earlier).
  network = pre_trained_cnn_model(cnn_model_name)
  network = compile_cnn_model(network, metrics)
  callbacks = callback_list(network, metrics, target_id)
  network, _history = cnn_model_training(50, network, train_gen, valid_gen, callbacks)

  # Reload the most recently written checkpoint before evaluating.
  newest_checkpoint = get_latest_file(target_id)
  network = load_weights(network, newest_checkpoint)

  train_scores = log_metrics(network, train_gen)
  test_scores = log_metrics(network, test_gen)

  # Per-drug CNN probabilities and one-hot SMILES features for the SVM stage.
  cnn_predict_df = predictions(network, target_df, target_id)
  encoded_df = label_encoding_smiles(target_df)

  svm_acc = svm_model_training(encoded_df, cnn_predict_df, metrics, with_cnn = False)
  svm_cnn_acc = svm_model_training(encoded_df, cnn_predict_df, metrics, with_cnn = True)

  fields = (target_id, train_scores[1], test_scores[1], svm_acc, svm_cnn_acc)
  row = ",".join(map(str, fields))

  with open(filename, "a") as out:
    out.write(row + "\n")

  delete_folder(target_id)


In [None]:
def delete_folder(target_id):
  """Remove the checkpoint folder ``model_init_<target_id>/`` and its contents.

  Uses the standard library instead of an IPython ``!rm -r`` shell call, so
  the function also works outside a notebook and is not affected by shell
  interpretation of ``target_id``. A missing folder is not an error.

  Args:
    target_id: target identifier used to build the folder name.
  """
  # Local import: the notebook only imports `copyfile` from shutil.
  import shutil

  folder_name = 'model_init' + '_' + target_id + '/'
  # ignore_errors keeps the call non-fatal when the folder does not exist,
  # matching the previous shell command, which did not raise in Python either.
  shutil.rmtree(folder_name, ignore_errors=True)

In [None]:
def svm_model_training(encoded_df, cnn_predict_df, metrics, with_cnn = False):
  """Fit an SVM on the non-test rows and score it on the test rows.

  Args:
    encoded_df: frame with a ``type`` column (rows equal to ``'test'`` form
      the test split) plus the columns ``prepare_svm_data`` reads.
    cnn_predict_df: per-drug CNN predictions, used only when ``with_cnn``.
    metrics: metric names forwarded to ``test_svm``.
    with_cnn: when True the CNN prediction is added as an extra SVM feature.

  Returns:
    Whatever ``test_svm`` returns for the fitted model.
  """
  train_rows = encoded_df[encoded_df['type'] != 'test']
  svm_train_df = prepare_svm_data(train_rows, cnn_predict_df, with_cnn)

  test_rows = encoded_df[encoded_df['type'] == 'test']
  svm_test_df = prepare_svm_data(test_rows, cnn_predict_df, with_cnn)

  fitted_svm = train_svm(svm_train_df)
  score = test_svm(fitted_svm, svm_test_df, metrics)
  return score

In [None]:
def get_latest_file(target_id):
  """Return the entry of ``model_init_<target_id>/`` with the largest ctime.

  Args:
    target_id: target identifier used to build the folder name.

  Returns:
    Path (as produced by ``glob``) of the most recently changed entry.

  Raises:
    ValueError: if the folder contains no entries.
  """
  pattern = 'model_init_' + target_id + '/*'  # every entry in the folder
  candidates = glob.glob(pattern)
  return max(candidates, key=lambda path: os.stat(path).st_ctime)

In [None]:
def unzip_img_data(src_path,dest_path):
  """Extract every member of the zip archive at ``src_path``.

  Implemented with the standard-library ``zipfile`` module instead of an
  IPython ``!unzip`` shell call, so it works outside a notebook and with
  paths that contain spaces.

  Args:
    src_path: path of the zip archive.
    dest_path: directory to extract into; ``None`` extracts into the current
      working directory.

  Raises:
    FileNotFoundError: if ``src_path`` does not exist.
    zipfile.BadZipFile: if ``src_path`` is not a valid zip archive.
      (The previous shell call only printed a message in these cases.)
  """
  with zipfile.ZipFile(src_path) as archive:
    # extractall(None) targets the current working directory.
    archive.extractall(dest_path)

In [None]:
def prepare_data():
  """Load the split table and the SMILES table from Drive and join them.

  Reads ``json_df.csv`` and ``chembl_v28_json_joined_202110241711.csv`` from
  ``/content/drive/MyDrive/ML-DTI/``, inner-joins them on ``drug_id`` and adds
  an ``img_path`` column equal to ``drug_id + '.png'``.

  Returns:
    The merged ``pandas.DataFrame``.
  """
  # Split table (per the original comment: test, train and valid records).
  json_df = pd.read_csv('/content/drive/MyDrive/ML-DTI/json_df.csv')

  # SMILES table.
  smiles_df = pd.read_csv('/content/drive/MyDrive/ML-DTI/chembl_v28_json_joined_202110241711.csv')

  final_df = pd.merge(json_df, smiles_df, left_on=['drug_id',], right_on=['drug_id'],how='inner')

  # Image file name per row; a vectorised concatenation replaces the former
  # row-wise apply(axis=1), which calls Python once per row.
  final_df['img_path'] = final_df['drug_id'] + '.png'

  return final_df

  


In [None]:
def target_specific_data(df, target_id):
  """Return the rows of ``df`` whose ``target_id`` column equals ``target_id``.

  The original index labels of the selected rows are kept.
  """
  matches_target = df['target_id'] == target_id
  return df[matches_target]

In [None]:
def prepare_train_valid_test_cnn_data(df, target_id, img_h = 200, img_w = 200):
  """Create Keras image generators for the train, validation and test splits.

  Images are read from ``/content/img_path/<target_id>/imgs/``; if that
  folder is missing, ``/content/img_path/<target_id>.zip`` is extracted first.
  Rows whose ``type`` is not ``'test'`` are split 80/20 into training and
  validation subsets; rows with ``type == 'test'`` feed the (unshuffled) test
  generator.

  Args:
    df: frame with ``type``, ``img_path`` and ``labels`` columns.
    target_id: target identifier naming the image folder / archive.
    img_h, img_w: size the images are resized to.

  Returns:
    ``(train_generator, valid_generator, test_generator)``.
  """
  print(target_id + '\n')

  zip_path = '/content/img_path/' + target_id + '.zip'
  img_path = '/content/img_path/' + target_id + '/imgs/'

  if not os.path.exists(img_path):
    unzip_img_data(zip_path,'/content/img_path/')

  # Keyword arguments common to all three generators.
  shared_kwargs = dict(
    directory=img_path,
    x_col="img_path",
    y_col="labels",
    batch_size=32,
    seed=42,
    class_mode="binary",
    target_size=(img_h,img_w),
  )

  datagen = ImageDataGenerator(validation_split=0.2)

  train_generator = datagen.flow_from_dataframe(
    dataframe=df[df['type'] != 'test'],
    subset="training",
    shuffle=True,
    **shared_kwargs)

  valid_generator = datagen.flow_from_dataframe(
    dataframe=df[df['type'] != 'test'],
    subset="validation",
    shuffle=True,
    **shared_kwargs)

  # The test split uses its own generator without a validation split and
  # keeps the row order (shuffle=False).
  test_datagen = ImageDataGenerator()

  test_generator = test_datagen.flow_from_dataframe(
    dataframe=df[df['type'] == 'test'],
    shuffle=False,
    **shared_kwargs)

  return train_generator, valid_generator, test_generator

In [None]:
def pre_trained_cnn_model(name, img_height=200,img_width=200):
  """Build a binary classifier on top of a frozen pre-trained backbone.

  Layer order: input -> backbone-specific ``preprocess_input`` -> frozen
  backbone (called with ``training=False``) -> global max pooling ->
  ``Dropout(0.2)`` -> ``Dense(1, sigmoid)``.

  Args:
    name: backbone selector forwarded to ``pre_trained_cnn_model_selection``.
    img_height, img_width: spatial size of the 3-channel input images.

  Returns:
    An uncompiled ``tf.keras.Model``.
  """
  # Forward the requested size so the backbone is built for the same input
  # shape as the Input layer below (previously the backbone always used its
  # own 200x200 default, regardless of the arguments passed here).
  preprocess_input, base_model = pre_trained_cnn_model_selection(
      name, img_height, img_width)

  # Only the new classification head is trained.
  base_model.trainable = False

  inputs = tf.keras.Input(shape=(img_height, img_width, 3))
  x = preprocess_input(inputs)
  x = base_model(x, training=False)
  x = tf.keras.layers.GlobalMaxPooling2D()(x)
  x = tf.keras.layers.Dropout(0.2)(x)
  outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

  return tf.keras.Model(inputs, outputs)


In [None]:
def pre_trained_cnn_model_selection(name, img_height=200,img_width=200):
  """Pick a Keras application backbone by (case-insensitive) substring of ``name``.

  ``'vgg16'`` selects VGG16, otherwise ``'inception'`` selects InceptionV3,
  and any other name falls back to MobileNetV2. Every backbone is created
  without its top layers and with ImageNet weights.

  Args:
    name: backbone selector string.
    img_height, img_width: spatial size for the backbone's ``input_shape``.

  Returns:
    ``(preprocess_input, base_model)`` for the selected backbone.
  """
  applications = tf.keras.applications
  requested = name.lower()

  if 'vgg16' in requested:
    app_module = applications.vgg16
    constructor = app_module.VGG16
  elif 'inception' in requested:
    app_module = applications.inception_v3
    constructor = app_module.InceptionV3
  else:
    # Default backbone for any other name.
    app_module = applications.mobilenet_v2
    constructor = app_module.MobileNetV2

  preprocess_input = app_module.preprocess_input
  base_model = constructor(input_shape=(img_height, img_width, 3),
                           include_top=False,
                           weights='imagenet')

  return preprocess_input, base_model
    
  
  

In [None]:
def compile_cnn_model(model, metric_list, base_rate = 0.0001):
  """Compile ``model`` for binary classification with the Adam optimizer.

  Args:
    model: Keras model to compile (compiled in place and returned).
    metric_list: metric names, translated to Keras metrics by ``get_metrics``.
    base_rate: learning rate for Adam.

  Returns:
    The compiled model.
  """
  metrics = get_metrics(metric_list)

  # `base_rate` used to be accepted but ignored (Adam ran with its library
  # default); it is now passed through as the optimizer's learning rate.
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_rate),
                loss=tf.keras.losses.BinaryCrossentropy(),
                metrics=metrics)
  return model

In [None]:
def get_metrics(metric_list):
  """Translate metric names into the objects passed to ``model.compile``.

  Names are matched case-insensitively. Recognised names are ``accuracy``,
  ``auc`` and ``f1_score``; anything else is ignored. The result always lists
  the recognised metrics in that order.

  Args:
    metric_list: iterable of metric name strings.

  Returns:
    List of Keras metric identifiers / metric objects.
  """
  requested = {entry.lower() for entry in metric_list}

  # (name, factory) pairs in output order; the factories are only called for
  # requested metrics, so no metric object is built unless it is needed.
  catalogue = (
    ('accuracy', lambda: 'accuracy'),
    ('auc', lambda: tf.keras.metrics.AUC(name='auc')),
    ('f1_score', lambda: tfa.metrics.F1Score(num_classes=1,threshold=0.5,
                                             name='f1_score')),
  )

  return [build() for key, build in catalogue if key in requested]


In [None]:
def callback_list(model, metric_list, target_id):
  """Create the checkpoint and early-stopping callbacks for one target.

  The monitored quantity is derived from the first entry of ``metric_list``:
  ``val_f1_score``, ``val_accuracy`` or (otherwise) ``val_auc``; both
  callbacks use ``mode='max'``. Weight checkpoints go to
  ``model_init_<target_id>/`` (created if missing).

  Args:
    model: unused; kept so existing callers keep working.
    metric_list: metric names, the same list given to ``get_metrics``.
    target_id: target identifier used in the checkpoint folder name.

  Returns:
    ``[ModelCheckpoint, EarlyStopping]``.
  """
  requested = [entry.lower() for entry in metric_list]
  metric_list_1st = requested[0]

  if 'f1_score' in metric_list_1st:
    monitor_m = 'val_f1_score'
  elif 'accuracy' in metric_list_1st:
    monitor_m = 'val_accuracy'
  else:
    monitor_m = 'val_auc'

  # Folder the checkpoints are written to (read back by get_latest_file).
  model_name = 'model_init' + '_' + target_id + '/'
  os.makedirs(model_name, exist_ok=True)

  # The filename template may only reference values Keras actually logs.
  # get_metrics() registers 'auc' only when it is in the metric list, so the
  # auc-based template is used only then; otherwise (e.g. ['f1_score']) fall
  # back to a template built from the always-present loss values.
  if 'accuracy' in metric_list_1st:
    filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{accuracy:.5f}-{val_loss:.5f}-{val_accuracy:.5f}.h5'
  elif 'auc' in requested:
    filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{auc:.5f}-{val_loss:.5f}-{val_auc:.5f}.h5'
  else:
    filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{val_loss:.5f}.h5'

  checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath,
                                                  monitor=monitor_m,
                                                  verbose=1,
                                                  save_best_only=True,
                                                  save_weights_only=True,
                                                  mode='max',
                                                  save_freq='epoch')

  es = tf.keras.callbacks.EarlyStopping(monitor=monitor_m,
                                        mode='max',
                                        verbose=1,
                                        patience=5)

  callbacks_list = [checkpoint, es]

  return callbacks_list


In [None]:
def cnn_model_training(epoch, model, train_ds, val_ds, callbacks_list):
  """Fit ``model`` and return it together with the object ``fit`` returns.

  Args:
    epoch: value passed as ``epochs``.
    model: object exposing a Keras-style ``fit`` method.
    train_ds: training data, passed positionally to ``fit``.
    val_ds: validation data, passed as ``validation_data``.
    callbacks_list: passed as ``callbacks``.

  Returns:
    ``(model, history)``.
  """
  fit_options = {
    'epochs': epoch,
    'validation_data': val_ds,
    'callbacks': callbacks_list,
  }
  training_log = model.fit(train_ds, **fit_options)
  return model, training_log

In [None]:
def load_weights(model, model_weights):
  """Load the weights stored at ``model_weights`` into ``model`` and return it.

  Args:
    model: object exposing a Keras-style ``load_weights`` method.
    model_weights: path of the weights file.
  """
  checkpoint_path = model_weights
  model.load_weights(checkpoint_path)
  return model

In [None]:
def log_metrics(model, ds):
  """Evaluate ``model`` on ``ds`` (with progress output) and return the result.

  Returns:
    Whatever ``model.evaluate(ds, verbose=1)`` returns.
  """
  scores = model.evaluate(ds, verbose=1)
  return scores

In [None]:
def predictions(model, df, target_id, img_height=200,img_width=200, batch_size=32):
  """Predict a probability for the image of every row in ``df``.

  Images are loaded from ``/content/img_path/<target_id>/imgs/`` and resized
  to ``(img_height, img_width)``. Predictions are made in batches instead of
  one ``model.predict`` call per image, which avoids the per-call overhead.

  Args:
    model: model whose ``predict`` returns one row per image; column 0 of
      that output is stored as the probability.
    df: frame with ``drug_id``, ``labels`` and ``img_path`` columns.
    target_id: target identifier naming the image folder.
    img_height, img_width: size the images are resized to.
    batch_size: number of images per ``model.predict`` call.

  Returns:
    DataFrame with columns ``drug_id``, ``label`` and ``y_pred_prob``, one row
    per row of ``df`` and in the same order.
  """
  imgs_path = '/content/img_path/' + target_id + '/imgs/'

  file_names = df['img_path'].tolist()
  y_pred_prob = []

  for start in range(0, len(file_names), batch_size):
    # Stack the resized images of this chunk into one (n, h, w, channels) batch.
    img_batch = np.stack([
      image.img_to_array(
        image.load_img(os.path.join(imgs_path, file_name),
                       target_size=(img_height, img_width)))
      for file_name in file_names[start:start + batch_size]
    ])
    batch_pred = model.predict(img_batch)
    y_pred_prob.extend(batch_pred[:, 0])

  return pd.DataFrame({'drug_id': df['drug_id'].tolist(),
                       'label': df['labels'].tolist(),
                       'y_pred_prob': y_pred_prob},
                      columns=['drug_id','label','y_pred_prob'])

In [None]:
def label_encoding_smiles(df):
  """Add a ``lbl_encoding`` column with a flattened one-hot encoding of each SMILES.

  Each string in ``canonical_smiles`` is truncated to 80 characters and
  encoded as an 80 x 65 matrix (one row per position, one column per
  character code), which is then flattened to a vector of length 5200.
  Positions beyond the end of the string stay all-zero.

  Args:
    df: frame with a ``canonical_smiles`` column; modified in place.

  Returns:
    The same ``df`` object, with the new column.

  Raises:
    KeyError: if a SMILES string contains a character missing from the table.
  """
  # Character -> 1-based code table.
  CHARCANSMISET = { "#": 1, "%": 2, ")": 3, "(": 4, "+": 5, "-": 6,
         ".": 7, "1": 8, "0": 9, "3": 10, "2": 11, "5": 12,
         "4": 13, "7": 14, "6": 15, "9": 16, "8": 17, "=": 18,
         "A": 19, "C": 20, "B": 21, "E": 22, "D": 23, "G": 24,
         "F": 25, "I": 26, "H": 27, "K": 28, "M": 29, "L": 30,
         "O": 31, "N": 32, "P": 33, "S": 34, "R": 35, "U": 36,
         "T": 37, "W": 38, "V": 39, "Y": 40, "[": 41, "Z": 42,
         "]": 43, "_": 44, "a": 45, "c": 46, "b": 47, "e": 48,
         "d": 49, "g": 50, "f": 51, "i": 52, "h": 53, "m": 54,
         "l": 55, "o": 56, "n": 57, "s": 58, "r": 59, "u": 60,
         "t": 61, "y": 62, "/" : 63, "\\" : 64, "@":65}

  def one_hot_sequence(line, MAX_SEQ_LEN = 80, smi_ch_ind = CHARCANSMISET):
    encoded = np.zeros((MAX_SEQ_LEN, len(smi_ch_ind)))
    for position, symbol in enumerate(line[:MAX_SEQ_LEN]):
      column = smi_ch_ind[symbol] - 1  # codes start at 1, columns at 0
      encoded[position, column] = 1
    return encoded.flatten()

  df['lbl_encoding'] = df['canonical_smiles'].apply(one_hot_sequence)

  return df

  

  

In [None]:
def prepare_svm_data(df, cnn_predict_df,meta_learning=False):
  """Assemble the SVM design matrix (features plus a ``labels`` column).

  The ``lbl_encoding`` vectors are expanded into one column per element. With
  ``meta_learning`` the thresholded CNN prediction (``y_pred``: 0 when
  ``y_pred_prob <= 0.5``, else 1) is joined in on ``drug_id`` and added as an
  extra feature column. Everything is finally cast to ``uint8``.

  Side effects: the index of ``df`` is reset in place; with ``meta_learning``
  a ``y_pred`` column is written to ``cnn_predict_df``.

  Args:
    df: frame with ``drug_id``, ``lbl_encoding`` and ``labels`` columns.
    cnn_predict_df: frame with ``drug_id`` and ``y_pred_prob`` columns.
    meta_learning: whether to include the CNN prediction as a feature.

  Returns:
    ``uint8`` DataFrame whose last column is ``labels``.
  """
  df.reset_index(drop=True,inplace=True)

  if meta_learning:
    # Threshold the CNN probability into a hard 0/1 prediction.
    cnn_predict_df['y_pred'] = cnn_predict_df.apply(
        lambda row: 0 if row['y_pred_prob'] <= 0.5 else 1, axis=1)

    merged = pd.merge(df, cnn_predict_df, left_on=['drug_id',],
                      right_on=['drug_id'],how='inner')

    encoded = pd.DataFrame(merged['lbl_encoding'].values.tolist())
    pieces = [encoded, merged['y_pred'], merged['labels']]
  else:
    encoded = pd.DataFrame(df['lbl_encoding'].values.tolist())
    pieces = [encoded, df['labels']]

  return pd.concat(pieces, axis=1).astype('uint8')


In [None]:
def train_svm(svm_df, kernel_type = 'rbf'):
  """Fit a support-vector classifier on ``svm_df``.

  Args:
    svm_df: frame whose ``labels`` column is the target and whose remaining
      columns are the features.
    kernel_type: kernel passed to ``SVC``.

  Returns:
    The fitted ``SVC`` (created with ``probability=True``).
  """
  # Pass a plain array: the feature columns are named with integers and, when
  # the CNN prediction is included, the string 'y_pred'. scikit-learn derives
  # feature names from DataFrame columns and does not accept such a mix of
  # name types in recent releases.
  X_train = svm_df.drop("labels", axis = 1).to_numpy()
  y_train = svm_df['labels']

  model = SVC(kernel=kernel_type, probability=True)
  model.fit(X_train, y_train)

  return model

In [None]:
def test_svm(model, svm_df_test, metrics):
  """Score a fitted classifier on the test frame.

  Args:
    model: fitted estimator with ``predict`` (and, ideally,
      ``decision_function``).
    svm_df_test: frame whose ``labels`` column is the target and whose
      remaining columns are the features.
    metrics: metric names (list) or a single name (str). The check for
      ``accuracy`` is case-insensitive, as in ``get_metrics``.

  Returns:
    Accuracy rounded to 2 decimals when ``accuracy`` is requested; otherwise
    the tuple ``(roc_auc, f1_score)``, each rounded to 2 decimals.
  """
  # Plain array for the same reason as in training: the feature columns mix
  # integer and string names.
  X_test = svm_df_test.drop("labels", axis = 1).to_numpy()
  y_test = svm_df_test['labels']

  y_pred = model.predict(X_test)

  if isinstance(metrics, str):
    wants_accuracy = 'accuracy' in metrics.lower()
  else:
    wants_accuracy = 'accuracy' in [entry.lower() for entry in metrics]

  if wants_accuracy:
    return round(accuracy_score(y_test,y_pred),2)

  # ROC AUC is a ranking metric, so score it on continuous decision values
  # when the model provides them; hard 0/1 predictions yield only a single
  # operating point. Fall back to the predictions otherwise.
  if hasattr(model, 'decision_function'):
    y_score = model.decision_function(X_test)
  else:
    y_score = y_pred

  roc_auc = round(roc_auc_score(y_test,y_score),2)
  pred_f1_score = round(f1_score(y_test,y_pred),2)
  return roc_auc, pred_f1_score

### Preparing data and running model

In [None]:
# Build the merged source table used by both the EDA cells and the model run.
src_df = prepare_data()
# k = 100
# min_range = 1000
# max_range = 3000


## EDA

In [None]:
# Number of distinct drug_id values in the merged table.
src_df['drug_id'].nunique()

In [None]:
# Load the split table on its own (the same file prepare_data() reads).
json_df = pd.read_csv('/content/drive/MyDrive/ML-DTI/json_df.csv')

In [None]:
# Number of non-null drug_id entries in the split table.
json_df['drug_id'].count()

In [None]:
# NOTE(review): hard-coded ratio; presumably typed in from the outputs of the
# count cells above — TODO confirm and compute it from the data instead.
409288/409311

In [None]:
# Rows per target_id, as a two-column frame (target_id, counts).
target_count_df = src_df['target_id'].value_counts().rename_axis('target_id').reset_index(name='counts')

In [None]:
#labels = [0,1,2,3,4,5,6,7,8,9]
# Bucket the per-target counts into the intervals (0, 500], (500, 1000],
# (1000, 3000], (3000, 10000] and (10000, 40000]; counts outside that range
# become NaN.
target_count_df['counts_bins'] = pd.cut(target_count_df['counts'], bins=[0,500,1000,3000,10000,40000], 
             include_lowest=False)

In [None]:
# Preview the per-target counts with their bins.
target_count_df.head()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
!pip install seaborn --upgrade

In [None]:
# Default figure size (width, height in inches) for the plots that follow.
sns.set(rc={'figure.figsize':(11.7,8.27)})

In [None]:
# Bar chart: number of targets in each counts_bins bucket.
sns.countplot(x='counts_bins', data=target_count_df)


In [None]:
# Count plot of targets per counts_bins bucket, with each bar annotated by
# its height.
ax = sns.countplot(x="counts_bins", data=target_count_df)

for p in ax.patches:
   ax.annotate(p.get_height(), (p.get_x()+0.25, p.get_height()+0.01))

ax.set_title("Compound count per category")
ax.set_xlabel('compounds count bins')
# plt.show() does not take an Axes; it displays all open figures.
plt.show()

### Class comparison for all data

In [None]:
#agg_df = src_df.groupby(['target_id','labels'])['drug_id'].count().rename_axis(['target_id','labels']).reset_index(name='counts')

In [None]:
#agg_df.head()

In [None]:
#sns.lineplot(x=agg_df.index,y='counts',hue='labels',hue_norm=(0,1),data=agg_df)

In [None]:
# Per-target class balance: percentage of rows carrying each labels value
# within every target_id (columns: target_id, labels, percent).
agg_df = src_df.groupby(['target_id'])['labels'].value_counts(normalize=True).mul(100).rename('percent').reset_index()
#.pipe((sns.lineplot,'data'), x=agg_df.index/2,y='percent',hue='labels',))

In [None]:
# Preview the per-target class percentages.
agg_df.head()

In [None]:
#cgfc_df.plot(x="id", y=["action", "comedy"])

In [None]:
#sns.lineplot(x=range(0,812),y='percent',hue='labels',data=cgfc_df)

In [None]:
# Wide form: one row per target_id and one column per labels value, holding
# the percent figures.
x = agg_df.pivot(index='target_id',columns='labels',values='percent').reset_index()

In [None]:
# Clear the name the pivot left on the column index.
x.columns.name = None

In [None]:
# Preview the pivoted percentages.
x.head()

In [None]:
# Add an integer `id` per target_id (group number) to use as the plot x-axis.
x = x.assign(id=x.groupby(['target_id',]).ngroup())

In [None]:
# Absolute gap, in percentage points, between the columns for label 0 and label 1.
x['diff'] = abs(x[0] - x[1])

In [None]:
# Summary statistics of the numeric columns (class percentages, id, diff).
x.describe()

In [None]:
# Number of targets whose class gap exceeds 20 percentage points.
(x['diff'] > 20).sum()

In [None]:
# Number of targets with an exactly even class split (gap of 0).
(x['diff'] == 0).sum()

In [None]:
# Preview again, now including the id and diff columns.
x.head()

In [None]:
# Line plot of the class percentages (columns 0 and 1) across targets,
# ordered by the integer target id.
ax = x.plot(x="id", y=[0, 1],figsize=(20,10))
ax.set_ylabel('Percentage data distribution',fontsize = 12,position=(0.7, .5))
ax.set_xlabel('Target proteins',fontsize = 12, )
ax.set_title('Class comparison', fontsize = 20, position=(0.5, .95))
# plt.show() does not take an Axes; it displays all open figures.
plt.show()


In [None]:
#sns.set(rc={'figure.figsize':(11.7,8.27)})

In [None]:
# ax = sns.scatterplot(y = 'diff', x = x.index, data = x)
# ax.set_ylabel('Percentage difference')
# ax.set_xlabel('Target proteins')
# ax.set_title('Class comparison', fontsize = 20)
# plt.show(ax)

### Filtered records

In [None]:
# Boolean mask of the rows whose target_id is in the list `l`.
# NOTE(review): in this notebook `l` is only assigned in a cell further down
# (the list of CHEMBL ids), so on a fresh top-to-bottom run this line would
# raise NameError — TODO move that definition above this cell.
mask = src_df['target_id'].isin(l)

In [None]:
# Keep only the rows of the selected targets and renumber the index.
filtered_df = src_df.loc[mask].reset_index(drop=True)

In [None]:
# Number of distinct targets left after filtering.
filtered_df['target_id'].nunique()

In [None]:
# Row counts per (target_id, labels) pair in the filtered data.
# Note: this rebinds agg_df, which previously held the percentage table.
agg_df = filtered_df.groupby(['target_id','labels'])['drug_id'].count().rename_axis(['target_id','labels']).reset_index(name='counts')

In [None]:
# Preview the per-target, per-label counts.
agg_df.head()

In [None]:
# Wide form: one row per target_id with one count column per labels value.
pivot_df = agg_df.pivot(index='target_id',columns='labels',values='counts').reset_index()

In [None]:
# Clear the name the pivot left on the column index.
pivot_df.columns.name = None

In [None]:
# Class gap in percentage points: |share of label 0 - share of label 1| * 100.
pivot_df['diff'] = abs(pivot_df[0]/(pivot_df[0] + pivot_df[1]) - pivot_df[1]/(pivot_df[0] + pivot_df[1])) * 100

In [None]:
# Preview the counts together with the class gap.
pivot_df.head()

In [None]:
# Line plot of the positive / negative class percentages per target.
# NOTE(review): the 'id', 'Percentage +ve' and 'Percentage -ve' columns are
# only added to pivot_df in the cells below this one, so this cell depends on
# out-of-order execution — TODO move it after those cells. Also confirm that
# sns.lineplot accepts a list of column names for `y`.
sns.lineplot(x="id", y=["Percentage +ve", "Percentage -ve"], data = pivot_df)

In [None]:
# Share of rows with label 1 per target, in percent.
pivot_df['Percentage +ve'] = (pivot_df[1]/(pivot_df[0] + pivot_df[1])) * 100

In [None]:
# Share of rows with label 0 per target, in percent.
pivot_df['Percentage -ve'] = (pivot_df[0]/(pivot_df[0] + pivot_df[1])) * 100

In [None]:
# Add an integer `id` per target_id (group number).
pivot_df = pivot_df.assign(id=pivot_df.groupby(['target_id',]).ngroup())

In [None]:
# Summary statistics of the class gap over the selected targets.
pivot_df['diff'].describe()

In [None]:
# Number of selected targets whose class gap exceeds 20 percentage points.
(pivot_df['diff'] > 20).sum()

In [None]:
# Number of selected targets with an exactly even class split.
(pivot_df['diff'] == 0).sum()

In [None]:
# Default figure size (width, height in inches) for the scatter plot below.
sns.set(rc={'figure.figsize':(11.7,8.27)})

In [None]:
# Scatter plot of the class gap (percentage points) for each selected target,
# plotted against the row position in pivot_df.
ax = sns.scatterplot(y = 'diff', x = pivot_df.index, data = pivot_df)
ax.set_ylabel('Percentage difference')
ax.set_xlabel('Target proteins')
ax.set_title('Class comparison', fontsize = 20)
# plt.show() does not take an Axes; it displays all open figures.
plt.show()

## SMILES EDA

In [None]:
# Character length of each canonical SMILES string (adds a column to src_df in place).
src_df['smiles_length'] = src_df['canonical_smiles'].str.len()

In [None]:
# Preview the source table with the new smiles_length column.
src_df.head()

In [None]:
# One row per distinct (drug_id, smiles_length) pair, with a fresh index.
smiles_df = src_df[['drug_id','smiles_length']].drop_duplicates().reset_index(drop=True)

In [None]:
# Preview the per-drug SMILES lengths.
smiles_df.head()

In [None]:
# Summary statistics of the SMILES length, including the 90th, 92nd and 95th percentiles.
smiles_df['smiles_length'].describe(percentiles=[.9,.92,.95])

In [None]:
# Box plot of the SMILES length over all drugs.
sns.set(rc={'figure.figsize':(11.7,8.27)})
ax = sns.boxplot(x=smiles_df["smiles_length"])

In [None]:
# SMILES lengths from 0 up to the 92nd percentile, both ends included.
# (Despite its name, `iqr` is not an inter-quartile range.)
# The default of Series.between already includes both ends; the boolean
# `inclusive=True` form is deprecated / rejected by newer pandas releases.
iqr = smiles_df['smiles_length'][smiles_df['smiles_length'].between(0, smiles_df['smiles_length'].quantile(.92))]

In [None]:
# Box plot of the SMILES lengths restricted to the lower 92% of the data.
# Pass the values by keyword (as the full-data box plot does): seaborn's
# handling of a positional first argument differs between releases.
ax = sns.boxplot(x=iqr)
ax.set_title('with 92% data', fontsize = 20, position=(0.88, .92))

In [None]:
# Side-by-side box plots of the SMILES length: all drugs (left) versus the
# lower 92% of the data (right).
fig, ax = plt.subplots(1, 2, sharex='col', sharey='row', figsize = (20,10))

# NOTE(review): only `x` is given together with orient='v'; confirm the
# rendered orientation is the intended one.
plt1 = sns.boxplot(  x=smiles_df["smiles_length"],  orient='v' , ax=ax[0])
plt2 = sns.boxplot(  x = iqr ,  orient='v', ax=ax[1])
plt1.set_title('100% data', fontsize = 20, position=(0.85, .93))
plt2.set_title('92% data', fontsize = 20, position=(0.85, .93))
# plt.show() does not take an Axes; it displays all open figures.
plt.show()

## EDA ends

In [None]:
# target_count_df = src_df['target_id'].value_counts().rename_axis('Target_ID').reset_index(name='count')
# full_target_ids = target_count_df[(target_count_df['count'] > min_range) & (target_count_df['count'] < max_range)]['Target_ID'].tolist()
# target_ids = random.sample(full_target_ids, k)

In [None]:
# Target ids selected for the model run; also used by the "filtered record"
# EDA cells earlier in the notebook.
l = ['CHEMBL223','CHEMBL3473','CHEMBL228','CHEMBL276','CHEMBL3568','CHEMBL1900','CHEMBL4822','CHEMBL1981','CHEMBL2069','CHEMBL3024','CHEMBL3231','CHEMBL2959','CHEMBL2742','CHEMBL1908389','CHEMBL4578','CHEMBL1785','CHEMBL1994','CHEMBL3286','CHEMBL4128','CHEMBL206','CHEMBL4308','CHEMBL257','CHEMBL2993','CHEMBL2039','CHEMBL2292','CHEMBL208','CHEMBL2581','CHEMBL1855','CHEMBL2028','CHEMBL6136','CHEMBL2413','CHEMBL3571','CHEMBL2722','CHEMBL2695','CHEMBL298','CHEMBL1821','CHEMBL213','CHEMBL2014','CHEMBL304','CHEMBL2001','CHEMBL3522','CHEMBL2949','CHEMBL1946','CHEMBL5147','CHEMBL3974','CHEMBL3920','CHEMBL1867','CHEMBL288','CHEMBL2016','CHEMBL3973','CHEMBL2598','CHEMBL3358','CHEMBL1835','CHEMBL1978','CHEMBL4588','CHEMBL216','CHEMBL2431','CHEMBL281','CHEMBL3553','CHEMBL1875','CHEMBL4204','CHEMBL2808','CHEMBL229','CHEMBL1936','CHEMBL331','CHEMBL3594','CHEMBL4794','CHEMBL2820','CHEMBL338','CHEMBL3045','CHEMBL2525','CHEMBL6164','CHEMBL3142','CHEMBL3649','CHEMBL5407','CHEMBL2035','CHEMBL1991','CHEMBL4908','CHEMBL2208','CHEMBL221','CHEMBL321','CHEMBL4422','CHEMBL3979','CHEMBL265','CHEMBL3976','CHEMBL3869','CHEMBL2047','CHEMBL335','CHEMBL2276','CHEMBL324','CHEMBL1801','CHEMBL231','CHEMBL308','CHEMBL3629','CHEMBL313']

In [None]:
# Position of 'CHEMBL2820' within the target list.
l.index('CHEMBL2820')

In [None]:
# Target id stored at index 55.
l[55]

In [None]:
# Process only the targets from index 56 onward (presumably resuming a run
# that had already covered the first 56 — TODO confirm).
target_ids = l[56:]

In [None]:
# Number of targets that will be processed.
len(target_ids)

In [None]:
# Backbone selector: pre_trained_cnn_model_selection() matches the substring
# 'inception' and builds InceptionV3.
model_name = 'inception'

In [None]:
# Metric names: they select the Keras metric, the quantity monitored by the
# checkpoint / early-stopping callbacks, and the score reported for the SVM.
metrics = ['accuracy']

In [None]:
# Run the full pipeline over the selected targets and report the wall-clock time.
start_time = time.time()
main(src_df, target_ids,model_name,metrics)
print("--- {} seconds ---".format(time.time() - start_time))

Hello