In [None]:
# Select the TensorFlow 1.x runtime (Colab line magic).
%tensorflow_version 1.x

# Clone the DeepConv-DTI code into the current working directory.
# NOTE(review): the training command in the last cell runs
# /content/DeepConv-DTI/DeepConvDTI.py, so this assumes the working directory
# is /content — confirm.
!git clone https://github.com/oguuzhansahin/DeepConv-DTI

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd

from rdkit.Chem import AllChem as Chem

# Run data_collector.py from the parent of the working directory in a subprocess.
# NOTE(review): presumably this prepares the benchmark files read below — confirm.
!python ../data_collector.py

# Folder holding data.csv and the *_indices.bin split files; point this at the
# dataset directory to use.
dataset_path = Path("/benchmarks/AllDB")

# Scratch folder that receives the per-split CSV files written further down.
Path("tmp").mkdir(exist_ok=True)

data = pd.read_csv(dataset_path.joinpath("data.csv"))

# Every distinct protein sequence / SMILES string gets an integer ID equal to
# its position in the pandas category index of the corresponding column.
protein_categories = data["Target Sequence"].astype("category").cat.categories
smile_categories = data["SMILES"].astype("category").cat.categories

idx2protein = {position: sequence for position, sequence in enumerate(protein_categories)}
idx2smile = {position: smiles for position, smiles in enumerate(smile_categories)}

# Reverse lookups: sequence / SMILES string -> integer ID.
protein_idx = {sequence: position for position, sequence in idx2protein.items()}
smile_idx = {smiles: position for position, smiles in idx2smile.items()}


def _load_split_indices(bin_name):
  """Read one split's row indices from a raw binary file in ``dataset_path``."""
  return np.fromfile(str(dataset_path / bin_name), dtype=int)


# The train / validation / test row indices are read from binary files stored
# next to data.csv.
train_indices = _load_split_indices("train_indices.bin")
val_indices = _load_split_indices("val_indices.bin")
test_indices = _load_split_indices("test_indices.bin")

def get_morgan_fingerprint(x):
  """Return the radius-2, 2048-bit Morgan fingerprint for a SMILES string.

  Args:
    x: SMILES string describing the compound.

  Returns:
    The bit vector returned by ``Chem.GetMorganFingerprintAsBitVect``, or
    ``None`` when ``x`` cannot be parsed or fingerprinted.
  """
  try:
    mol = Chem.MolFromSmiles(x)
    # Stop early when no molecule came back, so None is never handed to the
    # fingerprint call.
    if mol is None:
      return None
    return Chem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
  except Exception:
    # Best effort: any featurisation error marks the row as unusable (None).
    # Unlike a bare `except:`, KeyboardInterrupt and SystemExit still propagate,
    # so a long-running `.apply` can be interrupted.
    return None
  
# Attach integer IDs, integer labels and Morgan fingerprints to every row.
data['Protein_ID'] = data["Target Sequence"].map(protein_idx)
data["Compound_ID"] = data.SMILES.map(smile_idx)
data["Label"] = data["Label"].apply(int)
# NOTE(review): the column is named "morgan_fp_r1" although
# get_morgan_fingerprint uses radius=2 — confirm which column name the
# training script reads.
data["morgan_fp_r1"] = data.SMILES.apply(get_morgan_fingerprint)

# Drop rows whose SMILES could not be fingerprinted. The index is deliberately
# not reset: the split arrays are used as index labels (`data.loc`) below.
data = data.drop(index=data.index[data["morgan_fp_r1"].isna()])

# Keep only the split members that survived the drop. Each split is
# intersected with its OWN indices; deriving val/test from train_indices would
# make all three splits identical.
train_indices = np.intersect1d(train_indices, data.index.values)
val_indices = np.intersect1d(val_indices, data.index.values)
test_indices = np.intersect1d(test_indices, data.index.values)

data["Sequence"] = data["Target Sequence"]

# Write three CSV files per split: interactions, compounds, proteins.
output_dir = Path("tmp")
output_dir.mkdir(exist_ok=True)
splits = (("train", train_indices), ("val", val_indices), ("test", test_indices))
for split_name, split_indices in splits:
    data.loc[split_indices, ['Compound_ID','Protein_ID','Label']].to_csv(output_dir / (split_name + "_dti" + ".csv"))
    # NOTE(review): the compound/protein tables hold one row per interaction, so
    # IDs can repeat, and "morgan_fp_r1" holds RDKit objects that are written in
    # their string form — confirm the training script accepts both.
    data.loc[split_indices, ['Compound_ID','SMILES','morgan_fp_r1']].to_csv(output_dir / (split_name + "_compound" + ".csv"))
    data.loc[split_indices, ['Protein_ID', 'Sequence']].to_csv(output_dir / (split_name + "_protein" + ".csv"))

In [None]:
# Run the cloned DeepConvDTI.py on the CSVs written to tmp/.
# Positional arguments: train DTI, compound and protein CSVs.
# -i / -d / -t pass the val DTI, compound and protein CSVs; the test_*.csv
# files are not passed to this command.
# NOTE(review): the absolute /content/... paths assume the notebook's working
# directory is /content, since the CSVs were written to a relative "tmp" — confirm.
# NOTE(review): `-n` appears twice (`-n validation_dataset` and `-n 30`) —
# check the script's argument parser to see which value takes effect.
!python3 /content/DeepConv-DTI/DeepConvDTI.py /content/tmp/train_dti.csv /content/tmp/train_compound.csv /content/tmp/train_protein.csv --validation -n validation_dataset -i /content/tmp/val_dti.csv -d /content/tmp/val_compound.csv -t /content/tmp/val_protein.csv -W -c 512 128 -w 10 15 20 25 30 -p 128 -f 128 -r 0.0001 -n 30 -v Convolution -l 2500 -L 2048 -D 0 -a elu -F 128 -b 32 -y 0.0001 -o ./validation_output.csv -m ./model.model -e 1