In [None]:
# For the code to function properly, the file "datasetlog10.txt" and the folder "model"
# (the load-model cell reads the pretrained model from 'model/') must be placed in the
# same location as the jupyter notebook.

In [None]:
# install the required dependencies
!pip install DeepPurpose
!pip install rdkit
!pip install git+https://github.com/bp-kelley/descriptastorus
!pip install pandas-flavor

In [None]:
# import required libraries
# DeepPurpose pieces used below: `utils` (data_process), `dataset` (training-file reader)
# and the DTI module, aliased as `models` (model_pretrained)
from DeepPurpose import utils, dataset
from DeepPurpose import DTI as models
import warnings
import pandas as pd
# blanket filter: hides every Python warning raised from this point on;
# comment this line out when debugging unexpected behaviour
warnings.filterwarnings("ignore")

In [None]:
# read the training drug-target pairs from 'datasetlog10.txt' (expected next to the
# notebook) and unpack the three returned values
(X_drugs, X_targets, y) = dataset.read_file_training_dataset_drug_target_pairs(
    'datasetlog10.txt'
)

In [None]:
# encoder names handed to utils.data_process for the drug and the target inputs
# NOTE(review): presumably these must match the encoders of the pretrained model — confirm
drug_encoding = 'Transformer'
target_encoding = 'CNN'

In [None]:
# load the pretrained DTI model stored in the local 'model/' directory
model = models.model_pretrained(
    path_dir='model/',
)

In [None]:
# single drug-target prediction
# X_drug holds the SMILES string of the drug, X_target the amino acid sequence of the target
X_drug = ['N#C[C@@H]1CSCN1C(=O)CNC(=O)c1ccnc2ccc(N3CC(F)(C4CC4)C3)cc12']
X_target = ['MKTWVKIVFGVATSAVLALLVMCIVLRPSRVHNSEENTMRALTLKDILNGTFSYKTFFPNWISGQEYLHQSADNNIVLYNIETGQSYTILSNRTMKSVNASNYGLSPDRQFVYLESDYSKLWRYSYTATYYIYDLSNGEFVRGNELPRPIQYLCWSPVGSKLAYVYQNNIYLKQRPGDPPFQITFNGRENKIFNGIPDWVYEEEMLATKYALWWSPNGKFLAYAEFNDTDIPVIAYSYYGDEQYPRTINIPYPKAGAKNPVVRIFIIDTTYPAYVGPQEVPVPAMIASSDYYFSWLTWVTDERVCLQWLKRVQNVSVLSICDFREDWQTWDCPKTQEHIEESRTGWAGGFFVSTPVFSYDAISYYKIFSDKDGYKHIHYIKDTVENAIQITSGKWEAINIFRVTQDSLFYSSNEFEEYPGRRNIYRISIGSYPPSKKCVTCHLRKERCQYYTASFSDYAKYYALVCYGPGIPISTLHDGRTDQEIKILEENKELENALKNIQLPKEEIKKLEVDEITLWYKMILPPQFDRSKKYPLLIQVYGGPCSQSVRSVFAVNWISYLASKEGMVIALVDGRGTAFQGDKLLYAVYRKLGVYEVEDQITAVRKFIEMGFIDEKRIAIWGWSYGGYVSSLALASGTGLFKCGIAVAPVSSWEYYASVYTERFMGLPTKDDNLEHYKNSTVMARAEYFRNVDYLLIHGTADDNVHFQNSAQIAKALVNAQVDFQAMWYSDQNHGLSGLSTNHLYTHMTHFLKQCFSLSD']

# data_process requires a label per pair even for inference; pass a placeholder of the
# right length instead of the training labels `y`, so this cell does not depend on the
# training dataset having been loaded
# NOTE(review): assumes model.predict ignores the label column — confirm against DeepPurpose
placeholder_labels = [0] * len(X_drug)

X_pred = utils.data_process(X_drug, X_target, placeholder_labels,
                                drug_encoding, target_encoding,
                                split_method='no_split')
y_pred = model.predict(X_pred)
print('The predicted score is ' + str(y_pred))

In [None]:
# bulk drug-target prediction
# input file should be in csv format, without a header row
# 1st column contains SMILES sequence of drug
# 2nd column contains amino acid sequence of target

csv_file = 'input.csv' 
data = pd.read_csv(csv_file, header=None)  # No headers in the CSV

# fail early with a clear message instead of an obscure error inside the encoders
if data.shape[1] < 2:
    raise ValueError(f'{csv_file} must contain at least two columns (SMILES, protein sequence)')
if data[[0, 1]].isna().any().any():
    raise ValueError(f'{csv_file} contains rows with a missing SMILES or protein sequence')

# extract SMILES and protein sequences from the first two columns
X_drug = data[0].tolist()  # first column: SMILES
X_target = data[1].tolist()  # second column: protein sequences

# one prediction per input row, kept in row order
predictions = []

# score the pairs one at a time so every input row maps to exactly one prediction
for drug, target in zip(X_drug, X_target):
    # data_process requires a label per pair even for inference; a one-element
    # placeholder is used instead of the training labels `y`
    # NOTE(review): assumes model.predict ignores the label column — confirm against DeepPurpose
    X_pred = utils.data_process([drug], [target], [0],
                                drug_encoding, target_encoding,
                                split_method='no_split')

    # predict the binding affinity; predict returns a sequence, one entry per pair
    y_pred = model.predict(X_pred)
    predictions.append(y_pred[0])

# store the predictions in column 2 (the third column); if the input file already
# had a third column, it is replaced
data[2] = predictions

# save the updated dataframe to a new CSV file with predictions
output_file = 'output.csv'
data.to_csv(output_file, index=False, header=False)

print(f'Predictions saved to {output_file}')