In [None]:
#Getting drug data from ChEMBL database
#This script connects to the ChEMBL database and retrieves information about drugs and their interactions with a specified protein target.

import importlib
import chembl_connect as cc
importlib.reload(cc)
import pandas as pd
import json 

# Read the drug list and keep every distinct, non-missing value of the "Drug" column.
df = pd.read_csv("drug.csv")
drug_names = (
    df["Drug"]
    .dropna()
    .unique()
    .tolist()
)

# Hand the names and the protein target to chembl_connect.main (project-local module,
# not shown here); whatever it returns is kept in `interaction`.
interaction = cc.main(
    drug_list=drug_names,
    protein_name="Dopamine transporter",
)

In [None]:
import similarity as sim
importlib.reload(sim)


drugs_df = pd.read_csv("drug_target_interactions.csv")

# One row per distinct (drug_name, canonical_smiles) pair; rows missing either value are dropped.
# The fingerprint column stores whatever sim.get_morgan_fingerprint returns for each SMILES string.
drug_smiles = (
    drugs_df[['drug_name', 'canonical_smiles']]
    .dropna()
    .drop_duplicates()
    .assign(fingerprint=lambda frame: frame['canonical_smiles'].apply(sim.get_morgan_fingerprint))
)
print(drug_smiles.head())

# NOTE(review): the preview saved with this notebook lists Atomoxetine with the same SMILES as
# Amphetamine (CC(N)Cc1ccccc1). That looks like an upstream mapping error in
# drug_target_interactions.csv rather than a real structural identity - confirm before
# trusting the similarity scores involving that row.

# Score every ordered pair of distinct rows (all-vs-all, not one fixed reference drug).
# Both (A, B) and (B, A) are emitted, so if the helper is symmetric - as the Tanimoto
# coefficient is by definition - each score appears twice in the output.
# Names and fingerprints are pulled out of the frame once so the O(n^2) loop does not
# rebuild a row Series on every iteration; record order matches a nested positional loop.
drug_records = list(zip(drug_smiles['drug_name'], drug_smiles['fingerprint']))
results = [
    {
        'drug_1': name_1,
        'drug_2': name_2,
        'similarity': sim.calculate_tanimoto_similarity(fp_1, fp_2),
    }
    for pos_1, (name_1, fp_1) in enumerate(drug_records)
    for pos_2, (name_2, fp_2) in enumerate(drug_records)
    if pos_1 != pos_2  # skip comparing a row with itself
]

# Explicit columns keep the CSV header intact even when fewer than two drugs are
# available and `results` is empty (otherwise the file would be written without a header).
similarity_df = pd.DataFrame(results, columns=['drug_1', 'drug_2', 'similarity'])
similarity_df.to_csv("pairwise_drug_similarity.csv", index=False)



                       drug_name                      canonical_smiles  \
0                Methylphenidate            COC(=O)C(c1ccccc1)C1CCCCN1   
4  Methylphenidate Hydrochloride         COC(=O)C(c1ccccc1)C1CCCCN1.Cl   
6                    Amphetamine                        CC(N)Cc1ccccc1   
7                    Atomoxetine                        CC(N)Cc1ccccc1   
8                      Modafinil  NC(=O)C[S+]([O-])C(c1ccccc1)c1ccccc1   

                                         fingerprint  
0  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
4  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
6  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
7  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
8  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
