In [1]:
import os

import numpy as np
import pandas as pd

import matplotlib.ticker as plticker
import matplotlib.pyplot as plt
from matplotlib.pyplot import MultipleLocator
import matplotlib as mpl
import seaborn as sns

from sklearn.metrics import roc_curve, auc

from rdkit.Chem import AllChem as Chem
from rdkit import DataStructs

import warnings
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

# Global matplotlib defaults, applied in a single update.
# NOTE(review): fonttype 42 is understood to select TrueType embedding in
# PDF/PS output (text stays editable) -- confirm against the matplotlib docs.
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    "figure.dpi": 300,
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
})

In [3]:
# LBVS: ligand-based virtual screening (fingerprint-similarity ROC AUC per case)
def read_smis(smis_path):
    """Read a SMILES file and return its non-empty lines.

    Parameters
    ----------
    smis_path : str or os.PathLike
        Path of a text file with one SMILES record per line.

    Returns
    -------
    list of str
        Each line with surrounding whitespace removed, in file order.
        Blank and whitespace-only lines are skipped so that they are not
        handed to the SMILES parser as empty molecules and do not shift the
        positional indices that callers derive from this list.
    """
    with open(smis_path) as f:
        stripped = (line.strip() for line in f)
        return [smi for smi in stripped if smi]

def Compute_RocAuc(ligands, decoys, fptype="ECFP4", decoys_per_ligand=39):
    """Per-ligand ROC AUC of a fingerprint-similarity screen.

    Every parsable ligand is used once as the query. All other ligands are
    positives (label 1) and all decoys are negatives (label 0), except the
    decoys in the query's own block: for the ligand on line ``k`` of the
    ligand file, decoy lines ``decoys_per_ligand * k`` up to (but excluding)
    ``decoys_per_ligand * (k + 1)`` are left out. Candidates are scored with
    ``DataStructs.FingerprintSimilarity`` against the query fingerprint.

    Parameters
    ----------
    ligands : str or os.PathLike
        SMILES file of the actives, read with ``read_smis``.
    decoys : str or os.PathLike
        SMILES file of the decoys, read with ``read_smis``.
        NOTE(review): presumably stored as consecutive blocks of
        ``decoys_per_ligand`` decoys, one block per ligand in ligand order --
        verify against the dataset layout.
    fptype : str, optional
        ``"ECFP4"`` selects a radius-2 Morgan bit vector; any other value
        selects a radius-3, feature-based Morgan bit vector (the caller in
        this file passes ``"FCFP6"``). Both use 1024 bits.
    decoys_per_ligand : int, optional
        Size of the decoy block belonging to each ligand (default 39).

    Returns
    -------
    tuple
        ``(auc_list, mean_auc, std_auc)``: the list of per-query AUC values
        and the mean and standard deviation of that list.
    """
    if fptype == "ECFP4":
        radius, use_features = 2, False
    else:
        radius, use_features = 3, True

    def _indexed_fingerprints(smis_path):
        # Keep each molecule's line index next to its fingerprint: when a
        # SMILES fails to parse, the remaining molecules must still map to
        # the correct decoy block instead of shifting down by one.
        indexed = []
        for line_idx, smi in enumerate(read_smis(smis_path)):
            mol = Chem.MolFromSmiles(smi)
            if mol:
                fp = Chem.GetMorganFingerprintAsBitVect(
                    mol, radius, nBits=1024, useFeatures=use_features)
                indexed.append((line_idx, fp))
        return indexed

    ligand_fps = _indexed_fingerprints(ligands)
    decoy_fps = _indexed_fingerprints(decoys)

    Auc_list_sims = []
    for query_idx, query_fp in ligand_fps:
        own_start = decoys_per_ligand * query_idx
        own_stop = own_start + decoys_per_ligand

        labels, sims = [], []
        for other_idx, other_fp in ligand_fps:
            if other_idx != query_idx:
                labels.append(1)
                sims.append(DataStructs.FingerprintSimilarity(query_fp, other_fp))
        for decoy_idx, decoy_fp in decoy_fps:
            if not (own_start <= decoy_idx < own_stop):
                labels.append(0)
                sims.append(DataStructs.FingerprintSimilarity(query_fp, decoy_fp))

        # roc_curve orders the scores itself, so no pre-sorting is needed.
        fpr, tpr, _ = roc_curve(labels, sims)
        Auc_list_sims.append(auc(fpr, tpr))

    aucs_np = np.array(Auc_list_sims)
    return Auc_list_sims, aucs_np.mean(), aucs_np.std()

def set_box_color(bp, color):
    """Colour the artists of a box plot.

    ``bp`` is indexed with the keys ``'boxes'``, ``'fliers'`` and
    ``'medians'``. Every box gets ``color`` as its face colour; flier marker
    faces and median lines are set to black.
    """
    for box in bp['boxes']:
        box.set_facecolor(color)

    black_props = (
        ('fliers', {'markerfacecolor': "black"}),
        ('medians', {'color': 'black'}),
    )
    for key, props in black_props:
        plt.setp(bp[key], **props)
rows_ecfp = []
rows_fcfp = []
lbvs_dir = os.path.join(os.getcwd(), "datasets_ext_val_SI_classical_VS", "LBVS")
cases = ["HIVRT", "HSP90A", "ESR1", "ESR2", "FAK1"]

# Scan the dataset root once (and close the iterator) instead of opening four
# separate scandir iterators; only sub-directories can be case folders.
with os.scandir(lbvs_dir) as lbvs_entries:
    lbvs_case_dirs = {entry.name: entry.path for entry in lbvs_entries if entry.is_dir()}

ecfp4_deepcoy, ecfp4_tocodecoy = [], []
fcfp6_deepcoy, fcfp6_tocodecoy = [], []
for case in cases:
    # Resolve the case folder by directory *name* (not by substring of the
    # full path) and fail loudly when it is missing or ambiguous, rather than
    # silently reusing the paths of the previous case.
    if case in lbvs_case_dirs:
        case_dir = lbvs_case_dirs[case]
    else:
        candidates = sorted(path for name, path in lbvs_case_dirs.items() if case in name)
        if len(candidates) != 1:
            raise FileNotFoundError(
                f"expected exactly one sub-directory of {lbvs_dir!r} matching "
                f"{case!r}, found {candidates}")
        case_dir = candidates[0]

    single_deepcoy_ligands = os.path.join(case_dir, "deepcoy", "actives.smi")
    single_deepcoy_inactives = os.path.join(case_dir, "deepcoy", "decoys.smi")
    single_tocodecoy_ligands = os.path.join(case_dir, "tocodecoy", "actives.smi")
    single_tocodecoy_decoys = os.path.join(case_dir, "tocodecoy", "decoys.smi")

    # Same evaluation for both fingerprint types; each type has its own
    # per-case AUC lists and its own summary table.
    for fptype, aucs_deepcoy, aucs_tocodecoy, table_rows in (
        ("ECFP4", ecfp4_deepcoy, ecfp4_tocodecoy, rows_ecfp),
        ("FCFP6", fcfp6_deepcoy, fcfp6_tocodecoy, rows_fcfp),
    ):
        deepcoy_l, deepcoy_m, deepcoy_s = Compute_RocAuc(
            single_deepcoy_ligands, single_deepcoy_inactives, fptype)
        tocodecoy_l, tocodecoy_m, tocodecoy_s = Compute_RocAuc(
            single_tocodecoy_ligands, single_tocodecoy_decoys, fptype)

        aucs_deepcoy.append(deepcoy_l)
        aucs_tocodecoy.append(tocodecoy_l)
        table_rows.append({"Case":case,
                           "deepcoy_mean":deepcoy_m, "deepcoy_s":deepcoy_s,
                           "tocodecoy_mean":tocodecoy_m, "tocodecoy_s":tocodecoy_s})

df_ecfp = pd.DataFrame(rows_ecfp)
df_ecfp.to_csv("LBVS_SI_table_ecfp4.csv", index=False)
df_fcfp = pd.DataFrame(rows_fcfp)
df_fcfp.to_csv("LBVS_SI_table_fcfp6.csv", index=False)

In [5]:
# SBVS: structure-based virtual screening (ROC AUC of smina docking scores per case)
def read_score(file_path):
    """Collect the lowest ``minimizedAffinity`` per molecule from a docking output file.

    A line containing ``CHEMBL``, ``ZINC``, ``active`` or ``decoy`` is taken
    as a molecule identifier. The line after each ``> <minimizedAffinity>``
    tag is parsed as a float and kept if it is lower than the value stored so
    far for the most recent identifier. An identifier that never receives a
    score keeps the initial value 9999.

    Parameters
    ----------
    file_path : str or os.PathLike
        File to read (``os.DirEntry`` and ``pathlib.Path`` work as well as a
        plain string). If its base name contains ``actives`` every molecule
        is labelled 1, otherwise 0.

    Returns
    -------
    list
        Flat list ``[label, score, label, score, ...]`` with one pair per
        identifier, in order of first appearance.

    Raises
    ------
    ValueError
        If an affinity tag appears before any identifier line, is the last
        line of the file, or is followed by a non-numeric line.
    """
    with open(file_path) as f:
        content = [line.rstrip() for line in f]

    best_scores = {}
    current_id = None
    for i, con_ in enumerate(content):
        if ('CHEMBL' in con_) or ('ZINC' in con_) or ('active' in con_) or ('decoy' in con_):
            best_scores.setdefault(con_, 9999)
            current_id = con_

        if con_ == '> <minimizedAffinity>':
            if current_id is None:
                raise ValueError(
                    "minimizedAffinity tag on line {} precedes any molecule "
                    "identifier".format(i + 1))
            if i + 1 >= len(content):
                raise ValueError(
                    "minimizedAffinity tag on line {} has no value line".format(i + 1))
            score = float(content[i + 1])
            if score < best_scores[current_id]:
                best_scores[current_id] = score

    # Only the base name decides the label, matching DirEntry.name / Path.name.
    label = 1 if 'actives' in os.path.basename(os.fspath(file_path)) else 0

    Dock = []
    for val in best_scores.values():
        Dock.append(label)
        Dock.append(val)
    return Dock

def get_args(Dock_l):
    """Compute the ROC curve and its AUC from a flat ``[label, score, ...]`` list.

    ``Dock_l`` is reshaped into rows of ``(type, dock)``, ordered by ascending
    ``dock`` and passed to ``roc_curve`` with ``pos_label=0``, i.e. label 0 is
    the positive class and a larger ``dock`` value counts as stronger evidence
    for it.

    Returns ``(fpr, tpr, roc_auc)``.
    """
    pairs = np.array(Dock_l).reshape(-1, 2)
    ranked = pd.DataFrame(pairs, columns=['type', 'dock']).sort_values(by="dock", ascending=True)

    labels = list(ranked['type'])
    scores = list(ranked['dock'])

    fpr, tpr, _ = roc_curve(labels, scores, pos_label=0)
    return fpr, tpr, auc(fpr, tpr)

sbvs_dir = os.path.join(os.getcwd(), "datasets_ext_val_SI_classical_VS", "SBVS")
cases = ["HIVRT", "HSP90A", "ESR1", "ESR2", "FAK1"]

# Scan the dataset root once (and close the iterator); only sub-directories
# can be case folders.
with os.scandir(sbvs_dir) as sbvs_entries:
    sbvs_case_dirs = {entry.name: entry.path for entry in sbvs_entries if entry.is_dir()}

rows = []
for case in cases:
    # Resolve the case folder explicitly so a missing or ambiguous folder
    # raises instead of silently reusing the previous case's folder.
    if case in sbvs_case_dirs:
        case_dir = sbvs_case_dirs[case]
    else:
        candidates = sorted(path for name, path in sbvs_case_dirs.items() if case in name)
        if len(candidates) != 1:
            raise FileNotFoundError(
                f"expected exactly one sub-directory of {sbvs_dir!r} matching "
                f"{case!r}, found {candidates}")
        case_dir = candidates[0]

    # Reset per case: a file missing from this folder must not fall back to
    # the entry found for the previous case.
    deepcoy_actives_dir = None
    deepcoy_decoys_dir = None
    tocodecoy_actives_dir = None
    tocodecoy_decoys_dir = None
    with os.scandir(case_dir) as case_entries:
        for sdf in case_entries:
            if sdf.name == "smina_out_actives_deepcoy.sdf":
                deepcoy_actives_dir = sdf
            elif sdf.name == "smina_out_decoys_deepcoy.sdf":
                deepcoy_decoys_dir = sdf
            elif sdf.name == "smina_out_actives_tocodecoy.sdf":
                tocodecoy_actives_dir = sdf
            elif sdf.name.endswith(".sdf"):
                # The remaining SDF file is taken as the TocoDecoy decoys.
                # NOTE(review): its exact file name is not visible here --
                # presumably "smina_out_decoys_tocodecoy.sdf"; confirm.
                # Non-SDF entries (hidden files, logs, folders) are ignored.
                if tocodecoy_decoys_dir is not None:
                    raise ValueError(
                        f"more than one candidate TocoDecoy decoys file in "
                        f"{case_dir!r}: {tocodecoy_decoys_dir.name!r} and {sdf.name!r}")
                tocodecoy_decoys_dir = sdf

    missing = [label for label, entry in (
        ("deepcoy actives", deepcoy_actives_dir),
        ("deepcoy decoys", deepcoy_decoys_dir),
        ("tocodecoy actives", tocodecoy_actives_dir),
        ("tocodecoy decoys", tocodecoy_decoys_dir),
    ) if entry is None]
    if missing:
        raise FileNotFoundError(
            f"docking output missing in {case_dir!r}: {', '.join(missing)}")

    deepcoy_actives_scores = read_score(deepcoy_actives_dir)
    deepcoy_decoys_scores = read_score(deepcoy_decoys_dir)
    deepcoy_actives_scores.extend(deepcoy_decoys_scores)
    fpr_deepcoy, tpr_deepcoy, roc_auc_deepcoy = get_args(deepcoy_actives_scores)

    tocodecoy_actives_scores = read_score(tocodecoy_actives_dir)
    tocodecoy_decoys_scores = read_score(tocodecoy_decoys_dir)
    tocodecoy_actives_scores.extend(tocodecoy_decoys_scores)
    fpr_tocodecoy, tpr_tocodecoy, roc_auc_tocodecoy = get_args(tocodecoy_actives_scores)

    rows.append({"Case":case, "deepcoy":roc_auc_deepcoy,
                    "tocodecoy":roc_auc_tocodecoy})
df_smina = pd.DataFrame(rows)
df_smina.to_csv("SBVS_SI_table_smina.csv", index=False)