In [None]:
import os

import numpy as np
import pandas as pd

import matplotlib.ticker as plticker
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

from sklearn.metrics import roc_curve, auc

from rdkit.Chem import AllChem as Chem
from rdkit import DataStructs

import warnings
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings("ignore")

# Figure defaults shared by all plots in this notebook.
mpl.rcParams.update({
    'pdf.fonttype': 42,            # font type used for text in PDF output
    'ps.fonttype': 42,             # same setting for PostScript output
    "figure.dpi": 300,
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],  # preferred face for the sans-serif family
})

In [None]:
#LBVS
def _read_smiles_as_mols(csv_path):
    """Parse the ``SMILES`` column of ``csv_path`` into RDKit molecules.

    Raises:
        ValueError: if one or more SMILES strings cannot be parsed. RDKit
            returns ``None`` for those, which would otherwise only surface
            later as an opaque error inside the fingerprint call.
    """
    smiles = list(pd.read_csv(csv_path)['SMILES'])
    mols = [Chem.MolFromSmiles(smi) for smi in smiles]
    bad_rows = [row for row, mol in enumerate(mols) if mol is None]
    if bad_rows:
        raise ValueError(
            f"{csv_path}: could not parse SMILES at row(s) {bad_rows}"
        )
    return mols


def _morgan_bit_vectors(mols, fptype):
    """Return 1024-bit Morgan fingerprints for ``mols``.

    ``"ECFP4"`` uses radius 2; any other ``fptype`` uses radius 3 with
    ``useFeatures=True`` (this is how the notebook requests "FCFP6").
    """
    if fptype == "ECFP4":
        return [Chem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024) for mol in mols]
    return [
        Chem.GetMorganFingerprintAsBitVect(mol, 3, useFeatures=True, nBits=1024)
        for mol in mols
    ]


def Compute_RocAuc(Diverse_ligands_PS, Final_decoys, fptype="ECFP4", decoys_per_ligand=39):
    """Leave-one-out similarity-search ROC AUC for every ligand in a data set.

    Each ligand is used once as the query. All *other* ligands are labelled 1
    and the decoys are labelled 0; every compound is scored by its fingerprint
    similarity to the query (``DataStructs.FingerprintSimilarity`` with RDKit's
    default metric) and the ROC AUC of that ranking is recorded.

    Args:
        Diverse_ligands_PS: path of a CSV file with a ``SMILES`` column
            holding the ligands.
        Final_decoys: path of a CSV file with a ``SMILES`` column holding
            the decoys.
        fptype: ``"ECFP4"`` for Morgan radius 2; any other value selects the
            feature-based radius-3 variant.
        decoys_per_ligand: size of the consecutive block of decoys skipped
            for each query; for query ``k`` the decoys at rows
            ``[k * decoys_per_ligand, (k + 1) * decoys_per_ligand)`` are
            excluded. Presumably these are the decoys generated for that
            ligand -- confirm against the data set layout.

    Returns:
        list of float: one AUC per ligand, in file order.

    Raises:
        ValueError: if any SMILES string in either file cannot be parsed.
    """
    ligand_fps = _morgan_bit_vectors(_read_smiles_as_mols(Diverse_ligands_PS), fptype)
    decoy_fps = _morgan_bit_vectors(_read_smiles_as_mols(Final_decoys), fptype)

    auc_per_query = []
    for k, query_fp in enumerate(ligand_fps):
        labels = []
        sims = []

        # Positives: every ligand except the query itself.
        for i, ligand_fp in enumerate(ligand_fps):
            if i != k:
                labels.append(1)
                sims.append(DataStructs.FingerprintSimilarity(query_fp, ligand_fp))

        # Negatives: every decoy outside the query's own block.
        own_start = decoys_per_ligand * k
        own_stop = own_start + decoys_per_ligand
        for j, decoy_fp in enumerate(decoy_fps):
            if not (own_start <= j < own_stop):
                labels.append(0)
                sims.append(DataStructs.FingerprintSimilarity(query_fp, decoy_fp))

        # roc_curve orders the scores itself, so no pre-sorting is needed.
        fpr, tpr, _ = roc_curve(labels, sims)
        auc_per_query.append(auc(fpr, tpr))

    return auc_per_query

def set_box_color(bp, color):
    """Fill the boxes of a box plot with ``color``; fliers and medians go black.

    Args:
        bp: mapping with ``'boxes'``, ``'fliers'`` and ``'medians'`` entries,
            as returned by ``Axes.boxplot``. The boxes must expose
            ``set_facecolor`` (i.e. the plot was drawn with
            ``patch_artist=True``).
        color: face colour applied to every box.
    """
    for box in bp['boxes']:
        box.set_facecolor(color)

    # Remaining artists are styled through plt.setp, one property per group.
    black_styling = (
        ('fliers', {'markerfacecolor': "black"}),
        ('medians', {'color': 'black'}),
    )
    for key, props in black_styling:
        plt.setp(bp[key], **props)

lbvs_dir = os.path.join(os.getcwd(), "datasets_ext_val_classical_VS", "LBVS")
cases = ["HIVRT", "HSP90A", "ESR1", "ESR2", "FAK1"]


def _find_lbvs_case_dir(root, case):
    """Return the path of the entry in ``root`` whose *name* contains ``case``.

    Only the entry name is inspected, so a case name that happens to occur in
    a parent directory cannot produce a false match. If several entries match,
    the last one reported by ``os.scandir`` wins.

    Raises:
        FileNotFoundError: if no entry matches, instead of silently reusing
            the paths found for a previous case.
    """
    matches = [entry.path for entry in os.scandir(root) if case in entry.name]
    if not matches:
        raise FileNotFoundError(f"No directory for case {case!r} found in {root}")
    return matches[-1]


# One list entry (a list of per-ligand AUCs) per case, in the order of `cases`.
ecfp4_MUBDreal, ecfp4_MUBDsyn, ecfp4_MUV, ecfp4_DUDE = [], [], [], []
fcfp6_MUBDreal, fcfp6_MUBDsyn, fcfp6_MUV, fcfp6_DUDE = [], [], [], []
for case in cases:
    lbvs_case_dir = _find_lbvs_case_dir(lbvs_dir, case)

    # MUBDreal and MUBDsyn share the same ligand file and differ only in decoys.
    single_ligands = os.path.join(lbvs_case_dir, "MUBDreal/Diverse_ligands_PS.csv")
    single_real_decoys = os.path.join(lbvs_case_dir, "MUBDreal/Final_decoys.csv")
    single_syn_decoys = os.path.join(lbvs_case_dir, "MUBDsyn/Final_decoys.csv")
    single_MUV_ligands = os.path.join(lbvs_case_dir, "MUV/Diverse_ligands_PS.csv")
    single_MUV_inactives = os.path.join(lbvs_case_dir, "MUV/Final_decoys.csv")
    single_DUDE_ligands = os.path.join(lbvs_case_dir, "DUDE/Diverse_ligands_PS.csv")
    single_DUDE_decoys = os.path.join(lbvs_case_dir, "DUDE/Final_decoys.csv")

    ecfp4_MUBDreal.append(Compute_RocAuc(single_ligands, single_real_decoys))
    ecfp4_MUBDsyn.append(Compute_RocAuc(single_ligands, single_syn_decoys))
    ecfp4_MUV.append(Compute_RocAuc(single_MUV_ligands, single_MUV_inactives))
    ecfp4_DUDE.append(Compute_RocAuc(single_DUDE_ligands, single_DUDE_decoys))

    fcfp6_MUBDreal.append(Compute_RocAuc(single_ligands, single_real_decoys, "FCFP6"))
    fcfp6_MUBDsyn.append(Compute_RocAuc(single_ligands, single_syn_decoys, "FCFP6"))
    fcfp6_MUV.append(Compute_RocAuc(single_MUV_ligands, single_MUV_inactives, "FCFP6"))
    fcfp6_DUDE.append(Compute_RocAuc(single_DUDE_ligands, single_DUDE_decoys, "FCFP6"))

# Horizontal layout of the grouped box plot: one group per case, one box per
# benchmark inside each group.
_LBVS_GROUP_SPACING = 3.6
_LBVS_BOX_OFFSETS = (-1.4, -0.8, -0.2, 0.4)
_LBVS_BOX_COLORS = ('royalblue', 'indianred', 'violet', 'lime')


def _draw_auc_boxplots(ax, auc_groups, case_labels, title, ylabel=None):
    """Draw one panel of grouped AUC box plots.

    Args:
        ax: matplotlib Axes to draw on.
        auc_groups: four sequences (MUBDreal, MUBDsyn, MUV, DUD-E order), each
            holding one list of AUC values per case.
        case_labels: tick labels, one per case.
        title: panel title.
        ylabel: y-axis label, or ``None`` to leave the axis unlabelled.

    Returns:
        tuple: the dashed AUC = 0.5 reference line and the list of the four
        dicts returned by ``Axes.boxplot``.
    """
    chance_line = ax.axhline(y=0.5, ls=(0, (5, 2.5)), c="black", linewidth=2.5)

    boxplots = []
    for aucs, offset, color in zip(auc_groups, _LBVS_BOX_OFFSETS, _LBVS_BOX_COLORS):
        bp = ax.boxplot(
            aucs,
            positions=np.arange(len(aucs)) * _LBVS_GROUP_SPACING + offset,
            sym='k.', widths=0.6, patch_artist=True,
        )
        set_box_color(bp, color)
        boxplots.append(bp)

    # One tick per case group, derived from the same spacing as the boxes.
    ax.set_xticks(np.arange(len(case_labels)) * _LBVS_GROUP_SPACING - 0.5)
    ax.set_xticklabels(case_labels)
    ax.set_yticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
    ax.yaxis.set_major_formatter(plticker.FormatStrFormatter('%.2f'))
    ax.set_xlim(-2, len(case_labels) * 3.08)
    ax.set_ylim(0.2, 1.0)
    ax.tick_params(labelsize=26)
    if ylabel is not None:
        ax.set_ylabel(ylabel, size=26)
    ax.set_xlabel("Case", size=26)
    ax.set_title(title, size=26)
    return chance_line, boxplots


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(28, 7))

#ECFP4
line1, (ecfp4_real_bp, ecfp4_syn_bp, ecfp4_MUV_bp, ecfp4_DUDE_bp) = _draw_auc_boxplots(
    ax1, (ecfp4_MUBDreal, ecfp4_MUBDsyn, ecfp4_MUV, ecfp4_DUDE), cases, "ECFP_4", ylabel="AUC")

#FCFP6
line2, (fcfp6_real_bp, fcfp6_syn_bp, fcfp6_MUV_bp, fcfp6_DUDE_bp) = _draw_auc_boxplots(
    ax2, (fcfp6_MUBDreal, fcfp6_MUBDsyn, fcfp6_MUV, fcfp6_DUDE), cases, "FCFP_6")

# Raw strings: "\m" is not a valid escape sequence in a normal string literal.
labels = ['Random distribution', r'$\mathdefault{MUBD^{real}}$', r'$\mathdefault{MUBD^{syn}}$',
        "MUV", "DUD-E"]
fig.legend((line2, fcfp6_real_bp["boxes"][0], fcfp6_syn_bp["boxes"][0],
         fcfp6_MUV_bp["boxes"][0], fcfp6_DUDE_bp["boxes"][0]), labels)
sns.move_legend(fig, "upper center",bbox_to_anchor=(.5, 1.1), ncol=5, 
                title=None, frameon=False, prop={"size":26}, markerscale=3)
fig.savefig("LBVS.pdf", transparent=True, bbox_inches='tight')

In [None]:
#SBVS
def read_score(file_path):
    """Collect the best (lowest) ``minimizedAffinity`` per molecule from an SDF file.

    A line containing ``CHEMBL``, ``ZINC``, ``active`` or ``decoy`` is taken as
    a molecule identifier. Each ``> <minimizedAffinity>`` tag is followed by
    the score on the next line; that score is attributed to the most recent
    identifier, and the minimum per identifier is kept. An identifier that
    never receives a score keeps the placeholder value 9999.

    Args:
        file_path: any path-like object (``str``, ``pathlib.Path``,
            ``os.DirEntry``). The class label is 1 when the file's base name
            contains ``actives`` and 0 otherwise.

    Returns:
        list: flat list ``[label, score, label, score, ...]`` with one pair per
        distinct identifier, in order of first appearance.

    Raises:
        ValueError: if a score tag appears before any identifier line, or if
            the tag is the last line of the file (no value follows).
    """
    with open(file_path) as f:
        content = [line.rstrip() for line in f]

    best_score = {}
    current_id = None
    for i, line in enumerate(content):
        if ('CHEMBL' in line) or ('ZINC' in line) or ('active' in line) or ('decoy' in line):
            best_score.setdefault(line, 9999)
            current_id = line

        if line == '> <minimizedAffinity>':
            if current_id is None:
                raise ValueError(
                    f"{os.fspath(file_path)}: score tag on line {i + 1} precedes any molecule identifier"
                )
            if i + 1 >= len(content):
                raise ValueError(
                    f"{os.fspath(file_path)}: score tag on line {i + 1} has no value after it"
                )
            score = float(content[i + 1])
            if score < best_score[current_id]:
                best_score[current_id] = score

    # The label is decided by the file name only, not by the identifiers inside.
    label = 1 if 'actives' in os.path.basename(os.fspath(file_path)) else 0

    Dock = []
    for val in best_score.values():
        Dock.append(label)
        Dock.append(val)

    return Dock

def get_args(Dock_l):
    """Compute the ROC curve and AUC of a docking run.

    Args:
        Dock_l: flat list ``[label, score, label, score, ...]`` where label 1
            marks an active and 0 a decoy, as produced by ``read_score``.
            Lower (more negative) scores are treated as better.

    Returns:
        tuple: ``(fpr, tpr, roc_auc)`` with actives as the positive class, so
        ``tpr`` is the fraction of actives recovered and ``fpr`` the fraction
        of decoys recovered at each score threshold.
    """
    data = np.array(Dock_l).reshape(-1, 2)
    labels = data[:, 0]
    dock = data[:, 1]

    # roc_curve treats larger scores as "more positive". Negating the energies
    # ranks the best-scored compounds first with actives as the positive class.
    # The AUC is identical to the decoys-as-positive formulation, but the curve
    # is no longer mirrored about the anti-diagonal.
    fpr, tpr, _ = roc_curve(labels, -dock, pos_label=1)
    roc_auc = auc(fpr, tpr)

    return fpr, tpr, roc_auc

sbvs_dir = os.path.join(os.getcwd(), "datasets_ext_val_classical_VS", "SBVS")
cases = ["HIVRT", "HSP90A", "ESR1", "ESR2", "FAK1"]
fig, axes = plt.subplots(1, 5, figsize=(28, 7))

# Docking output files with a fixed, known name inside every case directory.
_SBVS_KNOWN_SDF = (
    "smina_out_actives_DUDE.sdf",
    "smina_out_decoys_DUDE.sdf",
    "smina_out_actives_MUV.sdf",
    "smina_out_decoys_MUV.sdf",
    "smina_out_actives_MUBD.sdf",
    "smina_out_decoys_MUBDreal.sdf",
)


def _docking_roc(actives_sdf, decoys_sdf):
    """Return ``(fpr, tpr, auc)`` for one actives/decoys pair of docking outputs."""
    scores = read_score(actives_sdf)
    scores.extend(read_score(decoys_sdf))
    return get_args(scores)


for i, case in enumerate(cases):
    # Reset per case so a missing directory cannot fall back to the previous one.
    case_dir = None
    for it in os.scandir(sbvs_dir):
        if case in it.name:
            case_dir = it
    if case_dir is None:
        raise FileNotFoundError(f"No directory for case {case!r} found in {sbvs_dir}")

    known_sdf = {}
    other_sdf = []
    for sdf in os.scandir(case_dir):
        if sdf.name in _SBVS_KNOWN_SDF:
            known_sdf[sdf.name] = sdf
        elif sdf.is_file() and sdf.name.endswith(".sdf"):
            other_sdf.append(sdf)

    missing = [name for name in _SBVS_KNOWN_SDF if name not in known_sdf]
    if missing:
        raise FileNotFoundError(f"{case_dir.path}: missing docking output(s) {missing}")

    # The MUBDsyn decoy file is whichever entry is left over. Stray non-SDF
    # entries (e.g. hidden files) are ignored, and an ambiguous or absent
    # leftover is reported rather than guessed.
    if len(other_sdf) != 1:
        raise ValueError(
            f"{case_dir.path}: expected exactly one MUBDsyn decoy SDF besides the known "
            f"files, found {[entry.name for entry in other_sdf]}"
        )

    DUDE_actives_dir = known_sdf["smina_out_actives_DUDE.sdf"]
    DUDE_decoys_dir = known_sdf["smina_out_decoys_DUDE.sdf"]
    MUV_actives_dir = known_sdf["smina_out_actives_MUV.sdf"]
    MUV_decoys_dir = known_sdf["smina_out_decoys_MUV.sdf"]
    MUBD_actives_dir = known_sdf["smina_out_actives_MUBD.sdf"]
    MUBDreal_decoys_dir = known_sdf["smina_out_decoys_MUBDreal.sdf"]
    MUBDsyn_decoys_dir = other_sdf[0]

    fpr_DUDE, tpr_DUDE, roc_auc_DUDE = _docking_roc(DUDE_actives_dir, DUDE_decoys_dir)
    fpr_MUV, tpr_MUV, roc_auc_MUV = _docking_roc(MUV_actives_dir, MUV_decoys_dir)
    # MUBDreal and MUBDsyn share the same actives and differ only in decoys.
    fpr_MUBDreal, tpr_MUBDreal, roc_auc_MUBDreal = _docking_roc(MUBD_actives_dir, MUBDreal_decoys_dir)
    fpr_MUBDsyn, tpr_MUBDsyn, roc_auc_MUBDsyn = _docking_roc(MUBD_actives_dir, MUBDsyn_decoys_dir)

    axes[i].set_title(case, fontsize=26)
    axes[i].set_xlim(0.0, 1.0)
    axes[i].set_ylim(0.0, 1.05)
    # Dashed diagonal: reference line of a random ranking.
    line1 = axes[i].plot([0, 1], [0, 1], color=('k'), lw=2.5, ls=(0, (5,2.5)))
    smina_real = axes[i].plot(fpr_MUBDreal, tpr_MUBDreal, color='royalblue', lw=3)
    smina_syn = axes[i].plot(fpr_MUBDsyn, tpr_MUBDsyn, color='indianred', lw=3)
    smina_MUV = axes[i].plot(fpr_MUV, tpr_MUV, color='violet', lw=3)
    smina_DUDE = axes[i].plot(fpr_DUDE, tpr_DUDE, color='lime', lw=3)

    axes[i].set_xticks([0.0,.2,0.4,0.6,0.8,1.0])
    axes[i].set_yticks([0.0,.2,0.4,0.6,0.8,1.0])
    plt.setp(axes[i].get_xticklabels(), rotation=45)
    axes[i].tick_params(labelsize=26)

axes[0].set_ylabel("True positive rate", size=26)
axes[2].set_xlabel("False positive rate", size=26)

# Raw strings: "\m" is not a valid escape sequence in a normal string literal.
labels = ['Random distribution', r'$\mathdefault{MUBD^{real}}$', r'$\mathdefault{MUBD^{syn}}$',
        "MUV", "DUD-E"]

# Handles come from the last panel; every panel uses the same colours.
fig.legend((line1[0], smina_real[0], smina_syn[0], smina_MUV[0], smina_DUDE[0]), labels)
sns.move_legend(fig, "upper center",bbox_to_anchor=(.5, 0.9), ncol=5, 
                title=None, frameon=False, prop={"size":26}, markerscale=3)

# Shrink every panel vertically to leave room for the legend above them.
for ax in axes:
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height*0.7])
fig.savefig("SBVS.pdf", transparent=True, bbox_inches='tight')