# Comparison of performance of SP and XP

In [1]:
from benchmark_new import *
%matplotlib inline

In [2]:
def get_data(similarity, blacklist):
    """Load the 'stats19' and 'stats20' results and aggregate them at four levels.

    Parameters
    ----------
    similarity : callable
        Called with ``data.mcss``; its return value is used as a boolean
        row mask on the loaded data.
    blacklist : iterable
        Values of the ``protein`` index level whose rows are dropped.

    Returns
    -------
    tuple
        ``(ligand, target, family_ligand, family_target)`` where

        * ``ligand`` is the cross-section for helpers='pdb', mode='standard',
          n_ligs=0, features='mcss_contact_hbond_sb', alpha=1.0, restricted
          to columns whose name matches ``rmsd`` or ``correct``;
        * ``target`` is ``ligand`` averaged over its last index level, with
          rows containing NaN dropped;
        * ``family_ligand`` is ``ligand`` averaged over its last two index
          levels;
        * ``family_target`` is ``target`` averaged over its last index level.
    """
    helpers = 'pdb'
    mcss = '../../../mcss_sizes.pkl'
    versions = ['stats19', 'stats20']

    data = pd.concat([load(version, helpers, mcss) for version in versions])
    # NOTE(review): thresh is presumably an RMSD cutoff for a "correct" pose;
    # confirm against add_correct in benchmark_new.
    data = add_correct(data, thresh = 2.0)
    data = filter_to_ubiquitous_ligands(data)

    # One vectorised membership test replaces the "!= None" all-True seed mask
    # and the per-target boolean multiplication; an empty blacklist keeps
    # every row.
    proteins = data.index.get_level_values('protein')
    data = data[~proteins.isin(list(blacklist))]
    data = data[similarity(data.mcss)]

    ligand = data.xs((helpers, 'standard', 0, 'mcss_contact_hbond_sb', 1.0),
                     level=('helpers', 'mode', 'n_ligs', 'features', 'alpha')
                     ).filter(regex='rmsd|correct')

    def _all_but_last(frame, n_dropped):
        # Positional level numbers for every index level except the last n_dropped.
        return list(range(frame.index.nlevels - n_dropped))

    target = ligand.groupby(level=_all_but_last(ligand, 1)).mean().dropna()
    family_ligand = ligand.groupby(level=_all_but_last(ligand, 2)).mean()
    family_target = target.groupby(level=_all_but_last(target, 1)).mean()

    return ligand, target, family_ligand, family_target
    
def overall_scores(ligand, target, family_ligand, family_target, ligands):
    """Build one table of per-version ``*correct`` scores for four metrics.

    Parameters
    ----------
    ligand, target : DataFrame
        Averaged per ``version`` with ``groupby('version').mean()``.
    family_ligand, family_target : DataFrame
        Passed to ``drug_average``; the result must expose a ``version``
        index level.
    ligands : str
        Label stored in the ``ligands`` index level of every returned row.

    Returns
    -------
    DataFrame
        Indexed by ``(ligands, metric, version)`` and holding only the columns
        whose name matches ``correct``. ``metric`` takes the values 'Ligand',
        'Target', 'Drug-target' and 'Drug-ligand', in that order.
    """
    per_metric = [('Ligand', ligand.groupby('version').mean()),
                  ('Target', target.groupby('version').mean()),
                  ('Drug-target', drug_average(family_target)),
                  ('Drug-ligand', drug_average(family_ligand))]

    tables = []
    for metric, df in per_metric:
        # assign() works on a copy, so a frame handed back by drug_average is
        # never modified here (it could alias the caller's data).
        labelled = df.assign(metric=metric,
                             ligands=ligands,
                             version=df.index.get_level_values('version'))
        labelled = labelled.set_index(['ligands', 'metric', 'version'])
        tables.append(labelled.filter(regex='correct'))
    return pd.concat(tables)

def highlight_SP_best(x):
    """Styler callback: colour rows where the 'stats19' run has the best glide score.

    Intended for ``DataFrame.style.apply(highlight_SP_best, axis=None)``.

    Parameters
    ----------
    x : DataFrame
        Must have a ``glide_correct`` column and a MultiIndex with a
        ``version`` level. Rows are grouped on every index level except the
        last one.

    Returns
    -------
    DataFrame
        Same index and columns as ``x``. Every cell of a row holds
        ``'background-color: green;'`` when that row's version is 'stats19'
        and its ``glide_correct`` equals the maximum within its group;
        all other cells hold ``''``.
    """
    highlight = 'background-color: green;'

    outer_levels = list(range(x.index.nlevels - 1))
    group_best = x.groupby(level=outer_levels)['glide_correct'].transform('max')
    is_group_best = group_best.eq(x['glide_correct'])
    is_stats19 = x.index.get_level_values('version') == 'stats19'

    # Combine the masks with the boolean operator "&"; multiplying boolean
    # Series with "*" makes pandas warn when it evaluates through numexpr.
    selected = (is_group_best & is_stats19).to_numpy()

    styles = pd.DataFrame('', index=x.index, columns=x.columns)
    # Positional boolean mask: no dependence on index label alignment.
    styles.loc[selected, :] = highlight
    return styles

In [3]:
# Each similarity filter receives the mcss values (see get_data) and returns
# a boolean row mask; 'All' keeps every value greater than -1.
similarity_filters = [
    ('All', lambda x: x > -1),
    ('<0.5', lambda x: x < 0.5),
    ('<0.4', lambda x: x < 0.4),
    ('>=0.5', lambda x: x >= 0.5),
    ('>=0.4', lambda x: x >= 0.4),
]

# Proteins to exclude for each run; 'All' excludes nothing.
protein_blacklists = [
    ('All', []),
    ('A2AR-VDR', ['A2AR', 'VDR']),
    ('A2AR', ['A2AR']),
    ('A2AR-TRPV1', ['A2AR', 'TRPV1']),
]

# One score table per (similarity filter, blacklist) pair, similarity-major.
metrics = []
for slabel, sfunc in similarity_filters:
    for blabel, blacklist in protein_blacklists:
        label = '{} & {}'.format(slabel, blabel)
        ligand, target, family_ligand, family_target = get_data(sfunc, blacklist)
        metrics.append(overall_scores(ligand, target, family_ligand, family_target, label))

In [4]:
# Full table: every metric for every subset, styled by highlight_SP_best.
all_scores = pd.concat(metrics)
all_scores.style.apply(highlight_SP_best, axis=None)

  .format(op=op_str, alt_op=unsupported[op_str]))


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,combind_correct,glide_correct,best_correct
ligands,metric,version,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All & All,Ligand,stats19,0.506718,0.477927,0.737044
All & All,Ligand,stats20,0.49904,0.47025,0.585413
All & All,Target,stats19,0.507685,0.463182,0.759462
All & All,Target,stats20,0.510532,0.484192,0.600042
All & All,Drug-target,stats19,0.52285,0.4552,0.795768
All & All,Drug-target,stats20,0.550707,0.516708,0.646542
All & All,Drug-ligand,stats19,0.513691,0.435017,0.783792
All & All,Drug-ligand,stats20,0.543812,0.499814,0.647532
All & A2AR-VDR,Ligand,stats19,0.502066,0.469008,0.727273
All & A2AR-VDR,Ligand,stats20,0.483471,0.454545,0.572314


In [5]:
# Keep only the rows whose 'metric' index level is 'Drug-target'.
df = pd.concat(metrics)
is_drug_target = df.index.get_level_values('metric') == 'Drug-target'
df = df.loc[is_drug_target]
df.style.apply(highlight_SP_best, axis=None)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,combind_correct,glide_correct,best_correct
ligands,metric,version,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All & All,Drug-target,stats19,0.52285,0.4552,0.795768
All & All,Drug-target,stats20,0.550707,0.516708,0.646542
All & A2AR-VDR,Drug-target,stats19,0.533856,0.45621,0.800032
All & A2AR-VDR,Drug-target,stats20,0.556377,0.520935,0.654854
All & A2AR,Drug-target,stats19,0.539253,0.461707,0.803741
All & A2AR,Drug-target,stats20,0.561086,0.525265,0.659842
All & A2AR-TRPV1,Drug-target,stats19,0.516031,0.436176,0.790546
All & A2AR-TRPV1,Drug-target,stats20,0.580812,0.543342,0.684384
<0.5 & All,Drug-target,stats19,0.466518,0.419055,0.780444
<0.5 & All,Drug-target,stats20,0.465881,0.424911,0.570298


In [6]:
# Keep only the rows whose 'metric' index level is 'Drug-ligand'.
df = pd.concat(metrics)
is_drug_ligand = df.index.get_level_values('metric') == 'Drug-ligand'
df = df.loc[is_drug_ligand]
df.style.apply(highlight_SP_best, axis=None)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,combind_correct,glide_correct,best_correct
ligands,metric,version,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All & All,Drug-ligand,stats19,0.513691,0.435017,0.783792
All & All,Drug-ligand,stats20,0.543812,0.499814,0.647532
All & A2AR-VDR,Drug-ligand,stats19,0.561804,0.451351,0.802341
All & A2AR-VDR,Drug-ligand,stats20,0.562378,0.511417,0.67706
All & A2AR,Drug-ligand,stats19,0.568199,0.457747,0.806732
All & A2AR,Drug-ligand,stats20,0.568225,0.517071,0.683099
All & A2AR-TRPV1,Drug-ligand,stats19,0.566565,0.455928,0.805812
All & A2AR-TRPV1,Drug-ligand,stats20,0.56956,0.518268,0.684756
<0.5 & All,Drug-ligand,stats19,0.421427,0.375728,0.738392
<0.5 & All,Drug-ligand,stats20,0.451306,0.414177,0.573199
