In [1]:
import sys
sys.path.append('../')
from containers import Protein
import config
import utils

import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline



In [2]:
def make_df(version, exclude=None, max_poses=100):
    """Tabulate docking RMSDs for each PDB ligand of each protein.

    Parameters
    ----------
    version : str
        Key into ``config.STATS`` selecting the settings passed to ``Protein``.
    exclude : list of str, optional
        Protein names forwarded to ``utils.get_proteins`` as its ``exclude``
        argument. When omitted, ``['P19491', 'Q05586-Q12879', 'P22756']`` is
        used.
    max_poses : int, optional
        How many of the leading poses are considered when computing
        ``best_rmsd`` (default 100). Must be at least 1.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(protein, ligand)`` and sorted by that index, with columns
        ``top_rmsd`` (``rmsd`` of the first pose) and ``best_rmsd`` (minimum
        ``rmsd`` among the first ``max_poses`` poses). Ligands for which
        ``protein.lm.docked([ligand])`` is falsy get ``inf`` in both columns.

    Raises
    ------
    ValueError
        If ``max_poses`` is smaller than 1.
    """
    if max_poses < 1:
        raise ValueError('max_poses must be at least 1')
    if exclude is None:
        # Build the default per call: a list literal in the signature is shared
        # between calls and would keep any mutation made by a callee.
        exclude = ['P19491', 'Q05586-Q12879', 'P22756']

    # NOTE(review): 'DATA' is a machine-specific absolute path; it only takes
    # effect if config.PATHS does not provide its own 'DATA' entry.
    paths = {'CODE': '../',
             'DATA': '/Users/jpaggi/sherlock/oak/users/jpaggi/combind',
             'PDB': '{ROOT}/structures/pdb.csv'}

    # Entries from config.PATHS override the defaults above.
    paths.update(config.PATHS)
    paths = utils.resolve(paths)

    # One (protein, ligand, top_rmsd, best_rmsd) tuple per ligand; a single
    # list of rows keeps the four fields aligned by construction.
    rows = []
    for name in utils.get_proteins(paths, exclude):
        protein = Protein(name, config.STATS[version], paths)
        _ligands = protein.lm.get_pdb()
        protein.load_docking(protein.lm.docked(_ligands))
        for ligand in _ligands:
            if protein.lm.docked([ligand]):
                poses = protein.docking[ligand].poses
                top_rmsd = poses[0].rmsd
                best_rmsd = min(pose.rmsd for pose in poses[:max_poses])
            else:
                # No docking result: inf never passes an RMSD threshold, so the
                # ligand still counts in the denominator of success rates.
                top_rmsd = best_rmsd = float('inf')
            rows.append((name, ligand, top_rmsd, best_rmsd))

    df = pd.DataFrame(rows,
                      columns=['protein', 'ligand', 'top_rmsd', 'best_rmsd'])
    return df.set_index(['protein', 'ligand']).sort_index()

In [3]:
# RMSD table for the 'rd1_core' stats version.
core = make_df(version='rd1_core')
# Per-column fraction of ligands with RMSD below 2.05 (ligands without a
# docking result are stored as inf and count as misses), plus the table shape.
core.lt(2.05).mean(), core.shape

(top_rmsd     0.783422
 best_rmsd    0.783422
 dtype: float64, (374, 2))

In [4]:
# RMSD table for the 'paper' stats version.
paper = make_df(version='paper')
# Per-column fraction of ligands with RMSD below 2.05 (ligands without a
# docking result are stored as inf and count as misses), plus the table shape.
paper.lt(2.05).mean(), paper.shape

(top_rmsd     0.502674
 best_rmsd    0.788770
 dtype: float64, (374, 2))

In [5]:
# RMSD table for the 'rd1' stats version.
es4 = make_df(version='rd1')
# Per-column fraction of ligands with RMSD below 2.05 (ligands without a
# docking result are stored as inf and count as misses), plus the table shape.
es4.lt(2.05).mean(), es4.shape

(top_rmsd     0.510695
 best_rmsd    0.764706
 dtype: float64, (374, 2))