In [1]:
import os
import sys

import numpy as np

import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import matplotlib.cm as cm
from matplotlib import gridspec

get_ipython().magic(u'matplotlib inline')

sys.path.append('../4_analyze')
from containers import Dataset
from score_query import ScoreQuery
from utils import export, show_side_by_side, load_score_file

sys.path.append('../3_score')
from optimize import Scores

sys.path.append('../1_dock')
from parse_chembl import load_chembl

In [2]:
# Root directory; the code below expects one sub-directory per protein in it.
data = '/scratch/PI/rondror/jbelk'

# Protein name -> structure identifier. The file '<identifier>_lig.mae' is
# left out of that protein's ligand list below.
dock_st = {'B1AR':'2VT4','AR_final':'3B67','TRPV1':'3J5Q'}


def _ligand_names(protein, structure):
    """Return the sorted ligand names found in a protein's unique_ligands directory.

    A name is the directory entry up to its first '.'. The entry
    '<structure>_lig.mae' is skipped.
    """
    lig_dir = '{}/{}/unique_ligands'.format(data, protein)
    excluded = structure + '_lig.mae'
    names = [entry.split('.')[0] for entry in os.listdir(lig_dir) if entry != excluded]
    names.sort()
    return names


# Protein name -> sorted list of ligand names.
all_ligs = {protein: _ligand_names(protein, structure)
            for protein, structure in dock_st.items()}

# Settings handed to Dataset.load_docking and used to index its results.
glide_dir = 'glide12'
cross = True
ifp_dir = 'ifp/ifp13'

# CHEMBL directory, located directly under the data root.
chembl_dir = '{}/CHEMBL'.format(data)

In [3]:
# Build the project Dataset for the proteins listed in dock_st, then load the
# docking data for the glide_dir / ifp_dir / cross settings chosen above.
# Later cells read it through d.all_proteins[prot].docking[(glide_dir, cross)].
# NOTE(review): the meaning of each positional argument is inferred from the
# variable names only; the Dataset API (imported from ../4_analyze/containers)
# is not visible here -- confirm against its definition.
d = Dataset(dock_st.keys(), data, dock_st, chembl_dir)
d.load_docking(glide_dir, ifp_dir, cross)

In [None]:
max_n_lig = 20

# Score files compared in this cell, relative to '<data>/<protein>/'.
# Placeholders, in order: ligand name, n.
#fname1_template = 'scores4/{}/5-0-{}-1.txt'
fname1_template = 'scores3/{}/0-0-{}-1.txt'
fname2_template = 'scores5/{}/5-0-{}-1.txt'


def _top_pose(fpath, lig_objs, lig):
    """Load one score file and pick its highest-scoring pose.

    Parameters:
        fpath: path of the score file, passed to load_score_file.
        lig_objs: ligand container, passed through to load_score_file.
        lig: name of the query ligand, used to index sq.all_lig.

    Returns:
        (sq, top, rmsd): the object returned by load_score_file, the key of
        the pose with the largest value in sq.pose_scores (on ties, the first
        one in key order, same as taking element 0 of a stable descending
        sort), and that pose's rmsd attribute.

    Raises:
        ValueError if sq.pose_scores is empty.
    """
    sq = load_score_file(fpath, lig_objs)
    top = max(sq.pose_scores.keys(), key=lambda pose: sq.pose_scores[pose])
    return sq, top, sq.all_lig[lig].poses[top].rmsd


# n -> list of top-pose RMSDs, one entry per (protein, ligand).
all_rmsd1 = {}
all_rmsd2 = {}
for n in range(max_n_lig + 1):
    all_rmsd1[n] = []
    all_rmsd2[n] = []
    for prot in dock_st:
        lig_objs = d.all_proteins[prot].docking[(glide_dir, cross)].ligands
        for l in all_ligs[prot]:
            #if l != '5IRX_lig':continue

            fname1 = fname1_template.format(l, n)
            fpath1 = '{}/{}/{}'.format(data, prot, fname1)
            sq1, top1, us_rmsd1 = _top_pose(fpath1, lig_objs, l)
            all_rmsd1[n].append(us_rmsd1)

            fname2 = fname2_template.format(l, n)
            fpath2 = '{}/{}/{}'.format(data, prot, fname2)
            sq2, top2, us_rmsd2 = _top_pose(fpath2, lig_objs, l)
            all_rmsd2[n].append(us_rmsd2)

            # Both files must have been produced with the same settings.
            # NOTE(review): the weight check is one-directional -- keys that
            # exist only in sq2.w are never compared. Confirm this is intended.
            for i in sq1.w:
                assert sq1.w[i] == sq2.w.get(i,0), i
            #assert sq1.w == sq2.w
            assert sq1.d_const == sq2.d_const
            assert sq1.l_q == sq2.l_q

            # Report every case where the two runs disagree on the top pose,
            # followed by the difference between their top scores.
            if top1 != top2:
                print('{} {} {} {} {}'.format(prot, l, n, top1, top2))
                print(sq1.pose_scores[top1] - sq2.pose_scores[top2])

# One figure per score set: mean top-pose RMSD over all proteins/ligands vs n.
# The title is built from the file-name template rather than from the last
# file name left over by the loop, which named a single ligand and n=max_n_lig.
n_values = list(range(max_n_lig + 1))
for rmsd_by_n, template in ((all_rmsd1, fname1_template), (all_rmsd2, fname2_template)):
    plt.plot(n_values, [np.mean(rmsd_by_n[k]) for k in n_values])
    plt.title(template.format('*', 'n'))
    plt.xlabel('n')
    plt.ylabel('mean RMSD of top-scored pose')
    plt.show()

In [None]:
out_dir='scores3'
all_lam = [i/10.0 for i in range(1,11)]
all_w10 = [i/100.0 for i in range(6)]
all_n = [i for i in range(21)]
all_w56 = [(0,0),(0,1),(1,0)]

#out_dir='scores5'
#all_lam = [i/100.0 for i in range(5,16)]
#all_w10 = [i/200.0 for i in range(4)]
#all_n = [i for i in range(26)]
#all_w56 = [(0,0),(0,1),(1,0)]

# Directory the score files are read from, used for both the path and the
# plot title so the two cannot drift apart.
# NOTE(review): out_dir above is never consulted; this cell has always read
# from 'scores5' while declaring out_dir='scores3'. Only the *indices* of the
# grids above enter the file names, so the cell runs either way -- confirm
# which directory/grid pairing is intended.
score_dir = 'scores5'

# Score files are named '<lam index>-<w10 index>-<n index>-<w56 index>.txt'.
for lam_idx, _lam in enumerate(all_lam):
    #if lam_idx != 5: continue
    for w10_idx, _w10 in enumerate(all_w10):
        if w10_idx != 1: continue
        for w56_idx, _w56 in enumerate(all_w56):
            if w56_idx != 0: continue

            # n index -> list of top-pose RMSDs, one per (protein, ligand).
            all_rmsd = {}
            all_rmsd2 = {}
            for n_idx, _n in enumerate(all_n):
                all_rmsd[n_idx] = []
                all_rmsd2[n_idx] = []
                f_name = '{}-{}-{}-{}.txt'.format(lam_idx, w10_idx, n_idx, w56_idx)
                for prot in dock_st:
                    lig_objs = d.all_proteins[prot].docking[(glide_dir, cross)].ligands
                    for l in all_ligs[prot]:
                        f_path = '{}/{}/{}/{}/{}'.format(data, prot, score_dir, l, f_name)
                        sq = load_score_file(f_path, lig_objs)
                        # Pose keys ordered by descending score; element 0 is the top pose.
                        pose_ranks = sorted(sq.pose_scores.keys(),key=lambda pose:-sq.pose_scores[pose])
                        us_rmsd = sq.all_lig[l].poses[pose_ranks[0]].rmsd
                        all_rmsd[n_idx].append(us_rmsd)

                        #f_path2 = '{}/{}/scores4/{}/{}'.format(data, prot, l, f_name)
                        #sq2 = load_score_file(f_path2, lig_objs)
                        #pose_ranks2 = sorted(sq2.pose_scores.keys(),key=lambda pose:-sq2.pose_scores[pose])
                        #us_rmsd2 = sq2.all_lig[l].poses[pose_ranks2[0]].rmsd
                        #all_rmsd2[n_idx].append(us_rmsd2)

                        #if sq.pose_scores[pose_ranks[0]] != sq2.pose_scores[pose_ranks2[0]]:
                        #    print prot, l, n_idx, pose_ranks[0], pose_ranks2[0]
                        #    print sq.pose_scores[pose_ranks[0]] - sq2.pose_scores[pose_ranks2[0]]
                        #    assert sq.objective(sq2.pose_neighbors[pose_ranks2[0]]) == sq2.pose_scores[pose_ranks2[0]]

            # all_rmsd is keyed by position in all_n, so look it up by position
            # (the x axis still shows the all_n values).
            mean_rmsd = [np.mean(all_rmsd[i]) for i in range(len(all_n))]
            plt.plot(all_n, mean_rmsd)
            # The curve spans every n, so the title shows 'n' in that slot
            # instead of the last n index left over by the loop.
            plt.title('{} {}-{}-n-{}.txt'.format(score_dir, lam_idx, w10_idx, w56_idx))
            plt.xlabel('n')
            plt.ylabel('mean RMSD of top-scored pose')
            plt.show()
            #plt.plot(all_n, [np.mean(all_rmsd2[i]) for i in range(len(all_n))])
            #plt.title('scores4 {}-{}-n-{}.txt'.format(lam_idx, w10_idx, w56_idx))
            #plt.show()