## Library file
This file is not meant to be run directly. It collects common helper functions and is imported by the remaining notebooks.<br/>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from rdkit.ML.Scoring import Scoring
%run DOCK_OutputParser.ipynb

In [None]:
def positive_detector(objects):
    """Return one boolean per entry of *objects*.

    Each flag is True when the corresponding entry contains the
    substring "oleObject" (case-sensitive), and False otherwise.
    The result is a plain list in the same order as the input.
    """
    marker = "oleObject"
    flags = []
    for name in objects:
        flags.append(marker in name)
    return flags

In [None]:
def return_analysis_of(all_tdos,N_GOOD,N_TOT,N_MAX=None,bitmatch_fx=positive_detector):
    """Compute top-n hit-fraction curves for several runs and their combination.

    For every object in ``all_tdos`` and for the combined object produced by
    ``combine_DOCKOutputs``, the keys of ``score_dict`` (after ``sort_scores()``)
    are passed to ``bitmatch_fx``; the curve value at n is the sum of the first
    n flags divided by n, for n = 1..N_MAX.

    Parameters
    ----------
    all_tdos : sequence
        Objects exposing ``sort_scores()`` and ``score_dict``. Each one is
        sorted in place by this function.
    N_GOOD : int
        Number of positives; used for the random baseline and as the default
        for ``N_MAX``.
    N_TOT : int
        Total number of objects; the random baseline is ``N_GOOD / N_TOT``.
    N_MAX : int, optional
        Largest n to evaluate. Defaults to ``N_GOOD``.
        NOTE(review): an earlier comment here read "Always N_MAX >= N_TOT",
        but ``random_er`` only has the same length as ``no_top`` when
        ``N_MAX <= N_TOT`` -- confirm the intended constraint.
    bitmatch_fx : callable
        Maps the list of keys to a sequence of per-key hit flags.

    Returns
    -------
    tuple
        ``(cdo, no_top, comb_err, raw_err, agg_err, random_er)`` where
        ``cdo`` is the combined object, ``no_top`` is ``arange(1, N_MAX + 1)``,
        ``comb_err`` is the curve of ``cdo``, ``raw_err`` is a 2-D array with
        one curve per entry of ``all_tdos``, ``agg_err`` is the mean of those
        curves, and ``random_er`` is the constant ``N_GOOD / N_TOT`` with
        length ``min(N_TOT, N_MAX)``.
    """
    if N_MAX is None:
        N_MAX = N_GOOD

    # nanmin should be used, but for benchmarking it is unreliable.
    cdo = combine_DOCKOutputs(all_tdos, aggregate=np.nanmean)
    cdo.sort_scores()

    no_top = np.arange(1, N_MAX + 1)
    random_er = (np.ones(N_TOT) * (N_GOOD / N_TOT))[:len(no_top)]

    curves = []
    # list(...) lets callers pass a tuple as well as a list.
    for tdo in list(all_tdos) + [cdo]:
        tdo.sort_scores()
        flags = np.asarray(bitmatch_fx(list(tdo.score_dict.keys())))[:len(no_top)]

        # Running hit count via one cumulative sum instead of re-summing the
        # prefix for every n. If fewer than N_MAX flags exist, the count stays
        # at its final value for the remaining n (same as summing a short slice).
        running_hits = np.zeros(len(no_top))
        running_hits[:len(flags)] = np.cumsum(flags)
        if len(flags):
            running_hits[len(flags):] = running_hits[len(flags) - 1]

        curves.append(running_hits / no_top)
    raw_err = np.stack(curves)

    comb_err = curves[-1]  # the last curve belongs to the combined object
    agg_err = np.mean(raw_err[:-1], axis=0)
    return cdo, no_top, comb_err, raw_err[:-1], agg_err, random_er

In [None]:
def plot_all(no_top,raw_err,agg_err,comb_err,random_er,N_GOOD,N_CHECK=None,*,
             title="DOCK results (MD Structures)",xlim=(0,500)):
    """Plot hit counts versus number of top-ranked objects and show the figure.

    Every curve is multiplied by ``no_top`` before plotting, so the y-axis
    shows counts rather than fractions.

    Parameters
    ----------
    no_top : array-like
        x values (number of top-ranked objects considered).
    raw_err : sequence of array-like
        Individual curves, drawn in grey; only the first gets a legend entry.
    agg_err : array-like
        Curve drawn in black and labelled "DOCK (mean)".
    comb_err : array-like
        Curve drawn in blue and labelled "DOCK (agg)".
    random_er : array-like
        Baseline drawn as a dashed purple line labelled "Random".
    N_GOOD : int
        Sets the upper y-limit (``N_GOOD + 1``) and the default ``N_CHECK``.
    N_CHECK : number, optional
        x position of the dashed red vertical line.
        Defaults to ``int(0.02 * N_GOOD) + 1``.
    title : str, keyword-only
        Figure title; the default is the previously hard-coded title.
    xlim : tuple, keyword-only
        ``(left, right)`` x-axis limits; the default is the previously
        hard-coded range.
    """
    if N_CHECK is None:
        N_CHECK = int(0.02 * N_GOOD) + 1

    plt.figure(figsize=(10, 8))
    plt.title(title, fontsize=28)
    plt.plot(no_top, random_er * no_top, label="Random", linestyle="--", c="purple")

    for ti, no_er in enumerate(raw_err):
        # Label only the first raw curve so the legend has a single "Raw" entry.
        plt.plot(no_top, no_er * no_top, label="Raw" if ti == 0 else None,
                 c="grey", linewidth=2)

    plt.plot(no_top, agg_err * no_top, label="DOCK (mean)", c="black", linewidth=2)
    plt.plot(no_top, comb_err * no_top, label="DOCK (agg)", c="blue", linewidth=2)
    plt.legend(fontsize=18)
    plt.xlabel("No. docked", fontsize=21)
    plt.ylabel("No of good ligands", fontsize=21)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.xlim(*xlim)
    plt.ylim(0, N_GOOD + 1)
    plt.axvline(x=N_CHECK, c="red", linestyle="--")
    plt.show()