## Library file
This file is not meant to be run directly. It collects common helper classes and functions that are imported by the other notebooks.<br/>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os,sys

In [58]:
class DOCKOutput:
    """Mapping from molecule names to scores, optionally parsed from a file.

    Attributes:
        filename: Path (or label) given at construction; None when the object
            was built directly from names and scores.
        names: Sequence of molecule names. Note that it is NOT reordered by
            `sort_scores()` without `sort_order` or by `shuffle()`; only
            `score_dict` is.
        score_dict: dict mapping name -> score. Its insertion order is the
            "current" ordering used by the sorting/shuffling helpers.
    """
    def __init__(self,mol2file,scoredict=None, skip_fileload=False):
        """Build from a file, or from explicit names and scores.

        Args:
            mol2file: If `scoredict` is None, the path of the file to parse.
                Otherwise the sequence of names.
            scoredict: None (parse `mol2file`), a dict name -> score (stored
                as-is, not copied), or an indexable of scores aligned with
                the names.
            skip_fileload: When `scoredict` is None, skip parsing and leave
                the object empty.
        """
        if scoredict is None:
            self.filename=mol2file
            # Always define names so an empty (skip_fileload) object is usable.
            self.names=[]
            self.score_dict=dict()
            if not skip_fileload: self._loadFile()
        else:
            self.filename=None
            self.names=mol2file
            # isinstance also accepts dict subclasses (OrderedDict, defaultdict, ...)
            if isinstance(scoredict,dict):
                self.score_dict=scoredict
            else:
                self.score_dict=dict()
                for i,n in enumerate(self.names):
                    self.score_dict[n]=scoredict[i]
            
    def _loadFile(self):
        """Parse `self.filename` and fill `names` and `score_dict`.

        Only lines starting with '#' are inspected. A line containing "Name"
        contributes the text after the first ':' as a name (or "Unnamed<k>"
        when nothing follows); a line containing "Grid_Score" contributes its
        third whitespace-separated token as a float score. Lines that are not
        valid UTF-8 are skipped.

        Raises:
            IndexError: if fewer scores than names were found.
        """
        self.names=[]
        self.scores=[]
        name_counter=1
        # Binary mode + per-line decode so a stray non-UTF-8 line does not abort the load.
        with open(self.filename,"rb") as f:
            for raw in f:
                try: line=raw.decode("utf-8")
                except UnicodeDecodeError: continue
                line=line.strip()
                if not line.startswith("#"): continue
                if "Name" in line:
                    parts=[s.strip() for s in line.split(":") if len(s.strip())]
                    if len(parts)>1:
                        self.names.append(str(parts[1]))
                    else:
                        self.names.append("Unnamed"+str(name_counter))
                        name_counter+=1
                elif "Grid_Score" in line:
                    # Expected shape: "<hashes> Grid_Score: <value>"
                    parts=line.split()
                    self.scores.append(float(parts[2]))
        print("Loaded",len(self.names),"elements and found",len(self.scores),"scores (must match)")
        for i,n in enumerate(self.names):
            self.score_dict[n]=self.scores[i]
        #del self.names
        del self.scores

    def __getitem__(self,mn):
        """Return the score stored for name `mn` (KeyError if absent)."""
        return self.score_dict[mn]
    def __len__(self):
        """Return the number of entries in `score_dict`."""
        return len(self.score_dict)

    def apply_on_keys(self,fx):
        """Rename every entry: each name `n` in `names` becomes `fx(n)`.

        A fresh list is assigned to `names` instead of writing in place, so
        a caller-owned sequence is not mutated and a fixed-width NumPy string
        array (as left by `sort_scores(sort_order=...)`) cannot truncate the
        new names.
        """
        nd=dict()
        new_names=[]
        for old in self.names:
            modn=fx(old)
            nd[modn]=self.score_dict[old]
            new_names.append(modn)
        self.names=new_names
        self.score_dict=nd

    def apply_on_values(self,fx):
        """Replace every score `s` by `fx(s)`, keeping keys and order."""
        self.score_dict={k:fx(v) for k,v in self.score_dict.items()}

    def sort_scores(self,sort_order=None):
        """Reorder `score_dict` and return it.

        Args:
            sort_order: None to sort ascending by score (only `score_dict` is
                reordered), or an index sequence applied to the current
                key/value order. In the latter case keys are coerced to str,
                values to float, and `names`/`scores` are set to the
                reordered NumPy arrays.
        """
        if sort_order is None: self.score_dict=dict(sorted(self.score_dict.items(), key=lambda item: item[1]))
        else:
            all_keys=np.array(list(self.score_dict.keys()),dtype=str)
            all_vals=np.array(list(self.score_dict.values()),dtype=float)
            all_keys=all_keys[sort_order]
            all_vals=all_vals[sort_order]
            self.score_dict=dict()
            for ki,k in enumerate(all_keys):
                self.score_dict[k]=all_vals[ki]
            self.names=all_keys
            self.scores=all_vals
            
        return self.score_dict

    def shuffle(self):
        """Put `score_dict` into a random order (via `np.random`) and return it."""
        items=list(self.score_dict.items())
        # A permutation of positions avoids calling .item() on a scalar random draw,
        # which is not available when the draw is a plain Python float.
        order=np.random.permutation(len(items))
        self.score_dict={items[i][0]:items[i][1] for i in order}
        return self.score_dict

    def get_score_vec(self):
        """Return all scores, in `score_dict` order, as a float NumPy array."""
        return np.array(list(self.score_dict.values()),dtype=float)
        
    def get_score_mean(self):
        """Return the NaN-ignoring mean of all scores as a Python float."""
        vals=list(self.score_dict.values())
        return np.nanmean(vals).item()
        
    def fix_score_mean(self,mean_value=0):
        """Shift all scores so that their NaN-ignoring mean equals `mean_value`.

        Returns:
            self, to allow chaining.
        """
        sft=self.get_score_mean()-mean_value
        for k in self.score_dict:
            self.score_dict[k]-=sft
        return self

    @staticmethod
    def generateFromData(names,scores,use_filename_label=None,duplicates="fail"):
        """Alternate constructor from parallel `names` and `scores` sequences.

        Declared as a staticmethod so it can be called on the class or on an
        instance.

        Args:
            names: Sequence of names.
            scores: Indexable of scores aligned with `names`.
            use_filename_label: Value stored as `filename` (no file is read).
            duplicates: "lowest" keeps the minimum score of a repeated name,
                "highest" keeps the maximum; any other value raises.

        Raises:
            ValueError: on a repeated name unless `duplicates` is "lowest" or
                "highest".
        """
        ret=DOCKOutput(use_filename_label,None,skip_fileload=True)
        for ni,n in enumerate(names):
            if n in ret.score_dict:
                if duplicates == "lowest":
                    ret.score_dict[n]=min(ret.score_dict[n],scores[ni])
                elif duplicates == "highest":
                    ret.score_dict[n]=max(ret.score_dict[n],scores[ni])
                else:
                    raise ValueError("Duplicate name "+str(n)+" encountered in input")
            else:
                ret.score_dict[n]=scores[ni]
        # The inputs are stored as given (duplicates included, not copied).
        ret.names=names
        ret.scores=scores
        return ret

In [59]:
def combine_DOCKOutputs(outputs,aggregate=np.nanmean,missing=np.nan):
    """Merge several DOCKOutput objects into one, key by key.

    Args:
        outputs: Sequence of DOCKOutput objects (iterated twice).
        aggregate: Callable applied to the float array of per-output scores
            of each key. Pass None to keep the raw per-output lists instead.
        missing: Placeholder used for outputs whose `names` lack a key.

    Returns:
        A DOCKOutput whose names are the union of all `names`, in order of
        first appearance, and whose scores are the aggregated values.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, without the
    # quadratic cost of repeated list membership tests.
    all_keys=list(dict.fromkeys(k for do in outputs for k in do.names))
    cout={k:[] for k in all_keys}
    for do in outputs:
        # Build the membership set once per output rather than scanning names per key.
        present=set(do.names)
        for k in all_keys:
            cout[k].append(do[k] if k in present else missing)

    if aggregate is not None:
        for k in all_keys:
            cout[k]=aggregate(np.array(cout[k],dtype=float))
    return DOCKOutput(all_keys,cout)

In [None]:
def splice_DOCKOutputs(outputs,size_lim=-1,write_indices_instead_of_scores=False,aggregation=np.nanmean):
    """Interleave several DOCKOutput objects rank by rank.

    The keys of every output are taken in `score_dict` order. For each
    position idx = 0, 1, ... the keys found at that position across all
    outputs (and not already emitted) are collected, their values aggregated,
    and the entries of that round appended to the result sorted by value.

    Args:
        outputs: Sequence of DOCKOutput objects.
        size_lim: If > 0, only the first `size_lim` keys of each output are
            considered, and the walk stops once the result holds more than
            `size_lim` entries. The check runs after a full round, so the
            result can exceed `size_lim`.
        write_indices_instead_of_scores: If True, store the number of keys
            emitted before the current round (as float) instead of the score.
        aggregation: Callable reducing the list of values collected for one
            key within a round.

    Returns:
        A DOCKOutput with keys in emission order; empty when `outputs` is
        empty.

    Raises:
        ValueError: if a position cannot be read from a key list.
    """
    if size_lim>0:
        raw_keys=[list(dout.score_dict.keys())[:size_lim] for dout in outputs]
    else:
        raw_keys=[list(dout.score_dict.keys()) for dout in outputs]
    key_counts=[len(kl) for kl in raw_keys]
    # default=0 keeps an empty `outputs` from raising in the reduction.
    max_count=max(key_counts,default=0)

    found_keys=[]
    found_set=set()  # mirrors found_keys for O(1) membership tests
    ret=dict()
    idx=0
    while True:
        tret=dict()
        for i,dout in enumerate(outputs):
            if idx>=len(dout): continue
            try:
                ckey=raw_keys[i][idx]
            except IndexError as err:
                print("Index",idx,"of output",i,"failed")
                print("Dock-out size:",len(dout))
                raise ValueError("Index "+str(idx)+" is out of range for output "+str(i)) from err
            if ckey in found_set: continue

            # found_keys only grows after the round, so all keys of one round share the same index.
            kv=float(len(found_keys)) if write_indices_instead_of_scores else dout[ckey]
            if ckey in tret: tret[ckey].append(kv)
            else: tret[ckey]=[kv]
        
        for tk in tret:
            tret[tk]=aggregation(tret[tk])
            # Keys in tret were filtered against found_set above, so they are all new.
            found_keys.append(tk)
            found_set.add(tk)
        
        tret=dict(sorted(tret.items(), key=lambda item: item[1]))
        for k in tret: ret[k]=tret[k]
            
        if size_lim>0 and len(ret)>size_lim: break
        idx+=1
        if idx>=max_count: break

    return DOCKOutput(found_keys,ret)