In [2]:
import pandas as pd
import numpy as np
import os

from MOA_L1000 import validation, config
import MOA_L1000.dataprep as dataprep
import MOA_L1000.dataload as dataload
%load_ext autoreload
%autoreload 2

# Load the dataset splits through the project's loader. Of the five objects returned,
# the cells visible below only use y_holdout (as ground truth for precision/recall).
X_train, X_holdout, w_train, y_train, y_holdout = dataload.load_data_v1()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:

def load_prediction(config_id=1):
    """Load the params of one run, its ensemble solutions and its holdout metrics.

    Parameters
    ----------
    config_id : int or str
        Suffix of the ``config.get_params_<config_id>`` factory to call.

    Returns
    -------
    dict_solutions : dict
        Keys 'df_ensemble', 'df_ensemble_mean' and 'df_ensemble_median'. Each value is
        the content of the matching ``df_solution_ensemble*.h5`` file (key 'root') cast
        to float64, or None when that file is absent from the run's output directory.
    df_metrics : pandas.DataFrame
        Content of the tab-separated ``holdout_metrics.csv``, first column as index.
    params
        Whatever the config factory returned; its "output_directory" entry is the
        directory all files above are read from.

    Raises
    ------
    AttributeError
        If ``config`` has no ``get_params_<config_id>`` attribute.
    FileNotFoundError
        If ``holdout_metrics.csv`` is missing (raised by ``pandas.read_csv``).
    """
    # Plain attribute lookup instead of eval(): identical result for valid ids, but a
    # malformed or hostile config_id can no longer be executed as an expression.
    fcn_config = getattr(config, "get_params_{}".format(config_id))
    params = fcn_config()
    directory = params.get("output_directory")

    # The three ensemble variants only differ by a suffix shared between the dict key
    # ('df_<suffix>') and the file name ('df_solution_<suffix>.h5').
    dict_solutions = {}
    for suffix in ("ensemble", "ensemble_mean", "ensemble_median"):
        filename = os.path.join(directory, "df_solution_{}.h5".format(suffix))
        if os.path.exists(filename):
            df_solution = pd.read_hdf(filename, 'root').astype('float64')
        else:
            # Missing variants are reported as None rather than raising.
            df_solution = None
        dict_solutions["df_{}".format(suffix)] = df_solution

    filename = os.path.join(directory, "holdout_metrics.csv")
    df_metrics = pd.read_csv(filename, sep="\t", index_col=0)
    return dict_solutions, df_metrics, params


def compute_precision_and_recall(df_solution, df_ground=None):
    """Compute precision and recall of a solution against a ground truth.

    The solution is first restricted (and reordered) to the rows of ``df_ground``
    with ``df_solution.loc[df_ground.index, :]``; the aligned solution and the ground
    truth are then handed to ``validation.compute_precision`` and
    ``validation.compute_recall``.

    Parameters
    ----------
    df_solution : pandas.DataFrame
        Predictions; must contain every index label present in ``df_ground``.
    df_ground : pandas.DataFrame
        Ground truth. Required despite the ``None`` default.

    Returns
    -------
    precisions, recalls
        The values returned by ``validation.compute_precision`` and
        ``validation.compute_recall`` respectively.

    Raises
    ------
    AssertionError
        If ``df_ground`` is None.
    """
    assert df_ground is not None, (
        "df_ground is required: pass the ground-truth frame to score df_solution against"
    )
    # Align the predictions on the ground-truth rows before scoring.
    df_s = df_solution.loc[df_ground.index, :]
    precisions = validation.compute_precision(df_s, df_ground)
    recalls = validation.compute_recall(df_s, df_ground)
    return precisions, recalls


def load_all_predictions_and_metrics(config_ids=None):
    """Load the ensemble solutions and holdout scores of several runs.

    Parameters
    ----------
    config_ids : iterable, optional
        Run identifiers, each passed to ``load_prediction``. ``None`` (the default)
        is treated as "no runs".

    Returns
    -------
    solutions : dict
        Maps each config id to the solutions dict returned by ``load_prediction``.
    df_metrics : pandas.DataFrame
        One column per config id, holding the 'holdout_score' column of that run's
        holdout metrics. Empty when no config id is given.
    """
    # None sentinel instead of a mutable `[]` default shared between calls.
    if config_ids is None:
        config_ids = ()

    solutions = {}
    holdout_scores = {}
    for config_id in config_ids:
        # The run params are returned too but are not needed here.
        dict_solutions, df_run_metrics, _ = load_prediction(config_id)
        solutions[config_id] = dict_solutions
        holdout_scores[config_id] = df_run_metrics['holdout_score']

    df_metrics = pd.DataFrame(holdout_scores)
    return solutions, df_metrics

In [7]:
# Identifiers of the 14 DNN runs to compare; each one is resolved by load_prediction to a
# config.get_params_<id> factory.
config_ids = ['DNN1', "DNN2", "DNN3", "DNN4", "DNN5", "DNN6", "DNN7", "DNN8", "DNN9", "DNN10", "DNN11", "DNN12", "DNN13", "DNN14"]
# predictions: per-run dict of ensemble solutions; df_metrics: one holdout-score column per run.
predictions, df_metrics = load_all_predictions_and_metrics(config_ids)
df_metrics

Unnamed: 0,DNN1,DNN2,DNN3,DNN4,DNN5,DNN6,DNN7,DNN8,DNN9,DNN10,DNN11,DNN12,DNN13,DNN14
0_0,0.05419,0.05138,0.047,0.04875,0.05168,0.04791,0.04737,0.0463,0.05253,0.05565,0.05407,0.05704,0.05017,0.0503
0_1,0.05362,0.05431,0.04883,0.04779,0.05188,0.0476,0.04857,0.04699,0.0522,0.05718,0.05807,0.05932,0.05277,0.05157
0_2,0.05524,0.05703,0.04719,0.04905,0.05018,0.04853,0.04735,0.04687,0.05496,0.05593,0.05611,0.05926,0.05373,0.05347
0_3,0.05382,0.05237,0.04845,0.04695,0.05395,0.04773,0.04839,0.0459,0.0544,0.05552,0.05444,0.05828,0.05228,0.05075
0_4,0.05772,0.05282,0.04834,0.04952,0.05258,0.0477,0.04821,0.0471,0.05342,0.05659,0.05691,0.05918,0.0528,0.0521
1_0,0.05342,0.0559,0.0457,0.0485,0.05531,0.04816,0.04835,0.0475,0.05415,0.05612,0.05799,0.05654,0.05126,0.05152
1_1,0.05651,0.05424,0.04562,0.04916,0.05109,0.04759,0.04805,0.04488,0.05178,0.05755,0.05786,0.05939,0.05214,0.05316
1_2,0.05306,0.05203,0.0463,0.04851,0.05099,0.0481,0.04845,0.04696,0.05402,0.05604,0.05372,0.06003,0.0531,0.05182
1_3,0.0594,0.05131,0.04625,0.04772,0.0528,0.04751,0.04935,0.04656,0.05581,0.05571,0.05588,0.05792,0.05282,0.05031
1_4,0.04976,0.05479,0.04901,0.04934,0.05033,0.04847,0.04681,0.04581,0.05356,0.05936,0.05044,0.05782,0.05058,0.0511


In [8]:
# Inspect a single run ('DNN2') in detail.
# NOTE(review): this rebinds `df_metrics`, replacing the all-runs holdout-score table
# returned by load_all_predictions_and_metrics with DNN2's own metrics table.
dict_solutions, df_metrics, params = load_prediction('DNN2')
# Median-ensemble predictions; load_prediction leaves this entry as None when
# df_solution_ensemble_median.h5 is absent from the run's output directory.
df_preds = dict_solutions['df_ensemble_median']
# Score the predictions against the holdout labels, which are passed as the ground truth.
precisions, recalls = compute_precision_and_recall(df_preds, y_holdout)


In [9]:
# Show the last (up to) 30 entries of `precisions`, with the index moved into a regular column.
precisions.tail(30).reset_index()

Unnamed: 0,moa,is_top
0,ACETYLCHOLINE RECEPTOR ANTAGONIST,0.1
1,DNA INHIBITOR,0.1
2,SODIUM CHANNEL INHIBITOR,0.111111
3,ANDROGEN RECEPTOR ANTAGONIST,0.117647
4,PDGFR INHIBITOR,0.125
5,PHOSPHODIESTERASE INHIBITOR,0.15
6,STEROL DEMETHYLASE INHIBITOR,0.2
7,PROSTANOID RECEPTOR ANTAGONIST,0.25
8,EGFR INHIBITOR,0.259259
9,PPAR RECEPTOR AGONIST,0.3


In [10]:
# Show the last (up to) 30 entries of `recalls`, with the index moved into a regular column.
recalls.tail(30).reset_index()

Unnamed: 0,moa,0
0,PROSTANOID RECEPTOR ANTAGONIST,0.166667
1,SEROTONIN RECEPTOR ANTAGONIST,0.185185
2,ANDROGEN RECEPTOR ANTAGONIST,0.222222
3,ADRENERGIC RECEPTOR ANTAGONIST,0.222222
4,PHOSPHODIESTERASE INHIBITOR,0.25
5,DOPAMINE RECEPTOR ANTAGONIST,0.25
6,ACETYLCHOLINE RECEPTOR ANTAGONIST,0.333333
7,STEROL DEMETHYLASE INHIBITOR,0.333333
8,P38 MAPK INHIBITOR,0.333333
9,VOLTAGE-GATED SODIUM CHANNEL BLOCKER,0.333333


In [11]:
# Display the DNN2 median-ensemble prediction table (the saved output below is an elided preview).
df_preds

Unnamed: 0_level_0,Unnamed: 1_level_0,CCK RECEPTOR ANTAGONIST,PLK INHIBITOR,TRICYCLIC ANTIDEPRESSANT,CHOLESTEROL INHIBITOR,ATP CHANNEL BLOCKER,THROMBOXANE RECEPTOR ANTAGONIST,SRC INHIBITOR,AROMATASE INHIBITOR,OPIOID RECEPTOR ANTAGONIST,HISTAMINE RECEPTOR AGONIST,...,VEGFR INHIBITOR,ADRENERGIC RECEPTOR AGONIST,CALCIUM CHANNEL BLOCKER,HISTAMINE RECEPTOR ANTAGONIST,DNA INHIBITOR,PHOSPHODIESTERASE INHIBITOR,CYCLOOXYGENASE INHIBITOR,ADRENERGIC RECEPTOR ANTAGONIST,DOPAMINE RECEPTOR ANTAGONIST,SEROTONIN RECEPTOR ANTAGONIST
pert_id,pert_idose,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
BRD-A02006392,1.11 uM,0.000714,0.000108,0.001536,0.000296,0.000647,0.000938,0.001012,0.000916,0.000595,0.005868,...,0.000529,0.003026,0.002363,0.015013,0.000585,0.001628,0.004018,0.004784,0.002574,0.004189
BRD-A02006392,10 uM,0.001409,0.000270,0.001659,0.000672,0.000808,0.000604,0.001499,0.000934,0.002014,0.000557,...,0.000329,0.001607,0.006545,0.002066,0.000577,0.003377,0.002896,0.004531,0.002315,0.019060
BRD-A02006392,3.33 uM,0.000106,0.000245,0.002924,0.000777,0.000566,0.001004,0.000321,0.001635,0.001821,0.000635,...,0.000200,0.002788,0.007691,0.009988,0.000572,0.001814,0.005681,0.003481,0.002143,0.003255
BRD-A03216249,1.11 uM,0.047864,0.000534,0.001447,0.000926,0.001212,0.000769,0.000254,0.000538,0.001535,0.001550,...,0.070365,0.000591,0.000927,0.036426,0.002454,0.002315,0.006820,0.000299,0.003235,0.001449
BRD-A03216249,10 uM,0.020773,0.000321,0.008220,0.008295,0.002569,0.006444,0.000408,0.000473,0.002141,0.001191,...,0.010221,0.007593,0.006522,0.006340,0.002450,0.004783,0.002179,0.003130,0.009613,0.004014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BRD-K99792991,10 uM,0.000362,0.000645,0.000171,0.000075,0.000303,0.000130,0.000602,0.000208,0.001317,0.001464,...,0.003247,0.000647,0.020170,0.000876,0.153500,0.000699,0.000602,0.000109,0.001207,0.000109
BRD-K99792991,3.33 uM,0.000328,0.000186,0.001147,0.000191,0.000148,0.001076,0.003387,0.000219,0.000633,0.002372,...,0.003045,0.001495,0.009438,0.003783,0.180467,0.000901,0.000341,0.000084,0.002237,0.000369
BRD-K99964838,1.11 uM,0.002361,0.000333,0.005652,0.000267,0.000280,0.001877,0.011123,0.004042,0.001743,0.000643,...,0.058664,0.001186,0.004844,0.008521,0.000720,0.001886,0.001410,0.001302,0.017114,0.001335
BRD-K99964838,10 uM,0.000246,0.001776,0.004728,0.000042,0.000061,0.000184,0.045817,0.000136,0.000744,0.000375,...,0.204343,0.000108,0.006027,0.000347,0.001360,0.000082,0.000076,0.000970,0.002347,0.000676
