In [21]:
import pandas as pd
import numpy as np
import os

from MOA_L1000 import validation, config
import MOA_L1000.dataprep as dataprep
import MOA_L1000.dataload as dataload
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [60]:

def load_prediction(config_id=1):
    """Load the params, ensemble solutions and holdout metrics of one run.

    Parameters
    ----------
    config_id : int or str
        Suffix of the ``config.get_params_<config_id>`` function whose
        return value describes the run (e.g. ``'DNN17'``).

    Returns
    -------
    dict_solutions : dict
        Has exactly the keys ``'df_ensemble'``, ``'df_ensemble_mean'`` and
        ``'df_ensemble_median'``. Each value is the object stored under the
        ``'root'`` key of the matching ``df_solution_ensemble*.h5`` file in
        the run's output directory, cast to ``float64``, or ``None`` when
        that file does not exist.
    df_metrics : pandas.DataFrame
        ``holdout_metrics.csv`` from the output directory, parsed as
        tab-separated with the first column as index. This file is read
        unconditionally, so a missing file propagates pandas' error.
    params
        Whatever ``config.get_params_<config_id>()`` returned; it must offer
        ``.get("output_directory")``.

    Raises
    ------
    AttributeError
        If ``config`` has no ``get_params_<config_id>`` attribute.
    """
    # Plain attribute lookup instead of eval(): identical for valid ids, but
    # a config_id can never be executed as arbitrary Python code.
    fcn_config = getattr(config, "get_params_{}".format(config_id))
    params = fcn_config()
    directory = params.get("output_directory")

    # Result key -> file name inside the output directory.
    solution_files = {
        'df_ensemble': "df_solution_ensemble.h5",
        'df_ensemble_mean': "df_solution_ensemble_mean.h5",
        'df_ensemble_median': "df_solution_ensemble_median.h5",
    }
    dict_solutions = {}
    for key, basename in solution_files.items():
        filename = os.path.join(directory, basename)
        if os.path.exists(filename):
            dict_solutions[key] = pd.read_hdf(filename, 'root').astype('float64')
        else:
            # Keep the key present so callers can index without checking.
            dict_solutions[key] = None

    filename = os.path.join(directory, "holdout_metrics.csv")
    df_metrics = pd.read_csv(filename, sep="\t", index_col=0)
    return dict_solutions, df_metrics, params


def compute_precision_and_recall(df_solution, df_ground=None):
    """Compute precision and recall of a solution against the ground truth.

    The solution is first restricted (and reordered) to the rows of
    ``df_ground`` via ``df_solution.loc[df_ground.index, :]``, then handed to
    ``validation.compute_precision`` and ``validation.compute_recall``.

    Parameters
    ----------
    df_solution : pandas.DataFrame
        Predictions; must contain every index label of ``df_ground``.
    df_ground : pandas.DataFrame
        Ground truth. Required despite the ``None`` default, which is kept
        only so existing keyword/positional call sites stay valid.

    Returns
    -------
    (precisions, recalls)
        The return values of ``validation.compute_precision`` and
        ``validation.compute_recall`` respectively.

    Raises
    ------
    ValueError
        If ``df_ground`` is not supplied.
    """
    # Explicit check rather than `assert`: asserts vanish under `python -O`,
    # which would turn a missing argument into an AttributeError on None.
    if df_ground is None:
        raise ValueError("df_ground is required to compute precision and recall")

    # Align predictions with the ground-truth rows before scoring.
    df_aligned = df_solution.loc[df_ground.index, :]
    precisions = validation.compute_precision(df_aligned, df_ground)
    recalls = validation.compute_recall(df_aligned, df_ground)
    return precisions, recalls


def load_all_predictions_and_metrics(config_ids=None):
    """Load the ensemble solutions and holdout scores of several runs.

    Parameters
    ----------
    config_ids : iterable, optional
        Config ids, each passed to ``load_prediction``. ``None`` (the
        default) is treated as "no runs".

    Returns
    -------
    solutions : dict
        ``config_id -> dict_solutions`` as returned by ``load_prediction``.
    df_metrics : pandas.DataFrame
        One column per config id, holding the ``'holdout_score'`` column of
        that run's metrics frame. Empty when no config ids are given.
    """
    # None instead of a mutable `[]` default, which is shared across calls.
    if config_ids is None:
        config_ids = ()

    solutions = {}
    metrics = {}
    for config_id in config_ids:
        # The run params are not needed here, only solutions and metrics.
        dict_solutions, df_run_metrics, _ = load_prediction(config_id)
        solutions[config_id] = dict_solutions
        metrics[config_id] = df_run_metrics

    df_metrics = pd.DataFrame(
        {config_id: d['holdout_score'] for config_id, d in metrics.items()}
    )
    return solutions, df_metrics

In [76]:
# Config ids DNN1 .. DNN17; DNN16 is absent from the original list, so it is skipped here as well.
config_ids = ["DNN{}".format(i) for i in range(1, 18) if i != 16]
# Load every run's ensemble solutions plus a frame with one holdout-score column per config.
predictions, df_metrics = load_all_predictions_and_metrics(config_ids=config_ids)
df_metrics

Unnamed: 0,DNN1,DNN2,DNN3,DNN4,DNN5,DNN6,DNN7,DNN8,DNN9,DNN10,DNN11,DNN12,DNN13,DNN14,DNN15,DNN17
0_0,0.05419,0.05138,0.047,0.04875,0.05168,0.04791,0.04737,0.04737,0.05253,0.05565,0.05407,0.05704,0.05017,0.0503,0.04725,0.04559
0_1,0.05362,0.05431,0.04883,0.04779,0.05188,0.0476,0.04857,0.04693,0.0522,0.05718,0.05807,0.05932,0.05277,0.05157,0.04671,0.04629
0_2,0.05524,0.05703,0.04719,0.04905,0.05018,0.04853,0.04735,0.04707,0.05496,0.05593,0.05611,0.05926,0.05373,0.05347,0.04588,0.04624
0_3,0.05382,0.05237,0.04845,0.04695,0.05395,0.04773,0.04839,0.04721,0.0544,0.05552,0.05444,0.05828,0.05228,0.05075,0.04538,0.04481
0_4,0.05772,0.05282,0.04834,0.04952,0.05258,0.0477,0.04821,0.04798,0.05342,0.05659,0.05691,0.05918,0.0528,0.0521,0.04756,0.05167
1_0,0.05342,0.0559,0.0457,0.0485,0.05531,0.04816,0.04835,0.0471,0.05415,0.05612,0.05799,0.05654,0.05126,0.05152,0.04486,0.04696
1_1,0.05651,0.05424,0.04562,0.04916,0.05109,0.04759,0.04805,0.0459,0.05178,0.05755,0.05786,0.05939,0.05214,0.05316,0.04541,0.04672
1_2,0.05306,0.05203,0.0463,0.04851,0.05099,0.0481,0.04845,0.04749,0.05402,0.05604,0.05372,0.06003,0.0531,0.05182,0.04609,0.04643
1_3,0.0594,0.05131,0.04625,0.04772,0.0528,0.04751,0.04935,0.04808,0.05581,0.05571,0.05588,0.05792,0.05282,0.05031,0.04621,0.04617
1_4,0.04976,0.05479,0.04901,0.04934,0.05033,0.04847,0.04681,0.04691,0.05356,0.05936,0.05044,0.05782,0.05058,0.0511,0.04567,0.04618


In [78]:
# Load the data splits (names suggest train/holdout features, weights and labels — confirm in dataload).
(X_train, X_holdout, w_train, y_train, y_holdout) = dataload.load_data_v1()
# NOTE: this rebinds `df_metrics`, replacing the all-config summary frame from the earlier cell.
(dict_solutions, df_metrics, params) = load_prediction(config_id='DNN17')
# Score the median ensemble of DNN17 against the holdout labels.
df_preds = dict_solutions['df_ensemble_median']
(precisions, recalls) = compute_precision_and_recall(
    df_solution=df_preds,
    df_ground=y_holdout,
)
# Combine both scores into one table keyed by "moa".
metrics = pd.merge(left=precisions, right=recalls, on="moa")


In [82]:
# Only the last expression of a cell is rendered, so the precision-sorted view
# was previously computed and thrown away. `display` (provided by the IPython
# kernel) shows it explicitly; the recall-sorted view stays the cell's output.
display(metrics.tail(30).sort_values('precision'))
metrics.tail(30).sort_values('recall')

Unnamed: 0_level_0,precision,recall
moa,Unnamed: 1_level_1,Unnamed: 2_level_1
ACETYLCHOLINE RECEPTOR AGONIST,0.0,0.0
CHELATING AGENT,0.0,0.0
HMGCR INHIBITOR,0.0,0.0
ANDROGEN RECEPTOR ANTAGONIST,0.25,0.111111
DNA INHIBITOR,0.272727,0.142857
DOPAMINE RECEPTOR ANTAGONIST,0.038835,0.166667
KIT INHIBITOR,0.333333,0.166667
SRC INHIBITOR,1.0,0.166667
CYCLOOXYGENASE INHIBITOR,0.064516,0.222222
SEROTONIN RECEPTOR ANTAGONIST,0.066116,0.296296


In [45]:
# Render the predictions frame currently bound to `df_preds` as the cell output.
# NOTE(review): this cell's execution count (45) is lower than that of the cell
# assigning `df_preds` (78), so the stored output may come from an earlier
# assignment — confirm with Restart Kernel -> Run All.
df_preds

Unnamed: 0_level_0,Unnamed: 1_level_0,CCK RECEPTOR ANTAGONIST,PLK INHIBITOR,TRICYCLIC ANTIDEPRESSANT,CHOLESTEROL INHIBITOR,ATP CHANNEL BLOCKER,THROMBOXANE RECEPTOR ANTAGONIST,SRC INHIBITOR,AROMATASE INHIBITOR,OPIOID RECEPTOR ANTAGONIST,HISTAMINE RECEPTOR AGONIST,...,VEGFR INHIBITOR,ADRENERGIC RECEPTOR AGONIST,CALCIUM CHANNEL BLOCKER,HISTAMINE RECEPTOR ANTAGONIST,DNA INHIBITOR,PHOSPHODIESTERASE INHIBITOR,CYCLOOXYGENASE INHIBITOR,ADRENERGIC RECEPTOR ANTAGONIST,DOPAMINE RECEPTOR ANTAGONIST,SEROTONIN RECEPTOR ANTAGONIST
pert_id,pert_idose,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
BRD-A02006392,1.11 uM,0.001596,0.000692,0.001914,0.001720,0.001825,0.004749,0.002169,0.002727,0.002826,0.003465,...,0.005987,0.019938,0.017722,0.023576,0.006306,0.017709,0.040408,0.022174,0.012167,0.026356
BRD-A02006392,10 uM,0.003545,0.000993,0.003025,0.001784,0.004690,0.004297,0.001726,0.002954,0.002501,0.004214,...,0.004604,0.014933,0.015883,0.012560,0.004654,0.033108,0.042938,0.025742,0.015902,0.026101
BRD-A02006392,3.33 uM,0.000999,0.000293,0.002145,0.000929,0.001314,0.004420,0.000632,0.001586,0.002727,0.000907,...,0.003605,0.013139,0.015617,0.010116,0.005312,0.013110,0.022189,0.014730,0.016537,0.027401
BRD-A03216249,1.11 uM,0.139555,0.003569,0.003035,0.003739,0.002245,0.004004,0.001535,0.001603,0.001525,0.002861,...,0.027325,0.002877,0.001283,0.059584,0.017334,0.005838,0.028584,0.001684,0.001623,0.004022
BRD-A03216249,10 uM,0.012110,0.003135,0.006917,0.007079,0.007220,0.014371,0.001967,0.004164,0.004817,0.002425,...,0.019285,0.022003,0.013609,0.031090,0.006947,0.026261,0.027289,0.026535,0.032939,0.075620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BRD-K99792991,10 uM,0.000091,0.001190,0.000118,0.000009,0.000054,0.000035,0.000363,0.000085,0.000273,0.000076,...,0.002050,0.000023,0.008067,0.000038,0.027971,0.000092,0.000067,0.000024,0.000521,0.000192
BRD-K99792991,3.33 uM,0.000990,0.003885,0.002636,0.000280,0.000478,0.001189,0.003644,0.001259,0.002673,0.001023,...,0.010887,0.000800,0.010498,0.002096,0.131270,0.001230,0.000595,0.000306,0.004781,0.001964
BRD-K99964838,1.11 uM,0.001509,0.002688,0.007831,0.002437,0.002441,0.003377,0.003778,0.007829,0.006003,0.001974,...,0.033006,0.010829,0.024114,0.009786,0.006892,0.004830,0.012690,0.019484,0.070567,0.019361
BRD-K99964838,10 uM,0.000374,0.003213,0.004124,0.000082,0.000355,0.000174,0.016739,0.000502,0.000704,0.000650,...,0.162438,0.000648,0.019062,0.000866,0.001965,0.000170,0.000252,0.001867,0.009850,0.003257
