In [1]:
%matplotlib inline
import os
import sys
import glob
import logging
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import common.metrics as metrics
import numpy as np
import pandas as pd
import cPickle as pkl
import common.data_converter as dc
from common.plots import UnknownsDistributionPlot, RSquaredPlot
import lasagne_nn.output_loader as ol

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

ERROR (theano.gpuarray): Could not initialize pygpu, support disabled
Traceback (most recent call last):
  File "/srv/home/ecaceres/anaconda2/envs/features/lib/python2.7/site-packages/theano/gpuarray/__init__.py", line 227, in <module>
    use(config.device)
  File "/srv/home/ecaceres/anaconda2/envs/features/lib/python2.7/site-packages/theano/gpuarray/__init__.py", line 214, in use
    init_dev(device, preallocate=preallocate)
  File "/srv/home/ecaceres/anaconda2/envs/features/lib/python2.7/site-packages/theano/gpuarray/__init__.py", line 99, in init_dev
    **args)
  File "pygpu/gpuarray.pyx", line 658, in pygpu.gpuarray.init
  File "pygpu/gpuarray.pyx", line 587, in pygpu.gpuarray.pygpu_init
GpuArrayException: Could not load "libcuda.so": libcuda.so: cannot open shared object file: No such file or directory


In [2]:
# data handling

def get_env_var(handle):
    """Return the value of the environment variable named ``handle``.

    Leading and trailing single-quote characters are stripped from the value.

    Raises:
        LookupError: if the variable is unset or set to an empty string.
    """
    value = os.environ.get(handle)
    if value:
        return value.strip("'")
    raise LookupError("Environment variable: {} not set.".format(handle))

def get_preds(dset_dict, expt, dset, fold, ratio=None):
    """Load the predicted and known value arrays for one experiment/dataset/fold.

    Args:
        dset_dict (dict):
            lookup keyed by experiment name; each entry must provide "indir",
            "knowns_fmt", "preds_fmt" and "npz_fmt"
        expt (str):
            key of the experiment entry to read from ``dset_dict``
        dset:
            dataset identifier substituted into the path templates
        fold:
            fold identifier substituted into the path templates
        ratio:
            optional; when not None it is substituted into the path templates
            between the input directory and ``dset``

    Returns:
        tuple: ``(prediction, truth)`` as loaded by ``np.load``; when the
        entry's "npz_fmt" flag is set, the "arr_0" member of each archive.
    """
    # Read everything from the dictionary that was passed in. The previous
    # version took the paths from the notebook-global `data_dict`, so the
    # `dset_dict` argument was silently ignored for path construction.
    entry = dset_dict[expt]
    indir = entry["indir"]
    if ratio is not None:
        fmt_args = (indir, ratio, dset, fold)
    else:
        fmt_args = (indir, dset, fold)
    knowns_file = entry["knowns_fmt"].format(*fmt_args)
    preds_file = entry["preds_fmt"].format(*fmt_args)

    prediction = np.load(preds_file)
    truth = np.load(knowns_file)

    # Equality comparison kept as in the original so that the set of flag
    # values treated as "npz" is unchanged.
    if entry["npz_fmt"] == True:
        prediction = prediction["arr_0"]
        truth = truth["arr_0"]
    return prediction, truth

In [3]:
def plot_rsquared(prediction, known_value, title="R Squared", img_filename="rsquared_{}.png", result_name='r2',
                  result_suffix='', color="blue", cmap_color="BuPu", fontsize=24, alpha=0.3):
    """
    Draw a joint plot of predicted vs. known values annotated with R^2.

    Only entries whose known value is not NaN and strictly greater than zero
    are plotted and scored. The figure is returned, not saved.

    Args:
        prediction (np.ndarray):
            predicted values, plotted on the x-axis; must have as many
            elements as known_value
        known_value (np.ndarray):
            known values, plotted on the y-axis
        title (str):
            unused; kept so existing callers keep working
        img_filename (str or None):
            only its directory part is used, as output_dir for
            compute_rsquared; None gives output_dir=None
        result_name (str):
            passed to compute_rsquared
        result_suffix (str):
            passed to compute_rsquared
        color (str):
            color passed to sns.jointplot
        cmap_color (str):
            name of the matplotlib colormap the hexbin overlay is derived from
        fontsize (int):
            font size of the R^2 annotation, axis labels and tick labels
        alpha (float):
            alpha passed to sns.jointplot

    Returns:
        The grid object returned by sns.jointplot, with the hexbin overlay added.
    """
    sns.set(font_scale=4)
    sns.set_style("darkgrid")
    sns.set_context("poster")
    # No plt.figure() here: sns.jointplot builds its own figure, so a figure
    # opened beforehand would stay blank and never be closed.

    t = known_value.ravel()
    keep = ~np.isnan(t)
    # Apply the "> 0" test only to the non-NaN entries.
    keep[keep] &= t[keep] > 0.0
    p = prediction.ravel()[keep]
    t = t[keep]
    output_dir = os.path.dirname(img_filename) if img_filename is not None else None
    logging.debug('plotting r^2')
    g = sns.jointplot(x=p, y=t, alpha=alpha, xlim=(0, 14.5), ylim=(0, 14.5), stat_func=None,
                      height=9, color=color, s=15, lw=0)
    rsquare = metrics.compute_rsquared(p, t, output_dir=output_dir, result_name=result_name,
                                       result_suffix=result_suffix)
    # annotate() expects a callable; hand it the value computed above.
    g = g.annotate((lambda x, y: rsquare), template="{stat}: {val:.4f}",
                   stat="$R^2$", loc="lower right", fontsize=fontsize)

    g.set_axis_labels("Predicted pAC50", "Known pAC50", fontsize=fontsize)
    g.ax_joint.tick_params(labelsize=fontsize)

    # Build the hexbin colormap: 100 RGBA entries sampled from `cmap_color`,
    # alpha ramping 0.05 -> 0.9 over the first 80 and fixed at 0.95 for the last 20.
    # NOTE(review): logspace(0.75, 1) spans roughly 5.6..10, i.e. all samples
    # lie above 1.0, so they presumably all resolve to the colormap's upper-end
    # colour and only alpha varies - confirm this is intended.
    cmap = plt.cm.get_cmap(cmap_color)
    new_color_list = cmap(np.logspace(0.75, 1, 100))
    new_color_list[:, 3] = 0.95
    new_color_list[:-20, 3] = np.linspace(0.05, 0.9, 80)

    new_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
        'trunc({n},{a:.2f},{b:.2f})'.format(n='Al'+ cmap.name, a=0, b=100),
        new_color_list)

    # mincnt=2: hexagons holding fewer than two points are left undrawn.
    g.ax_joint.hexbin(g.x, g.y, mincnt=2, cmap=new_cmap)
    g.fig.tight_layout()

    return g

In [4]:
def make_space_above(g, topmargin=1):
    """Grow the figure of ``g`` so that ``topmargin`` inches are free above the axes.

    The figure height is changed and the subplot bottom/top fractions are
    rescaled against the new height, which leaves the axes at their size.
    """
    figure = g.fig
    params = figure.subplotpars
    _, height = figure.get_size_inches()

    # New height: current height up to the top of the axes, plus the margin.
    new_height = height - (1-params.top)*height  + topmargin
    new_bottom = params.bottom*height/new_height
    new_top = 1-topmargin/new_height
    figure.subplots_adjust(bottom=new_bottom, top=new_top)
    figure.set_figheight(new_height)

In [5]:
# Load the lookup table that get_preds indexes by experiment name
# (each entry supplies "indir", "knowns_fmt", "preds_fmt" and "npz_fmt").
# NOTE: unpickling can execute arbitrary code - only load trusted files.
reg_data_file = "./regression_preds_file_lookup.pkl"
with open(reg_data_file, "rb") as f:
    data_dict = pkl.load(f)

In [6]:
# Show which experiment names are available in the lookup table.
data_dict.keys()

['STD',
 'NEG_RM_SMA',
 'NEG_RM_scrambled',
 'scrambled_idx_LC',
 'NEG_RM_RATIOS_scrambled',
 'scrambled_idx',
 'NEG_UW',
 'STD_SMA_RATIOS',
 'NEG_RM',
 'NEG_RM_RATIOS',
 'SEA_SMA',
 'scrambled_idx_no_SMA',
 'STD_SMA',
 'scrambled_idx_no_SMA_LC']

In [7]:
# Load two pickled colour dictionaries (by file name: one for points, one for bars).
# NOTE(review): neither dictionary is referenced by the cells visible here -
# confirm they are still needed.
with open("./color_dict_pt.pkl", "rb") as f: 
    color_dict_pt = pkl.load(f)
with open("./color_dict_bar.pkl", "rb") as f: 
    color_dict_bar = pkl.load(f)

In [8]:
# Output directory for the saved figures, located under $HOME
# (get_env_var raises LookupError if HOME is not set).
save_dir = "{}/final_paper_figs".format(get_env_var("HOME"))

In [9]:
# Create the output directory (and any missing parents) via the shell; no error if it already exists.
!mkdir -p $save_dir

In [10]:
# Two-line figure title: dataset name, then "(experiment label; Fold k)".
title_fmter = "{}\n({}; Fold {})"
# Font size passed to plot_rsquared and used for the figure title in the plotting cells.
fontsize=28
# NOTE(review): title_fontsize is not referenced by the cells visible here;
# the titles are drawn with `fontsize` - confirm which one is intended.
title_fontsize=33

In [11]:
# "STD" experiment: save one predicted-vs-known R^2 figure per fold and dataset.
expt = "STD"
expt_rename = "STD"
ratio = None
# Display names for datasets whose title is not simply the capitalised key.
dset_titles = {"drugmatrix": "Drug Matrix", "timesplit": "Time Split"}

for fold in np.arange(0, 5, 1):
    for dset in ["drugmatrix", "timesplit", "test", "train"]:
        prediction, truth = get_preds(data_dict, expt, dset, fold, ratio=ratio)
        nan_mask = np.isnan(truth)
        g = plot_rsquared(prediction[~nan_mask], truth[~nan_mask], color="red", cmap_color="gist_heat_r",
                          img_filename=None, fontsize=fontsize)
        g.fig.subplots_adjust(top=0.9)
        name = dset_titles.get(dset, dset.capitalize())
        g.fig.suptitle(title_fmter.format(name, expt_rename, fold), y=1.0, fontsize=fontsize)
        make_space_above(g, topmargin=1.1)
        # Save through the grid's own figure rather than pyplot's implicit "current figure".
        g.fig.savefig("{}/{}_{}_{}_r-squared.png".format(save_dir, expt, dset, fold), dpi=300)
        # Close instead of plt.clf(): clf() only empties the current figure and
        # leaves every figure opened during plotting registered with pyplot, so
        # they accumulated over the 20 iterations.
        plt.close("all")
        # n = number of known values that are not NaN.
        print("n = {}".format(len(truth[~nan_mask])))



n = 3044




n = 96863




n = 93155




n = 372393




n = 3044




n = 96863




n = 93707




n = 371841




n = 3044




n = 96863




n = 92239




n = 373309




n = 3044




n = 96863




n = 92771




n = 372777




n = 3044




n = 96863




n = 93676




n = 371872


<matplotlib.figure.Figure at 0x7f826c840910>

<matplotlib.figure.Figure at 0x7f826cf4e310>

<matplotlib.figure.Figure at 0x7f826cf4e5d0>

<matplotlib.figure.Figure at 0x7f8269e3a810>

<matplotlib.figure.Figure at 0x7f826c840990>

<matplotlib.figure.Figure at 0x7f8269d37090>

<matplotlib.figure.Figure at 0x7f8269e3a590>

<matplotlib.figure.Figure at 0x7f826a352550>

<matplotlib.figure.Figure at 0x7f8269dc4f50>

<matplotlib.figure.Figure at 0x7f8243bc2250>

<matplotlib.figure.Figure at 0x7f8243bc2310>

<matplotlib.figure.Figure at 0x7f8269b88050>

<matplotlib.figure.Figure at 0x7f8269b209d0>

<matplotlib.figure.Figure at 0x7f826ce1b8d0>

<matplotlib.figure.Figure at 0x7f8269b70c50>

<matplotlib.figure.Figure at 0x7f8269d37b90>

<matplotlib.figure.Figure at 0x7f8269ba8f10>

<matplotlib.figure.Figure at 0x7f826a4835d0>

<matplotlib.figure.Figure at 0x7f8269ba8fd0>

<matplotlib.figure.Figure at 0x7f826ce23050>

<matplotlib.figure.Figure at 0x7f8269f5b390>

<matplotlib.figure.Figure at 0x7f8269bad510>

<matplotlib.figure.Figure at 0x7f8269bad690>

<matplotlib.figure.Figure at 0x7f8269eece10>

<matplotlib.figure.Figure at 0x7f8269b4ac90>

<matplotlib.figure.Figure at 0x7f8269e54c10>

<matplotlib.figure.Figure at 0x7f8243ba0f90>

<matplotlib.figure.Figure at 0x7f8269bdd250>

<matplotlib.figure.Figure at 0x7f826a417610>

<matplotlib.figure.Figure at 0x7f8269bcb290>

<matplotlib.figure.Figure at 0x7f8243ab7290>

<matplotlib.figure.Figure at 0x7f8269c99a50>

<matplotlib.figure.Figure at 0x7f82698aba90>

<matplotlib.figure.Figure at 0x7f826a40a910>

<matplotlib.figure.Figure at 0x7f82698ab850>

<matplotlib.figure.Figure at 0x7f8269dec750>

<matplotlib.figure.Figure at 0x7f82698abad0>

<matplotlib.figure.Figure at 0x7f8243b23ed0>

<matplotlib.figure.Figure at 0x7f8243ab7e50>

<matplotlib.figure.Figure at 0x7f8269a94e90>

In [12]:
# "STD_SMA" experiment (labelled "SNA"): save one predicted-vs-known R^2
# figure per fold and dataset.
expt = "STD_SMA"
expt_rename = "SNA"
ratio = None
# Display names for datasets whose title is not simply the capitalised key.
dset_titles = {"drugmatrix": "Drug Matrix", "timesplit": "Time Split"}

for fold in np.arange(0, 5, 1):
    for dset in ["drugmatrix", "timesplit", "test", "train"]:
        prediction, truth = get_preds(data_dict, expt, dset, fold, ratio=ratio)
        nan_mask = np.isnan(truth)
        g = plot_rsquared(prediction, truth, color="blue", cmap_color="BuPu",
                          img_filename=None, fontsize=fontsize)
        g.fig.subplots_adjust(top=0.9)
        name = dset_titles.get(dset, dset.capitalize())
        g.fig.suptitle(title_fmter.format(name, expt_rename, fold), y=1.0, fontsize=fontsize)
        make_space_above(g, topmargin=1.1)
        # Save through the grid's own figure rather than pyplot's implicit "current figure".
        g.fig.savefig("{}/{}_{}_{}_r-squared.png".format(save_dir, expt_rename, dset, fold), dpi=300)
        # Close instead of plt.clf(): clf() only empties the current figure and
        # leaves every figure opened during plotting registered with pyplot, so
        # they accumulated over the 20 iterations.
        plt.close("all")
        # n = number of known values that are not NaN.
        print("n = {}".format(len(truth[~nan_mask])))




n = 3044




n = 96863




n = 93155




n = 372393




n = 3044




n = 96863




n = 93707




n = 371841




n = 3044




n = 96863




n = 92239




n = 373309




n = 3044




n = 96863




n = 92771




n = 372777




n = 3044




n = 96863




n = 93676




n = 371872


<matplotlib.figure.Figure at 0x7f8243aa9550>

<matplotlib.figure.Figure at 0x7f8243aa9250>

<matplotlib.figure.Figure at 0x7f8243aa9d90>

<matplotlib.figure.Figure at 0x7f8269b8f710>

<matplotlib.figure.Figure at 0x7f826ce0d150>

<matplotlib.figure.Figure at 0x7f8269df3a10>

<matplotlib.figure.Figure at 0x7f826ce0d9d0>

<matplotlib.figure.Figure at 0x7f8269badc10>

<matplotlib.figure.Figure at 0x7f82698adad0>

<matplotlib.figure.Figure at 0x7f8269d21090>

<matplotlib.figure.Figure at 0x7f8269d21a50>

<matplotlib.figure.Figure at 0x7f826cefdc50>

<matplotlib.figure.Figure at 0x7f826cefdb10>

<matplotlib.figure.Figure at 0x7f824395ad10>

<matplotlib.figure.Figure at 0x7f826cf0c9d0>

<matplotlib.figure.Figure at 0x7f826cf0cc50>

<matplotlib.figure.Figure at 0x7f826cf0ca50>

<matplotlib.figure.Figure at 0x7f826a478090>

<matplotlib.figure.Figure at 0x7f8269ae27d0>

<matplotlib.figure.Figure at 0x7f826c897910>

<matplotlib.figure.Figure at 0x7f82680d7cd0>

<matplotlib.figure.Figure at 0x7f826cefd810>

<matplotlib.figure.Figure at 0x7f8269b6f850>

<matplotlib.figure.Figure at 0x7f8243b8ca10>

<matplotlib.figure.Figure at 0x7f826c840990>

<matplotlib.figure.Figure at 0x7f826a464250>

<matplotlib.figure.Figure at 0x7f826cf26410>

<matplotlib.figure.Figure at 0x7f826cf19450>

<matplotlib.figure.Figure at 0x7f8267f39d90>

<matplotlib.figure.Figure at 0x7f8269bb03d0>

<matplotlib.figure.Figure at 0x7f8269d73890>

<matplotlib.figure.Figure at 0x7f8269c0acd0>

<matplotlib.figure.Figure at 0x7f826cf4eed0>

<matplotlib.figure.Figure at 0x7f8269b53f90>

<matplotlib.figure.Figure at 0x7f826cf4ef50>

<matplotlib.figure.Figure at 0x7f826a417310>

<matplotlib.figure.Figure at 0x7f826a41bbd0>

<matplotlib.figure.Figure at 0x7f826a3e42d0>

<matplotlib.figure.Figure at 0x7f826c897e10>

<matplotlib.figure.Figure at 0x7f8269a056d0>

In [13]:
# "NEG_RM" experiment (labelled "Negatives Removed"): save one
# predicted-vs-known R^2 figure per fold and dataset.
expt = "NEG_RM"
expt_rename = "Negatives Removed"
ratio = None
# Display names for datasets whose title is not simply the capitalised key.
dset_titles = {"drugmatrix": "Drug Matrix", "timesplit": "Time Split"}

for fold in np.arange(0, 5, 1):
    for dset in ["drugmatrix", "timesplit", "test", "train"]:
        prediction, truth = get_preds(data_dict, expt, dset, fold, ratio=ratio)
        nan_mask = np.isnan(truth)
        g = plot_rsquared(prediction, truth, color="orange", cmap_color="YlOrBr",
                          img_filename=None, fontsize=fontsize)
        g.fig.subplots_adjust(top=0.9)
        name = dset_titles.get(dset, dset.capitalize())
        g.fig.suptitle(title_fmter.format(name, expt_rename, fold), y=1.0, fontsize=fontsize)
        make_space_above(g, topmargin=1.1)
        # Save through the grid's own figure rather than pyplot's implicit "current figure".
        g.fig.savefig("{}/{}_{}_{}_r-squared.png".format(save_dir, expt_rename, dset, fold), dpi=300)
        # Close instead of plt.clf(): clf() only empties the current figure and
        # leaves every figure opened during plotting registered with pyplot, so
        # they accumulated over the 20 iterations.
        plt.close("all")
        # n = number of known values that are not NaN.
        print("n = {}".format(len(truth[~nan_mask])))



n = 3044




n = 96863




n = 93155




n = 372393




n = 3044




n = 96863




n = 93707




n = 371841




n = 3044




n = 96863




n = 92239




n = 373309




n = 3044




n = 96863




n = 92771




n = 372777




n = 3044




n = 96863




n = 93676




n = 371872


<matplotlib.figure.Figure at 0x7f826c8d35d0>

<matplotlib.figure.Figure at 0x7f826cefd790>

<matplotlib.figure.Figure at 0x7f826a3a4d10>

<matplotlib.figure.Figure at 0x7f82695a54d0>

<matplotlib.figure.Figure at 0x7f8243ae09d0>

<matplotlib.figure.Figure at 0x7f8269ecd7d0>

<matplotlib.figure.Figure at 0x7f826a3f8150>

<matplotlib.figure.Figure at 0x7f8269bc28d0>

<matplotlib.figure.Figure at 0x7f8269c8af50>

<matplotlib.figure.Figure at 0x7f8269bcbad0>

<matplotlib.figure.Figure at 0x7f8269bcb550>

<matplotlib.figure.Figure at 0x7f82695a5650>

<matplotlib.figure.Figure at 0x7f826a3a4610>

<matplotlib.figure.Figure at 0x7f8243b743d0>

<matplotlib.figure.Figure at 0x7f82680ca050>

<matplotlib.figure.Figure at 0x7f8269b6f390>

<matplotlib.figure.Figure at 0x7f82680ca110>

<matplotlib.figure.Figure at 0x7f8269b8f5d0>

<matplotlib.figure.Figure at 0x7f82680cae90>

<matplotlib.figure.Figure at 0x7f8269f89090>

<matplotlib.figure.Figure at 0x7f826cf21950>

<matplotlib.figure.Figure at 0x7f8269cd7750>

<matplotlib.figure.Figure at 0x7f8269b6ab90>

<matplotlib.figure.Figure at 0x7f8243bc9450>

<matplotlib.figure.Figure at 0x7f8243bc9810>

<matplotlib.figure.Figure at 0x7f8269e03690>

<matplotlib.figure.Figure at 0x7f8269e03910>

<matplotlib.figure.Figure at 0x7f8243c09210>

<matplotlib.figure.Figure at 0x7f82698b5410>

<matplotlib.figure.Figure at 0x7f8269cdb250>

<matplotlib.figure.Figure at 0x7f8269e23b50>

<matplotlib.figure.Figure at 0x7f8243b960d0>

<matplotlib.figure.Figure at 0x7f8269e239d0>

<matplotlib.figure.Figure at 0x7f8269e32ad0>

<matplotlib.figure.Figure at 0x7f8269f29610>

<matplotlib.figure.Figure at 0x7f8269e4d910>

<matplotlib.figure.Figure at 0x7f8269e32d90>

<matplotlib.figure.Figure at 0x7f8243b58d50>

<matplotlib.figure.Figure at 0x7f8243b58610>

<matplotlib.figure.Figure at 0x7f8269e59a10>

In [14]:
# "NEG_RM_SMA" experiment at ratio "1.0" (labelled "Negatives Removed +SNA"):
# save one predicted-vs-known R^2 figure per fold and dataset.
expt = "NEG_RM_SMA"
expt_rename = "Negatives Removed +SNA"
ratio = "1.0"
# Display names for datasets whose title is not simply the capitalised key.
dset_titles = {"drugmatrix": "Drug Matrix", "timesplit": "Time Split"}

for fold in np.arange(0, 5, 1):
    for dset in ["drugmatrix", "timesplit", "test", "train"]:
        prediction, truth = get_preds(data_dict, expt, dset, fold, ratio=ratio)
        nan_mask = np.isnan(truth)
        g = plot_rsquared(prediction, truth, color="green", cmap_color="BuGn",
                          img_filename=None, fontsize=fontsize)
        g.fig.subplots_adjust(top=0.9)
        name = dset_titles.get(dset, dset.capitalize())
        g.fig.suptitle(title_fmter.format(name, expt_rename, fold), y=1.0, fontsize=fontsize)
        make_space_above(g, topmargin=1.1)
        # Save through the grid's own figure rather than pyplot's implicit "current figure".
        g.fig.savefig("{}/{}_{}_{}_r-squared.png".format(save_dir, expt_rename, dset, fold), dpi=300)
        # Close instead of plt.clf(): clf() only empties the current figure and
        # leaves every figure opened during plotting registered with pyplot, so
        # they accumulated over the 20 iterations.
        plt.close("all")
        # n = number of known values that are not NaN.
        print("n = {}".format(len(truth[~nan_mask])))



n = 3044




n = 96863




n = 93155




n = 372393




n = 3044




n = 96863




n = 93707




n = 371841




n = 3044




n = 96863




n = 92239




n = 373309




n = 3044




n = 96863




n = 92771




n = 372777




n = 3044




n = 96863




n = 93676




n = 371872


<matplotlib.figure.Figure at 0x7f826cf21cd0>

<matplotlib.figure.Figure at 0x7f8269b6a310>

<matplotlib.figure.Figure at 0x7f82680cae90>

<matplotlib.figure.Figure at 0x7f824395a090>

<matplotlib.figure.Figure at 0x7f82680ca650>

<matplotlib.figure.Figure at 0x7f8269b4a890>

<matplotlib.figure.Figure at 0x7f82680ca110>

<matplotlib.figure.Figure at 0x7f8269ae7690>

<matplotlib.figure.Figure at 0x7f8269d43890>

<matplotlib.figure.Figure at 0x7f826cdc9310>

<matplotlib.figure.Figure at 0x7f8243aa9d50>

<matplotlib.figure.Figure at 0x7f8269cfddd0>

<matplotlib.figure.Figure at 0x7f82a639a6d0>

<matplotlib.figure.Figure at 0x7f8269c63dd0>

<matplotlib.figure.Figure at 0x7f8269c631d0>

<matplotlib.figure.Figure at 0x7f8269cc4b90>

<matplotlib.figure.Figure at 0x7f8243bc1910>

<matplotlib.figure.Figure at 0x7f8243b8c990>

<matplotlib.figure.Figure at 0x7f826a3d3950>

<matplotlib.figure.Figure at 0x7f82695b1690>

<matplotlib.figure.Figure at 0x7f8269cc4310>

<matplotlib.figure.Figure at 0x7f8269f1cd90>

<matplotlib.figure.Figure at 0x7f826a35aad0>

<matplotlib.figure.Figure at 0x7f8269cb58d0>

<matplotlib.figure.Figure at 0x7f8243a9c8d0>

<matplotlib.figure.Figure at 0x7f8243c0c090>

<matplotlib.figure.Figure at 0x7f8269f1cfd0>

<matplotlib.figure.Figure at 0x7f826c8b8f10>

<matplotlib.figure.Figure at 0x7f8243a9cfd0>

<matplotlib.figure.Figure at 0x7f82695b6dd0>

<matplotlib.figure.Figure at 0x7f826c8b8090>

<matplotlib.figure.Figure at 0x7f8243a9c390>

<matplotlib.figure.Figure at 0x7f8243a9cd50>

<matplotlib.figure.Figure at 0x7f826ce0e210>

<matplotlib.figure.Figure at 0x7f826ce0ecd0>

<matplotlib.figure.Figure at 0x7f826cdec7d0>

<matplotlib.figure.Figure at 0x7f826cdec850>

<matplotlib.figure.Figure at 0x7f8269e54650>

<matplotlib.figure.Figure at 0x7f826a432350>

<matplotlib.figure.Figure at 0x7f8243ad0710>

In [15]:
# "scrambled_idx_no_SMA_LC" experiment at ratio "1.0" (labelled "STD scrambled"):
# save one predicted-vs-known R^2 figure per fold and dataset.
expt = "scrambled_idx_no_SMA_LC"
expt_rename = "STD scrambled"
ratio = "1.0"
# Display names for datasets whose title is not simply the capitalised key.
dset_titles = {"drugmatrix": "Drug Matrix", "timesplit": "Time Split"}

for fold in np.arange(0, 5, 1):
    for dset in ["drugmatrix", "timesplit", "test", "train"]:
        prediction, truth = get_preds(data_dict, expt, dset, fold, ratio=ratio)
        nan_mask = np.isnan(truth)
        g = plot_rsquared(prediction, truth, color="gray", cmap_color="Greys",
                          img_filename=None, fontsize=fontsize)
        g.fig.subplots_adjust(top=0.9)
        name = dset_titles.get(dset, dset.capitalize())
        g.fig.suptitle(title_fmter.format(name, expt_rename, fold), y=1.0, fontsize=fontsize)
        make_space_above(g, topmargin=1.1)
        # Save through the grid's own figure rather than pyplot's implicit "current figure".
        g.fig.savefig("{}/{}_{}_{}_r-squared.png".format(save_dir, expt_rename, dset, fold), dpi=300)
        # Close instead of plt.clf(): clf() only empties the current figure and
        # leaves every figure opened during plotting registered with pyplot, so
        # they accumulated over the 20 iterations.
        plt.close("all")
        # n = number of known values that are not NaN.
        print("n = {}".format(len(truth[~nan_mask])))



n = 3044




n = 96863




n = 93155




n = 372393




n = 3044




n = 96863




n = 93707




n = 371841




n = 3044




n = 96863




n = 92239




n = 373309




n = 3044




n = 96863




n = 92771




n = 372777




n = 3044




n = 96863




n = 93676




n = 371872


<matplotlib.figure.Figure at 0x7f8243b8cad0>

<matplotlib.figure.Figure at 0x7f8269f52710>

<matplotlib.figure.Figure at 0x7f8243b8ce10>

<matplotlib.figure.Figure at 0x7f8269ca5c90>

<matplotlib.figure.Figure at 0x7f8243b8c350>

<matplotlib.figure.Figure at 0x7f8243c0cc90>

<matplotlib.figure.Figure at 0x7f826a46f750>

<matplotlib.figure.Figure at 0x7f826a41ea50>

<matplotlib.figure.Figure at 0x7f826a41e490>

<matplotlib.figure.Figure at 0x7f8269cc4890>

<matplotlib.figure.Figure at 0x7f8269cc4c90>

<matplotlib.figure.Figure at 0x7f8269e03b50>

<matplotlib.figure.Figure at 0x7f8269b83a10>

<matplotlib.figure.Figure at 0x7f8269d73fd0>

<matplotlib.figure.Figure at 0x7f8269cc4750>

<matplotlib.figure.Figure at 0x7f8243bce350>

<matplotlib.figure.Figure at 0x7f8269aa9150>

<matplotlib.figure.Figure at 0x7f8269aa9d90>

<matplotlib.figure.Figure at 0x7f826cddc290>

<matplotlib.figure.Figure at 0x7f8243c0cfd0>

<matplotlib.figure.Figure at 0x7f8269c63650>

<matplotlib.figure.Figure at 0x7f8269e06690>

<matplotlib.figure.Figure at 0x7f8269f523d0>

<matplotlib.figure.Figure at 0x7f8269b83590>

<matplotlib.figure.Figure at 0x7f826a3d3cd0>

<matplotlib.figure.Figure at 0x7f826cdc9cd0>

<matplotlib.figure.Figure at 0x7f8269f52e10>

<matplotlib.figure.Figure at 0x7f8269bc6a10>

<matplotlib.figure.Figure at 0x7f8269d83950>

<matplotlib.figure.Figure at 0x7f826cdc7e10>

<matplotlib.figure.Figure at 0x7f8269d83c10>

<matplotlib.figure.Figure at 0x7f8269e542d0>

<matplotlib.figure.Figure at 0x7f826a464450>

<matplotlib.figure.Figure at 0x7f8269cd7f90>

<matplotlib.figure.Figure at 0x7f8269b46850>

<matplotlib.figure.Figure at 0x7f8243b96890>

<matplotlib.figure.Figure at 0x7f8269596390>

<matplotlib.figure.Figure at 0x7f826a41db10>

<matplotlib.figure.Figure at 0x7f8269bcb450>

<matplotlib.figure.Figure at 0x7f8243bc1910>

In [16]:
# "scrambled_idx_LC" experiment at ratio "1.0" (labelled "SNA scrambled"):
# save one predicted-vs-known R^2 figure per fold and dataset.
expt = "scrambled_idx_LC"
expt_rename = "SNA scrambled"
ratio = "1.0"
# Display names for datasets whose title is not simply the capitalised key.
dset_titles = {"drugmatrix": "Drug Matrix", "timesplit": "Time Split"}

for fold in np.arange(0, 5, 1):
    for dset in ["drugmatrix", "timesplit", "test", "train"]:
        prediction, truth = get_preds(data_dict, expt, dset, fold, ratio=ratio)
        nan_mask = np.isnan(truth)
        g = plot_rsquared(prediction, truth, color="purple", cmap_color="PuBu",
                          img_filename=None, fontsize=fontsize)
        g.fig.subplots_adjust(top=0.9)
        name = dset_titles.get(dset, dset.capitalize())
        g.fig.suptitle(title_fmter.format(name, expt_rename, fold), y=1.0, fontsize=fontsize)
        make_space_above(g, topmargin=1.1)
        # Save through the grid's own figure rather than pyplot's implicit "current figure".
        g.fig.savefig("{}/{}_{}_{}_r-squared.png".format(save_dir, expt_rename, dset, fold), dpi=300)
        # Close instead of plt.clf(): clf() only empties the current figure and
        # leaves every figure opened during plotting registered with pyplot, so
        # they accumulated over the 20 iterations.
        plt.close("all")
        # n = number of known values that are not NaN.
        print("n = {}".format(len(truth[~nan_mask])))



n = 3044




n = 96863




n = 93155




n = 372393




n = 3044




n = 96863




n = 93707




n = 371841




n = 3044




n = 96863




n = 92239




n = 373309




n = 3044




n = 96863




n = 92771




n = 372777




n = 3044




n = 96863




n = 93676




n = 371872


<matplotlib.figure.Figure at 0x7f8269bcb910>

<matplotlib.figure.Figure at 0x7f8269b46710>

<matplotlib.figure.Figure at 0x7f826cf40950>

<matplotlib.figure.Figure at 0x7f824395edd0>

<matplotlib.figure.Figure at 0x7f826a438050>

<matplotlib.figure.Figure at 0x7f826a40f110>

<matplotlib.figure.Figure at 0x7f826a40f6d0>

<matplotlib.figure.Figure at 0x7f826a41df50>

<matplotlib.figure.Figure at 0x7f8269d43fd0>

<matplotlib.figure.Figure at 0x7f8269f52bd0>

<matplotlib.figure.Figure at 0x7f8269b361d0>

<matplotlib.figure.Figure at 0x7f8269d43510>

<matplotlib.figure.Figure at 0x7f8269b36e10>

<matplotlib.figure.Figure at 0x7f8269a75c50>

<matplotlib.figure.Figure at 0x7f826c840910>

<matplotlib.figure.Figure at 0x7f8269596990>

<matplotlib.figure.Figure at 0x7f8269a75310>

<matplotlib.figure.Figure at 0x7f826cf4ef10>

<matplotlib.figure.Figure at 0x7f826cf4e750>

<matplotlib.figure.Figure at 0x7f826a387f50>

<matplotlib.figure.Figure at 0x7f82a639ae50>

<matplotlib.figure.Figure at 0x7f82698d25d0>

<matplotlib.figure.Figure at 0x7f8243c12290>

<matplotlib.figure.Figure at 0x7f826cdba590>

<matplotlib.figure.Figure at 0x7f8269e03390>

<matplotlib.figure.Figure at 0x7f8243adbd90>

<matplotlib.figure.Figure at 0x7f8243aef690>

<matplotlib.figure.Figure at 0x7f826c8cb650>

<matplotlib.figure.Figure at 0x7f826c8cbb90>

<matplotlib.figure.Figure at 0x7f8243b8c150>

<matplotlib.figure.Figure at 0x7f8269cf9e90>

<matplotlib.figure.Figure at 0x7f8269bb0510>

<matplotlib.figure.Figure at 0x7f826cddc590>

<matplotlib.figure.Figure at 0x7f826cdba650>

<matplotlib.figure.Figure at 0x7f826cddc250>

<matplotlib.figure.Figure at 0x7f8269f38b10>

<matplotlib.figure.Figure at 0x7f826cddced0>

<matplotlib.figure.Figure at 0x7f826c8b8a10>

<matplotlib.figure.Figure at 0x7f8269f38d90>

<matplotlib.figure.Figure at 0x7f82699fe690>

In [17]:
# "NEG_RM_RATIOS_scrambled" experiment at ratio "1.0" (labelled
# "Negatives Removed +SNA scrambled"): save one predicted-vs-known R^2 figure
# per fold and dataset. This cell passes alpha=0.9 instead of the default.
expt = "NEG_RM_RATIOS_scrambled"
expt_rename = "Negatives Removed +SNA scrambled"
ratio = "1.0"
# Display names for datasets whose title is not simply the capitalised key.
dset_titles = {"drugmatrix": "Drug Matrix", "timesplit": "Time Split"}

for fold in np.arange(0, 5, 1):
    for dset in ["drugmatrix", "timesplit", "test", "train"]:
        prediction, truth = get_preds(data_dict, expt, dset, fold, ratio=ratio)
        nan_mask = np.isnan(truth)
        g = plot_rsquared(prediction, truth, color="gold", cmap_color="YlOrBr",
                          img_filename=None, fontsize=fontsize, alpha=0.9)
        g.fig.subplots_adjust(top=0.9)
        name = dset_titles.get(dset, dset.capitalize())
        g.fig.suptitle(title_fmter.format(name, expt_rename, fold), y=1.0, fontsize=fontsize)
        make_space_above(g, topmargin=1.1)
        # Save through the grid's own figure rather than pyplot's implicit "current figure".
        g.fig.savefig("{}/{}_{}_{}_r-squared.png".format(save_dir, expt_rename, dset, fold), dpi=300)
        # Close instead of plt.clf(): clf() only empties the current figure and
        # leaves every figure opened during plotting registered with pyplot, so
        # they accumulated over the 20 iterations.
        plt.close("all")
        # n = number of known values that are not NaN.
        print("n = {}".format(len(truth[~nan_mask])))



n = 3044




n = 96863




n = 93155




n = 372393




n = 3044




n = 96863




n = 93707




n = 371841




n = 3044




n = 96863




n = 92239




n = 373309




n = 3044




n = 96863




n = 92771




n = 372777




n = 3044




n = 96863




n = 93676




n = 371872


<matplotlib.figure.Figure at 0x7f826a3ea590>

<matplotlib.figure.Figure at 0x7f8243acf950>

<matplotlib.figure.Figure at 0x7f826ce2df10>

<matplotlib.figure.Figure at 0x7f82699c8b90>

<matplotlib.figure.Figure at 0x7f826ced8fd0>

<matplotlib.figure.Figure at 0x7f8269cb5ad0>

<matplotlib.figure.Figure at 0x7f826cddc090>

<matplotlib.figure.Figure at 0x7f826cdecb10>

<matplotlib.figure.Figure at 0x7f8243ad8150>

<matplotlib.figure.Figure at 0x7f826cdc9050>

<matplotlib.figure.Figure at 0x7f826cdc98d0>

<matplotlib.figure.Figure at 0x7f826a464450>

<matplotlib.figure.Figure at 0x7f8269cc4090>

<matplotlib.figure.Figure at 0x7f82698bca10>

<matplotlib.figure.Figure at 0x7f826cf4ecd0>

<matplotlib.figure.Figure at 0x7f82695b6750>

<matplotlib.figure.Figure at 0x7f826cf4ea10>

<matplotlib.figure.Figure at 0x7f8243b85e50>

<matplotlib.figure.Figure at 0x7f8269f89e10>

<matplotlib.figure.Figure at 0x7f826a3d0cd0>

<matplotlib.figure.Figure at 0x7f826a3d0550>

<matplotlib.figure.Figure at 0x7f8269cb8a50>

<matplotlib.figure.Figure at 0x7f826a3fa550>

<matplotlib.figure.Figure at 0x7f8269aa6ad0>

<matplotlib.figure.Figure at 0x7f8269acd390>

<matplotlib.figure.Figure at 0x7f824395e390>

<matplotlib.figure.Figure at 0x7f8269acd410>

<matplotlib.figure.Figure at 0x7f8243c0c910>

<matplotlib.figure.Figure at 0x7f82680e3890>

<matplotlib.figure.Figure at 0x7f8269e91910>

<matplotlib.figure.Figure at 0x7f826cec1790>

<matplotlib.figure.Figure at 0x7f8269bc5090>

<matplotlib.figure.Figure at 0x7f826cec10d0>

<matplotlib.figure.Figure at 0x7f82695b6050>

<matplotlib.figure.Figure at 0x7f8269c1b550>

<matplotlib.figure.Figure at 0x7f826cf20fd0>

<matplotlib.figure.Figure at 0x7f8269bc5c50>

<matplotlib.figure.Figure at 0x7f8269b36810>

<matplotlib.figure.Figure at 0x7f826c8b8610>

<matplotlib.figure.Figure at 0x7f826a40f310>

In [18]:
# "NEG_RM_scrambled" experiment (labelled "Negatives Removed scrambled"):
# save one predicted-vs-known R^2 figure per fold and dataset.
expt = "NEG_RM_scrambled"
expt_rename = "Negatives Removed scrambled"
ratio = None
# Display names for datasets whose title is not simply the capitalised key.
dset_titles = {"drugmatrix": "Drug Matrix", "timesplit": "Time Split"}

for fold in np.arange(0, 5, 1):
    for dset in ["drugmatrix", "timesplit", "test", "train"]:
        prediction, truth = get_preds(data_dict, expt, dset, fold, ratio=ratio)
        nan_mask = np.isnan(truth)
        g = plot_rsquared(prediction, truth, color="sienna", cmap_color="gist_heat_r",
                          img_filename=None, fontsize=fontsize)
        g.fig.subplots_adjust(top=0.9)
        name = dset_titles.get(dset, dset.capitalize())
        g.fig.suptitle(title_fmter.format(name, expt_rename, fold), y=1.0, fontsize=fontsize)
        make_space_above(g, topmargin=1.1)
        # Save through the grid's own figure rather than pyplot's implicit "current figure".
        g.fig.savefig("{}/{}_{}_{}_r-squared.png".format(save_dir, expt_rename, dset, fold), dpi=300)
        # Close instead of plt.clf(): clf() only empties the current figure and
        # leaves every figure opened during plotting registered with pyplot, so
        # they accumulated over the 20 iterations.
        plt.close("all")
        # n = number of known values that are not NaN.
        print("n = {}".format(len(truth[~nan_mask])))



n = 3044




n = 96863




n = 93155




n = 372393




n = 3044




n = 96863




n = 93707




n = 371841




n = 3044




n = 96863




n = 92239




n = 373309




n = 3044




n = 96863




n = 92771




n = 372777




n = 3044




n = 96863




n = 93676




n = 371872


<matplotlib.figure.Figure at 0x7f82680d7050>

<matplotlib.figure.Figure at 0x7f8269ca7810>

<matplotlib.figure.Figure at 0x7f8269cb5b50>

<matplotlib.figure.Figure at 0x7f8269b67810>

<matplotlib.figure.Figure at 0x7f82680d7d90>

<matplotlib.figure.Figure at 0x7f82698b5690>

<matplotlib.figure.Figure at 0x7f8269b3ba90>

<matplotlib.figure.Figure at 0x7f8269dd7690>

<matplotlib.figure.Figure at 0x7f8243bdc0d0>

<matplotlib.figure.Figure at 0x7f826a36d4d0>

<matplotlib.figure.Figure at 0x7f8269aa0e10>

<matplotlib.figure.Figure at 0x7f824395e990>

<matplotlib.figure.Figure at 0x7f826cda4590>

<matplotlib.figure.Figure at 0x7f8243bc9e50>

<matplotlib.figure.Figure at 0x7f82a640ddd0>

<matplotlib.figure.Figure at 0x7f8269bfa090>

<matplotlib.figure.Figure at 0x7f8269af3a10>

<matplotlib.figure.Figure at 0x7f82695b8390>

<matplotlib.figure.Figure at 0x7f8269af3e90>

<matplotlib.figure.Figure at 0x7f8269a6e510>

<matplotlib.figure.Figure at 0x7f8269af3e50>

<matplotlib.figure.Figure at 0x7f8269b90810>

<matplotlib.figure.Figure at 0x7f82680d7d10>

<matplotlib.figure.Figure at 0x7f8269e56e90>

<matplotlib.figure.Figure at 0x7f8269e56290>

<matplotlib.figure.Figure at 0x7f8269b45bd0>

<matplotlib.figure.Figure at 0x7f8269b90150>

<matplotlib.figure.Figure at 0x7f8267d7a890>

<matplotlib.figure.Figure at 0x7f826cefd190>

<matplotlib.figure.Figure at 0x7f8269cb5690>

<matplotlib.figure.Figure at 0x7f826cefde90>

<matplotlib.figure.Figure at 0x7f8269b19d10>

<matplotlib.figure.Figure at 0x7f8243bc9890>

<matplotlib.figure.Figure at 0x7f8269d730d0>

<matplotlib.figure.Figure at 0x7f82698baa10>

<matplotlib.figure.Figure at 0x7f8269e1f6d0>

<matplotlib.figure.Figure at 0x7f8267da38d0>

<matplotlib.figure.Figure at 0x7f8269c3ee10>

<matplotlib.figure.Figure at 0x7f8269cef290>

<matplotlib.figure.Figure at 0x7f8269b5e550>