## Why am I doing this?

Based on the performance difference between the validation and time-split sets for the scrambled indices, I suspect that the initially good AUPRC performance is due to targets with high positive counts. To counteract that, I'm going to use an SMA model on the scrambled indices and see whether that initial bump goes away. 

!cp ../scrambled_idx/scrambled_trains.sh ./

!cp ../scrambled_idx/sneg_args.txt ./

Copied the args file; now modifying it for this run.

Well, damn. It looks like I already trained the scrambled_idx model using SMA. So I guess my hypothesis is now inverted: this run will be the one without SMA (`scrambled_idx_no_SMA`). 

!cp ../scrambled_idx/gen_ts_validation.sh ./

!cp ../scrambled_idx/get_best_epochs.py ./

!cp ../scrambled_idx/get_best_epochs.sh ./

!cp ../scrambled_idx/get_metrics.sh ./

!cp ../scrambled_idx/run_with_custom_epochs* ./

Now look at the data.

!cp ../scrambled_idx/plot_scrambled_aucs.ipynb ./

!cp ../scrambled_idx/r2_plots.ipynb ./

## Check training

In [5]:
import pandas as pd
import os
from glob import glob
import numpy as np
import json
from itertools import izip


def get_env_var(handle):
    '''Look up the environment variable named ``handle``.

    Returns the value with any single-quote characters stripped from both
    ends. Raises LookupError when the variable is unset or set to an empty
    string.
    '''
    value = os.environ.get(handle)
    if value:
        # Values may carry surrounding single quotes; drop them.
        return value.strip("'")
    raise LookupError("Environment variable: {} not set.".format(handle))

def format_md_img(link_name, rel_plot_loc):
    '''Return the wiki image markup ``[[link_name/rel_plot_loc]]``.'''
    return "[[{base}/{plot}]]".format(base=link_name, plot=rel_plot_loc)


def print_table(list_of_headers):
    '''Format one row of a pipe-delimited Markdown table.

    Despite the name, nothing is printed: the row text is returned and the
    caller is expected to print it.

    Args:
        list_of_headers: iterable of cell values. Each one is rendered with
            ``str.format`` and followed by ``" | "``.

    Returns:
        The string ``"| c1 | c2 | ... |"`` followed by six spaces. For an
        empty iterable this is ``"|"`` followed by six spaces. The trailing
        padding is kept so the output stays identical to what earlier runs
        of this notebook produced.
    '''
    # Each cell is formatted on its own and never reused as a template, so a
    # cell that contains "{" or "}" is emitted verbatim instead of being
    # interpreted as a replacement field (which used to raise KeyError or
    # IndexError).
    cells = "".join("{} | ".format(header) for header in list_of_headers)
    return "| " + cells + " " * 5


class Experiment(dict):
    '''Record of the converged epoch for each fold of one named experiment.

    The class derives from ``dict`` but keeps all of its state in instance
    attributes; no dictionary items are ever stored. The three list
    attributes are parallel: entry ``k`` of each describes the same fold.
    '''

    def __init__(self, name):
        # Attribute creation order determines the key order shown by
        # __repr__ and __str__.
        self.name = name
        self.folds, self.converged_epochs, self.trained_paths = [], [], []

    def __repr__(self):
        # The instance attribute dictionary, rendered as a plain dict.
        return repr(self.__dict__)

    def __str__(self):
        # Same attributes as __repr__, pretty-printed as JSON.
        state = self.__dict__
        return json.dumps(state, indent=2)

    def set_convereged_epoch(self, epoch, train_path, fold=None):
        '''Append one fold's record to the three parallel lists.

        ``fold`` defaults to None and is appended as given.
        '''
        additions = (
            (self.folds, fold),
            (self.converged_epochs, epoch),
            (self.trained_paths, train_path),
        )
        for target, value in additions:
            target.append(value)
        

# --- Locations -------------------------------------------------------------
# Local checkout of the lab-notebook wiki, where the plot images are kept.
home = get_env_var("HOME")
base = "{}/labgits/lab-notebook-caceres.wiki/images/20180815_Paper_Retrains/".format(home)

# Roots read from the shell environment. Only home_save_dir is used in the
# cells visible here; the other two are kept for any later cells.
home_save_dir = get_env_var("HOME_SAVE_BASE")
srv_save_dir = get_env_var("DATA_SAVE_BASE")
proj_dir = get_env_var("NMEW_PROJ_BASE")

# Ratios to tabulate (used as keys into expt_dict and as path substrings)
# and the glob pattern, relative to parent_dir, of the PNGs to collect.
sneg_pnrs = [1.0]
fold_tmplt = "fold_[0-9]*/pnr_*/*.png"

# github formatting info:

github_wiki_link = "https://github.com/keiserlab/lab-notebook-caceres/wiki/images"
expt_name = "20180815_Paper_Retrains"
expt_sub_name = "scrambled_idx_no_SMA"
# Link prefix for rendered images. The format string has two fields, so the
# expt_sub_name that used to be passed as a third argument was silently
# ignored. It is deliberately NOT part of the prefix: the relative plot path
# appended when the table is rendered already contains that directory.
github_wiki_expt = "{}/{}".format(github_wiki_link, expt_name)

# base already ends in "/", so this path contains "//". The table-rendering
# cell splits on "//" to recover the path relative to base -- do not
# normalise the double slash without updating that cell.
expt_dir = "{}/lr_nesterov_1024_2048_3072/".format(base)

metrics_for_convergance = ['matthews-corrcoef_binary-5.0_test',
                           'matthews-corrcoef_binary-6.0_test',
                           'r2_test']
experiments = []

parent_dir = os.path.join(expt_dir, expt_sub_name)

In [6]:
!mkdir -p $parent_dir


### Copied images of interest into parent dir

```
!rsync -azrRv $HOME_SAVE_BASE/output/20180815_Paper_Retrains/trained_nets/./lr_nesterov_1024_2048_3072/scrambled_idx_no_SMA/fold_*/pnr_*/*.png $base
```

In [7]:
# Every PNG under parent_dir whose relative path matches fold_tmplt
# ("fold_[0-9]*/pnr_*/*.png"). glob returns the paths in no particular order.
png_files = glob(os.path.join(parent_dir, fold_tmplt))

In [11]:
# Reset so that re-running this cell does not append duplicates.
experiments = []
# NOTE: despite its name, converged_epochs holds the path of the JSON file
# that lists the converged epochs, not the epochs themselves.
converged_epochs = "{}/output/20180815_Paper_Retrains/predictions/{}/experiments.json".format(
    home_save_dir, expt_sub_name)
with open(converged_epochs, "r") as fp:
    expts = json.load(fp)

expt_list = []
# Rebuild one Experiment per JSON record from its three parallel lists.
for record in expts:
    expt = Experiment(record["name"])
    for epoch, path, fold in izip(record["converged_epochs"],
                                  record["trained_paths"],
                                  record["folds"]):
        expt.set_convereged_epoch(epoch, path, fold)
    experiments.append(expt)
    del expt

# Two-level lookup used when rendering the tables:
#   float parsed from the text after the last "_" of the experiment name
#     -> {int parsed from the second "_"-separated token of each fold label
#           -> converged epoch for that fold}
expt_dict = {
    float(expt.name.split("_")[-1]): {
        int(fold_label.split("_")[1]): epoch
        for fold_label, epoch in zip(expt.folds, expt.converged_epochs)
    }
    for expt in experiments
}

In [12]:
# Left-to-right order of the image columns in every table row.
img_order = ['train_loss', 'test_loss', 'test_no_gt_loss']
for ratio in sneg_pnrs:
    # Substring match: keep every PNG whose path contains the ratio's text.
    ratio_text = str(ratio)
    png_subset = [png for png in png_files if ratio_text in png]

    # Header row followed by the Markdown alignment row.
    headers = ["PNR_{}".format(ratio)] + img_order
    print(print_table(headers))
    print(print_table([":---"] * len(headers)))

    # One row per fold; five folds (0-4) are assumed.
    for fold_idx in np.arange(5):
        fold_fmter = "fold_{}".format(str(fold_idx))
        name_fmter = "{} best epoch: {}".format(fold_fmter, expt_dict[ratio][fold_idx])
        fold_pngs = [png for png in png_subset if fold_fmter in png]
        # Order the images by file stem so they line up with img_order.
        fold_pngs.sort(key=lambda png: img_order.index(png.split("/")[-1].split(".")[0]))
        # Everything after the last "//" in the path is the location relative
        # to the wiki image directory.
        row_cells = [name_fmter]
        row_cells.extend(format_md_img(github_wiki_expt, png.split("//")[-1])
                         for png in fold_pngs)
        print(print_table(row_cells))
    print("\n\n")

| PNR_1.0 | train_loss | test_loss | test_no_gt_loss |      
| :--- | :--- | :--- | :--- |      
| fold_0 best epoch: 0 | [[https://github.com/keiserlab/lab-notebook-caceres/wiki/images/20180815_Paper_Retrains/lr_nesterov_1024_2048_3072/scrambled_idx_no_SMA/fold_0/pnr_1.0/train_loss.png]] | [[https://github.com/keiserlab/lab-notebook-caceres/wiki/images/20180815_Paper_Retrains/lr_nesterov_1024_2048_3072/scrambled_idx_no_SMA/fold_0/pnr_1.0/test_loss.png]] | [[https://github.com/keiserlab/lab-notebook-caceres/wiki/images/20180815_Paper_Retrains/lr_nesterov_1024_2048_3072/scrambled_idx_no_SMA/fold_0/pnr_1.0/test_no_gt_loss.png]] |      
| fold_1 best epoch: 0 | [[https://github.com/keiserlab/lab-notebook-caceres/wiki/images/20180815_Paper_Retrains/lr_nesterov_1024_2048_3072/scrambled_idx_no_SMA/fold_1/pnr_1.0/train_loss.png]] | [[https://github.com/keiserlab/lab-notebook-caceres/wiki/images/20180815_Paper_Retrains/lr_nesterov_1024_2048_3072/scrambled_idx_no_SMA/fold_1/pnr_1.0/test_loss.pn