# nb-model_exp-results-2-2 (h_pba_h_h_vol_h)

In [1]:
import sys
import os
from os import sep
from os.path import dirname, realpath, exists, basename
from pathlib import Path
from glob import glob, iglob
import logging

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Convenience function to make a directory string for the current file based on inputs.
    Jupyter Notebook in Anaconda invokes the Python interpreter in Anaconda's subdirectory
    which is why changing sys.argv[0] is necessary. In the future a better way to do this
    should be preferred..
    """
    return crunch_dir +subdir +fname

def fix_path(cwd):
    """
    Convenience function to fix argv and python path so that jupyter notebook can run the same as
    any script in crunch.
    """
    sys.argv[0] = cwd
    module_path = os.path.abspath(os.path.join('..'))
    if module_path not in sys.path:
        sys.path.append(module_path)

fname = 'nb-model_exp-results-2-2.ipynb'
dir_name = 'model'
fix_path(get_cwd(fname, dir_name +sep))

import numpy as np
import pandas as pd

from ipywidgets import interact, interactive, fixed
from IPython.display import display

pd.set_option("display.max_rows", 300)
pd.set_option("display.max_columns", 50)

from common_util import MODEL_DIR, load_json, dump_json, rectify_json, load_df, str_now, makedir_if_not_exists, is_valid, isnt, compose, pd_split_ternary_to_binary, df_del_midx_level, midx_intersect, pd_get_midx_level, pd_rows, df_midx_restack
from common_util import NestedDefaultDict
from model.common import EXP_LOG_DIR, EXP_PARAMS_DIR, ASSETS, DATASET_DIR, XG_PROCESS_DIR, XG_DATA_DIR, XG_DIR, PYTORCH_MODELS_DIR, TRAIN_RATIO, EXPECTED_NUM_HOURS
from recon.viz import *
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

CRITICAL:root:script location: /home/kev/crunch/model/nb-model_exp-results-2-2.ipynb
CRITICAL:root:using project dir: /home/kev/crunch/


Prune the xg data down to the data of interest to use in further experiments.

In [2]:
def get_bench(pattern):
    ndd = NestedDefaultDict()
    for path in iglob(pattern, recursive=True):
        ndd[path.split('/')] = load_json(path)

    df_idx = [k[2] for k in ndd.keys()]
    df = pd.DataFrame.from_records([b['benchmark-hold'] for b in ndd.values()], index=df_idx)

    return df

In [3]:
def consolidate_results(pattern, bench_pattern=None):
    ndd = NestedDefaultDict()
    for path in iglob(pattern, recursive=True):
        ndd[path.split('/')] = load_json(path)
    df_keys = [(k[2], k[3], k[4], k[5], k[6]) for k in ndd.keys()]
    df_idx = pd.MultiIndex.from_tuples(df_keys, names=('asset', 'model', 'data', 'params', 'trial')) \
        .remove_unused_levels()
    df = pd.DataFrame.from_records(list(ndd.values()), index=df_idx) \
        .reset_index() \
        .groupby(['asset', 'model', 'params']).mean() # average across trials for each study

    if (is_valid(bench_pattern)):
        bench_df = get_bench(bench_pattern)
        mets = ['accuracy', 'precision', 'recall', 'f1']
        rets = ['profit', 'sharpe', 'cagr']
        for m in mets+rets:
            res_cols = [c for c in df.columns if (c.endswith(m))]
            bench_cols = [c for c in bench_df.columns if (c.endswith(m))]
            bench_col = bench_cols[0]

            for asset in bench_df.index:
#                 print(bench_df.loc[asset, bench_col])
#                 print(df.xs(asset, level=0, drop_level=False)[res_cols])
                df.xs(asset, level=0, drop_level=False).loc[:, res_cols] -= bench_df.loc[asset, bench_col]
#     df = df.style \
#         .background_gradient(subset=[c for c in df.columns if c.endswith(('accuracy', 'profit', 'sharpe', 'cagr'))], cmap='BuGn')
#     .set_sticky(axis="index") \
    return df

In [4]:
def agg_asset(res_df):
    agg_df = res_df.groupby(['model', 'params']).mean()
    return agg_df

In [30]:
def agg_model(res_df):
    agg_df = res_df.groupby(['params']).mean()
    return agg_df

In [31]:
def agg_asset_params(res_df):
    return res_df.groupby(['asset', 'params']).mean()

In [6]:
def add_style(df):
    df = df.style \
         .background_gradient(subset=[c for c in df.columns if c.endswith(('accuracy', 'f1', 'profit', 'sharpe', 'cagr'))], cmap='BuGn')
#     .set_sticky(axis="index") \
    return df

In [7]:
view = ('accuracy', 'f1', 'profit', 'sharpe', 'cagr')

def select_ends(df, ends=view):
    res_cols = [c for c in df.columns if (c.endswith(ends))]
#     for m in ends:
#         res_cols = [c for c in df.columns if (c.endswith(m))]
    return df.loc[:, res_cols]

In [8]:
def select_paramrange(df, r):
    params = map(str, r)
    return df.loc[(slice(None), slice(None), params), :]

In [9]:
def dropfirstmi(df):
    df.index = df.index.droplevel(0)
    return df

def gb_filter_monotonic(df, checkcol='val_binary_sharpe'):
    return dropfirstmi(df.reorder_levels(['asset', 'params', 'model']) \
            .groupby('params') \
            .apply(lambda g: g if (g[checkcol].is_monotonic_increasing) else None) \
            .sort_index())

subgt = lambda g, big, small='base': (g.xs(big, level=2) > g.xs(small, level=2)).all()
monotonic_partial = lambda g: (subgt(g, 'lnp') and subgt(g, 'np', 'lnp')) or (subgt(g, 'cnp') and subgt(g, 'np', 'cnp'))

def gb_filter_monotonic_partial(df, checkcol='val_binary_sharpe'):
    return dropfirstmi(df.reorder_levels(['asset', 'params', 'model']) \
            .groupby('params') \
            .apply(lambda g: g if (monotonic_partial(g[checkcol])) else None) \
            .sort_index())

nonneg = lambda g: (g.xs('np', level=2) > 0).all() and (g.xs('lnp', level=2) > 0).all()

def gb_filter_neg(df, checkcol='val_binary_sharpe'):
    return dropfirstmi(df.reorder_levels(['asset', 'params', 'model']) \
            .groupby('params') \
            .apply(lambda g: g if (nonneg(g[checkcol])) else None) \
            .sort_index())

def gb_filter_lf(df, checkcol='val_binary_longfreq'):
    return dropfirstmi(df.groupby('params') \
            .apply(lambda g: g if ((g[checkcol].between(.2, .8)).all()) else None))

In [10]:
# def get_params_per_dataset():
#     EXP_LOG_DIR

In [11]:
exp_base = basename(dirname(EXP_LOG_DIR))
sm_name = 'anp'

drange = '2009_2018'
xdata = 'h_pba_h_h_vol_h'
ydata = 'ddir'
data_name = f'{drange}_{ydata}_{xdata}'
bench_data_name = f'{drange}_{ydata}'

path_train = f"{exp_base}/{sm_name}/**/{data_name}/**/train.json"
path_val =   f"{exp_base}/{sm_name}/**/{data_name}/**/val.json"
# path_test = f"{exp_base}/{sm_name}/**/{data_name}/**/test.json"

path_bench_train = f"{exp_base}/bench/*/{bench_data_name}/train.json"
path_bench_val = f"{exp_base}/bench/*/{bench_data_name}/val.json"
# path_bench_test = f"{exp_base}/bench/*/{bench_data_name}/test.json"

In [12]:
PRANGE = range(200, 270)

# absolute results
dt = select_paramrange(consolidate_results(path_train), PRANGE);
dv = select_paramrange(consolidate_results(path_val), PRANGE);

# benchmark adjusted results
dt_adj = select_paramrange(consolidate_results(path_train, path_bench_train), PRANGE);
dv_adj = select_paramrange(consolidate_results(path_val, path_bench_val), PRANGE);

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, val, pi)


## Results
Parameter sets >= 200

In [13]:
# sp_500: 120, 56
# russell_2000: 60, 54, 83
# nasdaq_100: 14, 124
# dow_jones: 107, 124, 62
# +110

# *0 -> 62
# *1 -> 83

# *2 -> 14
# *3 -> 54
# *4 -> 124

# *5 -> 56

# *6 -> 107

# *7 -> 60
# *8 -> 110
# *9 -> 120

In [14]:
# (200-209) -> (210-219): output init set to kaiming_uniform
# (200-209) -> (220-229): all in2d replaced with in15d
# (200-209) -> (230-239): all in15d->in2d, in2d->in15d
# (200-209) -> (240-249): double batch size
# (230-239) -> (250-259): double batch size
# (202, 210-212, 214, 221-222, 234-246) -> (260-269): set window size to 40
# final:

In [15]:
train_clf = [col for col in dt.columns if ('clf' in col)]
train_binary = [col for col in dt.columns if ('binary' in col)]
train_binary_sharpe = [col for col in train_binary if (col.endswith('sharpe'))]

In [16]:
val_clf = [col for col in dv.columns if ('clf' in col)]
val_binary = [col for col in dv.columns if ('binary' in col)]
val_binary_sharpe = [col for col in val_binary if (col.endswith('sharpe'))]

In [17]:
add_style(agg_model(agg_asset(dt_adj.loc[:, train_binary_sharpe])))

Unnamed: 0_level_0,train_binary_sharpe,train_binary_long_sharpe,train_binary_short_sharpe
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
200,2.882324,1.949924,1.498942
201,1.475627,0.920224,0.585424
202,1.26128,0.699064,0.520995
203,0.688621,0.233473,0.164763
204,4.725711,3.041591,2.838582
205,1.49954,0.880132,0.64547
206,0.668544,0.269704,0.096268
207,1.150288,0.628059,0.411664
208,1.620358,0.978077,0.698359
209,1.72308,1.025209,0.84865


In [37]:
add_style(agg_model(dv_adj.loc[:, val_binary_sharpe]))

Unnamed: 0_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
200,0.005387,0.086071,-0.278647
201,0.252481,0.255223,-0.136535
202,0.562842,0.445677,0.178011
203,0.366824,0.330875,-0.055055
204,0.185705,0.184625,-0.125952
205,0.231389,0.267095,-0.177488
206,0.203522,0.243271,-0.186095
207,0.300631,0.31767,-0.113352
208,0.086076,0.160731,-0.299686
209,0.369343,0.347679,-0.050516


In [47]:
add_style(agg_asset_params(dv_adj.loc[:, val_binary_sharpe]).sort_index(level=[1, 0]).groupby('params').mean())

Unnamed: 0_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
200,0.005387,0.086071,-0.278647
201,0.252481,0.255223,-0.136535
202,0.562842,0.445677,0.178011
203,0.366824,0.330875,-0.055055
204,0.185705,0.184625,-0.125952
205,0.231389,0.267095,-0.177488
206,0.203522,0.243271,-0.186095
207,0.300631,0.31767,-0.113352
208,0.086076,0.160731,-0.299686
209,0.369343,0.347679,-0.050516


In [46]:
add_style(agg_asset_params(dv_adj.loc[:, val_binary_sharpe]).sort_index(level=[1, 0]))#.groupby('params').mean())

Unnamed: 0_level_0,Unnamed: 1_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
asset,params,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dow_jones,200,-0.202269,0.348789,-0.906343
nasdaq_100,200,-0.108818,-0.068354,-0.273494
russell_2000,200,0.149195,-0.022375,0.232599
sp_500,200,0.183442,0.086223,-0.167348
dow_jones,201,0.46999,0.656575,-0.437492
nasdaq_100,201,0.54754,0.494905,0.137478
russell_2000,201,0.445558,0.202486,0.43316
sp_500,201,-0.453162,-0.333075,-0.679287
dow_jones,202,0.874768,0.948598,-0.04921
nasdaq_100,202,0.471305,0.379927,0.132853


In [48]:
#add_style(agg_asset(select_paramrange(dv_adj, range(260, 270))).sort_index(level=[1, 0]))

## Compare Results Over Val by Params

In [21]:
sel_param = lambda d, p: d.xs(p, level=2, drop_level=False)
sel_asset = lambda d, a: d.xs(a, level=0, drop_level=False)

In [22]:
sp_500 = sel_asset(dv_adj.loc[:, val_binary_sharpe], 'sp_500').sort_index(level=2)
russell_2000 = sel_asset(dv_adj.loc[:, val_binary_sharpe], 'russell_2000').sort_index(level=2)
nasdaq_100 = sel_asset(dv_adj.loc[:, val_binary_sharpe], 'nasdaq_100').sort_index(level=2)
dow_jones = sel_asset(dv_adj.loc[:, val_binary_sharpe], 'dow_jones').sort_index(level=2)

In [24]:
add_style(sp_500)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
asset,model,params,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sp_500,base,200,0.049629,-0.010425,-0.275051
sp_500,cnp,200,0.349536,0.132122,0.038718
sp_500,lnp,200,0.51631,0.352681,0.036623
sp_500,np,200,-0.181709,-0.129485,-0.469682
sp_500,base,201,-0.964619,-0.660179,-1.076088
sp_500,cnp,201,-0.31195,-0.31195,-0.56501
sp_500,lnp,201,-0.202622,-0.0947,-0.492752
sp_500,np,201,-0.333459,-0.265471,-0.583297
sp_500,base,202,-0.416747,-0.238052,-0.631365
sp_500,cnp,202,1.171191,0.625997,0.771164


In [23]:
add_style(gb_filter_monotonic(sp_500))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
asset,params,model,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sp_500,241,base,-1.222298,-0.56501,-1.222298
sp_500,241,cnp,-0.116081,0.285326,-0.702
sp_500,241,lnp,0.092279,0.092279,-0.56501
sp_500,241,np,0.352291,0.680041,-0.397613
sp_500,248,base,-0.564533,-0.043586,-1.236256
sp_500,248,cnp,-0.558581,-0.048676,-1.251424
sp_500,248,lnp,-0.557996,-0.045453,-1.244169
sp_500,248,np,0.499841,0.664547,-0.377622
sp_500,249,base,-0.063143,0.270289,-0.829252
sp_500,249,cnp,0.488849,0.572172,-0.362029


In [107]:
add_style(gb_filter_neg(gb_filter_monotonic_partial(sp_500)))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
asset,params,model,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sp_500,203,base,0.113244,0.145049,-0.421904
sp_500,203,cnp,-0.330437,-0.139165,-0.889881
sp_500,203,lnp,0.705651,0.583412,0.030078
sp_500,203,np,0.781086,0.593163,0.127933
sp_500,204,base,-0.552518,-0.402667,-0.774013
sp_500,204,cnp,-1.233138,-0.890459,-1.162506
sp_500,204,lnp,0.440248,0.12403,0.35334
sp_500,204,np,0.583704,0.285842,0.222009
sp_500,205,base,0.297786,0.499854,-0.343597
sp_500,205,cnp,0.553193,0.385039,0.023605


In [108]:
add_style(gb_filter_monotonic(russell_2000))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
asset,params,model,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
russell_2000,247,base,-0.028315,-0.171185,0.143836
russell_2000,247,cnp,0.279551,0.04235,0.337626
russell_2000,247,lnp,0.576315,0.277131,0.518862
russell_2000,247,np,0.768505,0.336708,0.844689


In [109]:
add_style(gb_filter_neg(gb_filter_monotonic_partial(russell_2000)))

Unnamed: 0_level_0,Unnamed: 1_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
params,model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
201,base,0.195808,0.00572,0.269333
201,cnp,-0.151888,-0.205597,0.032244
201,lnp,0.824378,0.472682,0.684209
201,np,0.913934,0.537139,0.746852
206,base,0.520605,0.450434,0.29046
206,cnp,0.690348,0.557371,0.418669
206,lnp,0.598864,0.426718,0.43496
206,np,1.150827,0.872971,0.751604
217,base,0.08871,0.123025,-0.008472
217,cnp,0.223257,0.195602,0.109989


In [110]:
add_style(gb_filter_monotonic(nasdaq_100))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
asset,params,model,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
nasdaq_100,210,base,-0.005665,-0.032225,-0.188084
nasdaq_100,210,cnp,0.13847,0.067838,-0.081068
nasdaq_100,210,lnp,0.446864,0.232965,0.208572
nasdaq_100,210,np,0.56899,0.399496,0.204025
nasdaq_100,220,base,-0.749615,-0.541149,-0.710509
nasdaq_100,220,cnp,-0.009824,-0.028037,-0.195887
nasdaq_100,220,lnp,0.046436,0.075493,-0.181308
nasdaq_100,220,np,0.490913,0.334775,0.156174
nasdaq_100,238,base,0.361292,0.306362,-0.013893
nasdaq_100,238,cnp,0.405523,0.394813,-0.011729


In [111]:
add_style(gb_filter_neg(gb_filter_monotonic_partial(nasdaq_100)))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
asset,params,model,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
nasdaq_100,201,base,0.246602,0.150473,-0.008361
nasdaq_100,201,cnp,0.778347,0.714597,0.267767
nasdaq_100,201,lnp,0.581991,0.522778,0.154849
nasdaq_100,201,np,0.583218,0.591772,0.135658
nasdaq_100,202,base,0.629,0.359758,0.337397
nasdaq_100,202,cnp,-0.386389,-0.274728,-0.474365
nasdaq_100,202,lnp,0.760547,0.837919,0.223377
nasdaq_100,202,np,0.882062,0.596759,0.445001
nasdaq_100,205,base,-0.196595,-0.084193,-0.447035
nasdaq_100,205,cnp,0.049274,0.164545,-0.26143


In [112]:
add_style(gb_filter_monotonic(dow_jones))

ValueError: Cannot remove 1 levels from an index with 1 levels: at least one level must be left.

In [114]:
add_style(gb_filter_neg(gb_filter_monotonic_partial(dow_jones)))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,val_binary_sharpe,val_binary_long_sharpe,val_binary_short_sharpe
asset,params,model,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dow_jones,201,base,0.384828,0.640154,-0.499991
dow_jones,201,cnp,0.635738,0.970582,-0.317809
dow_jones,201,lnp,0.412479,0.568648,-0.483099
dow_jones,201,np,0.446915,0.446915,-0.449068
dow_jones,204,base,-0.148747,0.400011,-0.867965
dow_jones,204,cnp,-0.342331,0.135648,-1.213873
dow_jones,204,lnp,0.495003,0.798197,-0.413577
dow_jones,204,np,0.704348,0.964007,-0.26293
dow_jones,212,base,0.733815,0.815027,-0.198121
dow_jones,212,cnp,1.497108,1.284512,0.458422
