# nb-model_exp-results-final

In [1]:
import sys
import os
from glob import glob, iglob
from os import sep
from os.path import dirname, realpath, exists, basename
from pathlib import Path
from distutils.dir_util import copy_tree
import logging

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Convenience function to make a directory string for the current file based on inputs.
    Jupyter Notebook in Anaconda invokes the Python interpreter in Anaconda's subdirectory
    which is why changing sys.argv[0] is necessary. In the future a better way to do this
    should be preferred..
    """
    return crunch_dir +subdir +fname

def fix_path(cwd):
    """
    Convenience function to fix argv and python path so that jupyter notebook can run the same as
    any script in crunch.
    """
    sys.argv[0] = cwd
    module_path = os.path.abspath(os.path.join('..'))
    if module_path not in sys.path:
        sys.path.append(module_path)

fname = 'nb-model_exp-results-final.ipynb'
dir_name = 'model'
fix_path(get_cwd(fname, dir_name +sep))

import numpy as np
import pandas as pd
import optuna

from ipywidgets import interact, interactive, fixed
from IPython.display import display

pd.set_option("display.max_rows", 300)
pd.set_option("display.max_columns", 50)

from common_util import MODEL_DIR, dump_df, benchmark, NestedDefaultDict, load_json, makedir_if_not_exists
from model.common import ASSETS, INTERVAL_YEARS, INTRADAY_LEN, EXP_LOG_DIR, EXP_PARAMS_DIR
from model.common import OPTUNA_DB_FNAME, OPTUNA_N_TRIALS, OPTUNA_TIMEOUT_HOURS
from recon.viz import *
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

CRITICAL:root:script location: /home/kev/crunch/model/nb-model_exp-results-final.ipynb
CRITICAL:root:using project dir: /home/kev/crunch/


## Final Results

In [2]:
def get_bench(pattern):
    ndd = NestedDefaultDict()
    for path in iglob(pattern, recursive=True):
        ndd[path.split('/')] = load_json(path)

    df_idx = [k[-3:-1] for k in ndd.keys()]
    midx = pd.MultiIndex.from_tuples([k[-3:-1] for k in ndd.keys()], names=['asset', 'data'])
    df = pd.DataFrame.from_records([b['benchmark-hold'] for b in ndd.values()], index=midx)

    return df

In [3]:
def get_final(split, data_names):
    res = []
    for asset_name in ASSETS:
        asset_dir = f"{test_dir}{asset_name}{sep}"

        for data_name in data_names:
            paths = [path for path in iglob(f"{asset_dir}/**/{data_name}/**/{split}.json", recursive=True)]
            idxs = list(map(lambda p: p.replace(asset_dir, '').replace(data_name+sep, '').replace(f"{sep}{split}.json", ''), paths))
            idx = [(asset_name, data_name, *i.split(sep)) for i in idxs]
            midx = pd.MultiIndex.from_tuples(idx, names=['asset', 'data', 'model', 'study', 'trial'])
            r = list(map(load_json, paths))
            res.append(pd.DataFrame.from_records(r, index=midx))
    return pd.concat(res)

In [4]:
def get_sel(split):
    return [f'{split}_clf_accuracy',
           f'{split}_kelly_sharpe', f'{split}_kelly_cagr',
           f'{split}_binary_sharpe', f'{split}_binary_cagr',
           f'{split}_conf_sharpe', f'{split}_conf_cagr',
           f'{split}_binary_longfreq']

In [5]:
def get_best(res_df, key, sel):
    max_idx = res_df.groupby('model')[key].idxmax()
    return res_df.loc[max_idx, sel]

In [6]:
def bench_diff(res_df, bench_df, split):
    res_df[f'{split}_clf_accuracy'] -= bench_df[f'{split}_clf_accuracy']
    for retmet in ['sharpe', 'cagr']:
        res_df[f'{split}_kelly_{retmet}'] -= bench_df[f'{split}_{retmet}']
        res_df[f'{split}_binary_{retmet}'] -= bench_df[f'{split}_{retmet}']
        res_df[f'{split}_conf_{retmet}'] -= bench_df[f'{split}_{retmet}']
    return res_df

In [7]:
def add_style(df):
    df = df.style \
         .background_gradient(subset=[c for c in df.columns if c.endswith(('accuracy', 'f1', 'profit', 'sharpe', 'cagr'))], cmap='BuGn')
#     .set_sticky(axis="index") \
    return df

In [8]:
sm_name = 'anp'
model_name = 'np'
# data_names = ['2009_2018_ddir_h_pba_h', '2009_2018_ddir_h_pba_h_h_vol_h']
data_names = ['2007_2018_ddir_h_pba_h', '2007_2018_ddir_h_pba_h_h_vol_h']
#test_dir = EXP_LOG_DIR +sep.join(['final_3', sm_name]) +sep
test_dir = EXP_LOG_DIR +sep.join(['resfinal_5', sm_name]) +sep

bench = {split: get_bench(f"{EXP_LOG_DIR}/bench/**/{split}.json") for split in ['train', 'val', 'test']}
final = {split: get_final(split, data_names) for split in ['train', 'val', 'test']}

In [9]:
datas = data_names[:2]
year = datas[0][:4]
split = 'test'
benchyear = bench[split].xs(f'{year}_2018_ddir', level=1)
key = f'{split}_kelly_sharpe'
sel = get_sel(split)
params = load_json("params.json", EXP_LOG_DIR+'resfinal_5')

res = []
for asset_name in ASSETS:
    b = benchyear.loc[asset_name]
    asset_res = []
    for data_name in datas:
        for model_name in ["base", "cnp", "lnp", "np"]:
            p = params[asset_name][data_name][model_name]
            best = final[split].xs((asset_name, data_name, model_name), drop_level=False)
            if (len(lvl := best.index.get_level_values(-1)) > 0):
                pv = [v for v in lvl if (p and v.endswith(p))]
                if (len(pv) > 0):
                    best = best.xs(pv[0], level=-1, drop_level=False).loc[:, sel]
                    asset_res.append(best)
                else:
                    pass
                    #best = best.xs(lvl[0], level=-1, drop_level=False).replace(value=0.0, regex=r'^.*$').loc[:, sel] 
                    #asset_res.append(best)
                
    asset_res = pd.concat(asset_res)
    res.append(asset_res)
    #display(add_style(asset_res))
    #best = bench_diff(best, b, split)
res = pd.concat(res)
display(add_style(res))

  return runner(coro)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
asset,data,model,study,trial,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
sp_500,2007_2018_ddir_h_pba_h,base,471,7_000414,0.51875,1.007975,0.036698,0.833483,0.090223,0.893415,0.063972,0.614062
sp_500,2007_2018_ddir_h_pba_h,lnp,471,3_000608,0.501562,0.72724,0.010301,0.655686,0.068969,0.675624,0.040504,0.5875
sp_500,2007_2018_ddir_h_pba_h_h_vol_h,lnp,376,0_000874,0.495313,0.620191,0.007795,0.69006,0.073276,0.689759,0.041332,0.485938
russell_2000,2007_2018_ddir_h_pba_h,base,445,7_000430,0.521875,-0.324901,-0.014836,0.262435,0.027808,0.12983,0.007726,0.6375
russell_2000,2007_2018_ddir_h_pba_h,cnp,442,9_000316,0.51875,0.605867,0.003486,0.760667,0.104055,0.760106,0.055115,0.55
russell_2000,2007_2018_ddir_h_pba_h,lnp,442,7_000538,0.514063,0.717244,0.008286,0.492912,0.062428,0.5156,0.037333,0.407813
russell_2000,2007_2018_ddir_h_pba_h,np,445,6_000692,0.509375,0.481784,0.007943,0.721536,0.097883,0.708144,0.05418,0.515625
russell_2000,2007_2018_ddir_h_pba_h_h_vol_h,base,337,1_000992,0.514063,1.019543,0.036477,1.113075,0.161176,1.111006,0.098778,0.489063
russell_2000,2007_2018_ddir_h_pba_h_h_vol_h,cnp,337,9_000526,0.509375,0.777495,0.018141,0.758767,0.103763,0.7775,0.062151,0.553125
russell_2000,2007_2018_ddir_h_pba_h_h_vol_h,lnp,338,8_000065,0.510938,1.459713,0.047278,1.089026,0.157116,1.183966,0.102727,0.48125


In [10]:
datas = data_names[:2]
year = datas[0][:4]
split = 'val'
benchyear = bench[split].xs(f'{year}_2018_ddir', level=1)
key = f'{split}_kelly_sharpe'
sel = get_sel(split)
params = load_json("params.json", EXP_LOG_DIR+'resfinal_5')

res = []
for asset_name in ASSETS:
    b = benchyear.loc[asset_name]
    asset_res = []
    for data_name in datas:
        for model_name in ["base", "cnp", "lnp", "np"]:
            p = params[asset_name][data_name][model_name]
            best = final[split].xs((asset_name, data_name, model_name), drop_level=False)
            if (len(lvl := best.index.get_level_values(-1)) > 0):
                pv = [v for v in lvl if (p and v.endswith(p))]
                if (len(pv) > 0):
                    best = best.xs(pv[0], level=-1, drop_level=False).loc[:, sel]
                    asset_res.append(best)
    asset_res = pd.concat(asset_res)
    res.append(asset_res)
    #display(add_style(asset_res))
    #best = bench_diff(best, b, split)
res = pd.concat(res)
display(add_style(res))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,val_clf_accuracy,val_kelly_sharpe,val_kelly_cagr,val_binary_sharpe,val_binary_cagr,val_conf_sharpe,val_conf_cagr,val_binary_longfreq
asset,data,model,study,trial,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
sp_500,2007_2018_ddir_h_pba_h,base,471,7_000414,0.529688,1.1391,0.041588,1.075833,0.119238,1.108677,0.080545,0.557813
sp_500,2007_2018_ddir_h_pba_h,lnp,471,3_000608,0.514063,0.177048,0.002418,0.464162,0.046262,0.439921,0.025369,0.567187
sp_500,2007_2018_ddir_h_pba_h_h_vol_h,lnp,376,0_000874,0.526563,0.810325,0.010108,1.46761,0.168176,1.415232,0.087612,0.49375
russell_2000,2007_2018_ddir_h_pba_h,base,445,7_000430,0.559211,0.561942,0.023045,1.344464,0.200636,1.185098,0.10987,0.606908
russell_2000,2007_2018_ddir_h_pba_h,cnp,442,9_000316,0.539474,1.273717,0.006844,1.269028,0.187776,1.278259,0.096244,0.498355
russell_2000,2007_2018_ddir_h_pba_h,lnp,442,7_000538,0.516447,0.793785,0.008708,1.174664,0.171875,1.1609,0.089708,0.350329
russell_2000,2007_2018_ddir_h_pba_h,np,445,6_000692,0.544408,0.750817,0.012033,1.426877,0.214833,1.383222,0.111196,0.453947
russell_2000,2007_2018_ddir_h_pba_h_h_vol_h,base,337,1_000992,0.514803,1.673802,0.062014,1.271202,0.188118,1.381414,0.125119,0.493421
russell_2000,2007_2018_ddir_h_pba_h_h_vol_h,cnp,337,9_000526,0.526316,0.706098,0.017547,0.859021,0.119993,0.856603,0.069616,0.541118
russell_2000,2007_2018_ddir_h_pba_h_h_vol_h,lnp,338,8_000065,0.525,0.680627,0.019473,0.961625,0.134717,0.932759,0.07742,0.473438


In [11]:
datas = data_names[:2]
year = datas[0][:4]
split = 'train'
benchyear = bench[split].xs(f'{year}_2018_ddir', level=1)
key = f'{split}_kelly_sharpe'
sel = get_sel(split)
display(year)
params = load_json("params.json", EXP_LOG_DIR+'resfinal_5')

res = []
for asset_name in ASSETS:
    display(asset_name)
    b = benchyear.loc[asset_name]
    asset_res = []
    for data_name in datas:
        for model_name in ["base", "cnp", "lnp", "np"]:
            p = params[asset_name][data_name][model_name]
            best = final[split].xs((asset_name, data_name, model_name), drop_level=False)
            if (len(lvl := best.index.get_level_values(-1)) > 0):
                pv = [v for v in lvl if (p and v.endswith(p))]
                if (len(pv) > 0):
                    best = best.xs(pv[0], level=-1, drop_level=False).loc[:, sel]
                    asset_res.append(best)
    asset_res = pd.concat(asset_res)
    res.append(asset_res)
    #display(add_style(asset_res))
    #best = bench_diff(best, b, split)
res = pd.concat(res)
display(add_style(res))

'2007'

'sp_500'

'russell_2000'

'nasdaq_100'

'dow_jones'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,train_clf_accuracy,train_kelly_sharpe,train_kelly_cagr,train_binary_sharpe,train_binary_cagr,train_conf_sharpe,train_conf_cagr,train_binary_longfreq
asset,data,model,study,trial,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
sp_500,2007_2018_ddir_h_pba_h,base,471,7_000414,0.90625,10.500305,1.310036,10.358203,7.663572,10.582874,3.488374,0.485518
sp_500,2007_2018_ddir_h_pba_h,lnp,471,3_000608,0.591463,2.502274,0.103115,1.724295,0.486255,1.880877,0.288224,0.466463
sp_500,2007_2018_ddir_h_pba_h_h_vol_h,lnp,376,0_000874,0.55564,1.482514,0.035994,1.67609,0.443656,1.699688,0.23031,0.290396
russell_2000,2007_2018_ddir_h_pba_h,base,445,7_000430,0.755335,6.865677,0.825102,5.967131,4.31039,6.293795,2.13241,0.519055
russell_2000,2007_2018_ddir_h_pba_h,cnp,442,9_000316,0.542683,1.1922,0.021914,0.924584,0.259756,0.953468,0.146308,0.592988
russell_2000,2007_2018_ddir_h_pba_h,lnp,442,7_000538,0.557927,0.959142,0.031577,1.404652,0.456495,1.385595,0.237341,0.425305
russell_2000,2007_2018_ddir_h_pba_h,np,445,6_000692,0.556402,0.996965,0.046079,0.492929,0.108037,0.574474,0.086173,0.564024
russell_2000,2007_2018_ddir_h_pba_h_h_vol_h,base,337,1_000992,0.775915,8.23769,0.819861,6.872304,5.067599,7.314838,2.342862,0.521341
russell_2000,2007_2018_ddir_h_pba_h_h_vol_h,cnp,337,9_000526,0.666921,5.880765,0.370604,4.622895,2.440187,4.964595,1.187216,0.575457
russell_2000,2007_2018_ddir_h_pba_h_h_vol_h,lnp,338,8_000065,0.709604,5.532711,0.442792,4.919991,2.853016,5.187528,1.374717,0.479421


In [12]:
datas = data_names[:2]
year = datas[0][:4]
split = 'test'
benchyear = bench[split].xs(f'{year}_2018_ddir', level=1)
key = f'{split}_kelly_sharpe'
sel = get_sel(split)
display(year)

for asset_name in ASSETS:
    display(asset_name)
    b = benchyear.loc[asset_name]
    for data_name in datas:
        best = get_best(final[split].xs((asset_name, data_name)), key, sel)
        best = bench_diff(best, b, split)
        display(add_style(best))

'2007'

'sp_500'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,471,7_000414,-0.00625,0.574439,-0.006422,0.399948,0.047104,0.459879,0.020853,0.614062
cnp,476,8_000655,-0.009375,-0.563546,-0.044003,-0.285453,-0.032772,-0.299779,-0.036982,0.946875
lnp,476,5_000111,-0.0125,0.458327,-0.038517,0.286864,0.033797,0.299328,-0.001494,0.315625
np,476,4_000507,-0.00625,0.344562,-0.040595,0.785583,0.095081,0.780038,0.026673,0.49375


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,376,9_000190,0.014063,0.967469,-0.006164,1.481762,0.185626,1.422015,0.086796,0.501563
cnp,482,7_000769,0.023438,0.535037,-0.039824,0.747123,0.089656,0.744125,0.024509,0.901563
lnp,376,1_000941,0.0,0.486154,-0.037138,0.470322,0.055968,0.475878,0.009856,0.63125
np,376,1_000941,0.001563,0.372827,-0.036173,0.081606,0.009445,0.105176,-0.012231,0.989062


'russell_2000'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,442,9_000316,-0.015625,-0.498159,-0.087222,-0.042727,-0.006396,-0.084546,-0.045425,0.49375
cnp,442,9_000316,-0.003125,-0.068491,-0.086701,0.086309,0.013867,0.085748,-0.035073,0.55
lnp,442,7_000538,-0.007812,0.042886,-0.081901,-0.181446,-0.02776,-0.158758,-0.052854,0.407813
np,445,6_000692,-0.0125,-0.192574,-0.082244,0.047178,0.007695,0.033785,-0.036007,0.515625


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,337,1_000992,-0.007812,0.345185,-0.05371,0.438717,0.070989,0.436648,0.008591,0.489063
cnp,337,6_000927,0.0125,0.21289,-0.065089,0.368455,0.059392,0.362636,-0.002681,0.503125
lnp,338,8_000065,-0.010938,0.785355,-0.04291,0.414668,0.066929,0.509608,0.01254,0.48125
np,337,5_000366,-0.017188,0.112398,-0.077022,-0.147145,-0.022519,-0.10992,-0.047912,0.548438


'nasdaq_100'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,455,7_000202,-0.057813,0.468121,-0.027874,0.564195,0.076329,0.552255,0.023969,0.604688
cnp,452,1_000177,-0.05625,0.461966,-0.045577,0.403631,0.054067,0.43605,0.004482,0.646875
lnp,452,2_000087,-0.060938,0.4429,-0.049578,0.593582,0.080461,0.604767,0.01501,0.445312
np,455,6_000127,-0.064063,0.120222,-0.048501,0.032631,0.004116,0.065227,-0.021347,0.520312


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,420,7_000963,-0.019819,0.390362,-0.028991,0.102187,0.014158,0.187297,-0.006665,0.633224
cnp,425,2_000000,-0.041201,0.524099,-0.048354,0.267826,0.036659,0.32283,-0.005201,0.536184
lnp,420,9_000808,-0.026398,1.155357,0.002272,0.836107,0.11692,0.944127,0.059248,0.643092
np,423,5_000906,-0.064227,0.495212,-0.03025,0.03653,0.005417,0.157227,-0.011479,0.523026


'dow_jones'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,342,5_000515,-0.05,0.261887,-0.053124,-0.230282,-0.027776,-0.152624,-0.039304,0.4375
cnp,342,0_000644,-0.001562,-0.498248,-0.068676,-0.0844,-0.010559,-0.100736,-0.03854,0.320312
lnp,342,5_000515,-0.025,0.468608,-0.045994,-0.194472,-0.023573,-0.077419,-0.033667,0.56875
np,342,1_000675,-0.028125,0.074449,-0.066425,-0.697997,-0.081227,-0.668784,-0.072375,0.69375


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,445,3_000944,-0.005099,-0.074663,-0.057496,0.124772,0.016092,0.101248,-0.02016,0.702303
cnp,447,0_000457,-0.049507,-0.618103,-0.069135,-0.306895,-0.036216,-0.355909,-0.051683,0.555921
lnp,445,8_000737,-0.016612,0.039571,-0.043635,0.309316,0.039219,0.256711,-0.002139,0.585526
np,447,2_000010,-0.001809,-0.067603,-0.042682,0.025606,0.003867,0.001117,-0.019047,0.751645


In [11]:
sm_name = 'anp'
model_name = 'np'
data_names = ['1996_2018_ddir_h_pba_h']
#test_dir = EXP_LOG_DIR +sep.join(['final_3', sm_name]) +sep
test_dir = EXP_LOG_DIR +sep.join(['final', sm_name]) +sep

bench = {split: get_bench(f"{EXP_LOG_DIR}/bench/**/{split}.json") for split in ['train', 'val', 'test']}
final = {split: get_final(split, data_names) for split in ['train', 'val', 'test']}

In [12]:
datas = data_names[:2]
year = datas[0][:4]
split = 'test'
benchyear = bench[split].xs(f'{year}_2018_ddir', level=1)
key = f'{split}_kelly_sharpe'
sel = get_sel(split)
display(year)

for asset_name in ASSETS:
    display(asset_name)
    b = benchyear.loc[asset_name]
    for data_name in datas:
        best = get_best(final[split].xs((asset_name, data_name)), key, sel)
        best = bench_diff(best, b, split)
        display(add_style(best))

'1996'

'sp_500'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,471,1_000110,-0.03125,-0.229629,-0.074218,-0.359223,-0.042299,-0.332089,-0.05723,0.488567
cnp,476,5_000111,-0.023628,-0.22227,-0.078557,0.1275,0.015226,0.103712,-0.031371,0.410061
lnp,471,0_000474,-0.012195,-0.096233,-0.067988,0.232577,0.027812,0.198589,-0.020058,0.51372
np,471,3_000608,0.0,0.042844,-0.062361,-0.020728,-0.002718,-0.001753,-0.032004,1.0


'russell_2000'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,440,4_001000,-0.015244,0.080177,-0.059447,0.187021,0.029069,0.184899,-0.014299,0.510671
cnp,445,2_000891,0.000762,0.16708,-0.078429,0.045371,0.007006,0.048459,-0.034047,0.999238
lnp,442,4_000141,-0.016768,0.177243,-0.043422,0.066978,0.010355,0.104452,-0.015556,0.542683
np,442,6_000206,0.001524,0.334315,-0.077184,0.567704,0.090366,0.56658,0.005978,0.803354


'nasdaq_100'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,452,5_000035,-0.075457,0.251312,-0.059927,-0.110365,-0.013828,-0.079043,-0.035515,0.283537
cnp,452,0_000940,-0.03125,0.636432,-0.038644,0.7223,0.094927,0.736676,0.027464,0.515244
lnp,452,1_000177,-0.043445,0.544501,-0.045251,0.644556,0.084362,0.653385,0.019053,0.445122
np,452,2_000087,-0.044969,0.321201,-0.047915,-0.06451,-0.008099,0.005093,-0.026854,0.66311


'dow_jones'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,342,3_000923,-0.012957,-0.272332,-0.092107,0.044254,0.005337,0.034774,-0.043004,0.685976
cnp,342,1_000675,0.003049,0.131671,-0.08436,0.054561,0.006601,0.069048,-0.038483,0.996189
lnp,342,3_000923,-0.034299,-0.32546,-0.0932,-0.435487,-0.052121,-0.428949,-0.071362,0.440549
np,342,1_000675,0.004573,0.064093,-0.084148,0.078043,0.00949,0.084999,-0.037011,0.983994


In [15]:
sm_name = 'anp'
model_name = 'np'
data_names = ['2007_2018_ddir_h_pba_h', '2007_2018_ddir_h_pba_h_h_vol_h']
#test_dir = EXP_LOG_DIR +sep.join(['final_3', sm_name]) +sep
test_dir = EXP_LOG_DIR +sep.join(['final_5', sm_name]) +sep

bench = {split: get_bench(f"{EXP_LOG_DIR}/bench/**/{split}.json") for split in ['train', 'val', 'test']}
final = {split: get_final(split, data_names) for split in ['train', 'val', 'test']}

In [16]:
datas = data_names[:2]
year = datas[0][:4]
split = 'test'
benchyear = bench[split].xs(f'{year}_2018_ddir', level=1)
key = f'{split}_kelly_sharpe'
sel = get_sel(split)
display(year)

for asset_name in ASSETS:
    display(asset_name)
    b = benchyear.loc[asset_name]
    for data_name in datas:
        best = get_best(final[split].xs((asset_name, data_name)), key, sel)
        best = bench_diff(best, b, split)
        display(add_style(best))

'2007'

'sp_500'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,471,5_000159,-0.025,0.853245,-0.022469,-0.586771,-0.066063,-0.405173,-0.043288,0.504687
cnp,476,4_000507,0.007812,0.673701,-0.037062,0.790722,0.095725,0.796977,0.028813,0.607812
lnp,476,2_000979,-0.029687,0.479624,-0.035903,0.290111,0.034186,0.310888,-1e-05,0.523438
np,471,3_000608,0.015625,0.653798,-0.038259,-0.011856,-0.001596,0.01691,-0.018608,0.807813


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,376,0_000874,-0.035937,0.275717,-0.028621,0.486679,0.057995,0.470291,0.01497,0.364063
cnp,376,7_000698,-0.004687,0.555841,-0.02936,0.937759,0.114358,0.910871,0.041459,0.670312
lnp,376,5_000371,-0.028125,0.764527,-0.014469,0.274687,0.032333,0.37541,0.009768,0.521875
np,376,5_000371,-0.009375,0.245491,-0.03806,-0.183263,-0.021188,-0.154924,-0.028263,0.946875


'russell_2000'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,442,9_000316,-0.007812,0.001723,-0.081978,-0.419743,-0.063546,-0.381838,-0.070496,0.532813
cnp,445,4_000516,0.0,0.113382,-0.081948,-0.045115,-0.006769,-0.033639,-0.042704,1.0
lnp,445,2_000891,-0.010938,0.529307,-0.086887,-0.145116,-0.022209,-0.130153,-0.052454,0.445312
np,445,4_000516,-0.014063,0.384484,-0.0865,-0.260679,-0.039785,-0.243106,-0.060854,0.560937


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,337,5_000366,-0.0125,0.7496,-0.055116,0.148861,0.023805,0.25327,-0.014331,0.4875
cnp,337,3_000192,-0.003125,0.359319,-0.064443,0.173847,0.027811,0.21533,-0.017303,0.571875
lnp,338,2_000399,-0.00625,0.409172,-0.071638,-0.164633,-0.025229,-0.095678,-0.046511,0.629687
np,338,1_000951,0.001562,0.585577,-0.038076,0.174654,0.027884,0.28455,-0.003995,0.50625


'nasdaq_100'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,452,9_000841,-0.035938,0.432562,-0.049911,0.515291,0.069488,0.53155,0.009646,0.620313
cnp,452,3_000275,-0.05625,0.433504,-0.042715,-0.244278,-0.031795,-0.099949,-0.035951,0.61875
lnp,452,3_000275,-0.03125,0.59613,-0.040621,0.632804,0.08595,0.647192,0.022191,0.59375
np,455,2_000571,-0.046875,0.485509,-0.028005,0.26533,0.035213,0.333195,0.004144,0.696875


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,420,3_000773,-0.031332,1.088857,-0.016745,1.253292,0.17907,1.246663,0.078294,0.536184
cnp,423,2_000084,-0.019819,1.108334,0.001523,1.18078,0.168076,1.185729,0.082859,0.620066
lnp,423,7_000784,-0.024753,1.309258,-0.030586,0.950542,0.133718,1.018394,0.050069,0.424342
np,423,2_000084,-0.026398,0.610236,-0.032753,0.213399,0.029238,0.321927,-0.000915,0.564145


'dow_jones'

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,342,5_000515,-0.051562,-0.038238,-0.06124,-0.15016,-0.018347,-0.128949,-0.038696,0.420312
cnp,342,6_000561,0.01875,0.95698,-0.052071,0.416933,0.050669,0.474969,-0.000518,0.959375
lnp,342,1_000675,-0.051562,0.218615,-0.063417,-1.134192,-0.128569,-1.058326,-0.095139,0.707812
np,342,3_000923,0.00625,0.15045,-0.063545,0.053638,0.005978,0.0601,-0.027929,1.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,test_clf_accuracy,test_kelly_sharpe,test_kelly_cagr,test_binary_sharpe,test_binary_cagr,test_conf_sharpe,test_conf_cagr,test_binary_longfreq
model,study,trial,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
base,445,0_000339,-0.014967,0.03205,-0.045309,0.091467,0.011973,0.089346,-0.016153,0.672697
cnp,447,5_000943,-0.006743,-0.000638,-0.044008,0.373582,0.04737,0.289812,0.001534,0.476974
lnp,447,5_000943,-0.008388,0.65863,-0.027637,0.268346,0.034041,0.381623,0.003829,0.672697
np,445,9_000643,0.00477,0.532195,-0.061643,-0.081369,-0.009199,-0.041126,-0.034214,0.912829


In [25]:
datas = data_names[2:]
year = datas[0][:4]
split = 'test'
benchyear = bench[split].xs(f'{year}_2018_ddir', level=1)
key = f'{split}_kelly_sharpe'
sel = get_sel(split)
display(year)

for asset_name in ASSETS:
    display(asset_name)
    b = benchyear.loc[asset_name]
    for data_name in datas:
        best = get_best(final[split].xs((asset_name, data_name)), key, sel)
        best = bench_diff(best, b, split)
        display(add_style(best))

IndexError: list index out of range