# nb-model_tcn-4

In [1]:
import sys
import os
from os import sep
from os.path import dirname, realpath, exists
from pathlib import Path
from functools import partial
import logging

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Build the path string of a file that lives inside the crunch project tree.

    The pieces are concatenated as-is (`crunch_dir + subdir + fname`); no path
    separators are inserted, so `crunch_dir` and `subdir` must already end
    with one.

    Args:
        fname: file name to locate (e.g. this notebook's file name).
        subdir: sub-directory below `crunch_dir`, including its trailing separator.
        crunch_dir: project root, including its trailing separator. Defaults to
            `<home>/crunch/`, evaluated once when the function is defined.

    Returns:
        The concatenated path string.
    """
    project_subdir = crunch_dir +subdir
    return project_subdir +fname

def fix_path(cwd):
    """
    Adjust argv and the module search path so the notebook resolves imports
    the same way a script inside crunch would.

    Side effects:
        - `sys.argv[0]` is overwritten with `cwd`.
        - The absolute path of the parent of the process's current working
          directory is appended to `sys.path`, unless it is already listed.

    Args:
        cwd: path string to store in `sys.argv[0]`.
    """
    sys.argv[0] = cwd
    parent_dir = os.path.abspath('..')
    if parent_dir in sys.path:
        return  # already importable; avoid duplicate entries on cell re-runs
    sys.path.append(parent_dir)

# Set sys.argv[0] to this notebook's path under the crunch tree and extend
# sys.path; this runs before the project-local imports further down the cell.
fname, dir_name = 'nb-model_tcn.ipynb', 'model'
fix_path(get_cwd(fname, f'{dir_name}{sep}'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import pytorch_lightning as pl
import optuna
from verification.batch_norm import BatchNormVerificationCallback
from verification.batch_gradient import BatchGradientVerificationCallback
# from pytorch_lightning.callbacks.early_stopping import EarlyStopping
# from pytorch_lightning import loggers as pl_loggers

from ipywidgets import interact, interactive, fixed
from IPython.display import display

pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 50)

from common_util import MODEL_DIR, str_now, is_valid, isnt, makedir_if_not_exists, load_df, load_json, dump_json, rectify_json
from model.common import ASSETS, INTERVAL_YEARS, OPTUNA_DB_FNAME, OPTUNA_N_TRIALS, OPTUNA_TIMEOUT_HOURS, INTRADAY_LEN
from model.common import PYTORCH_ACT1D_LIST, PYTORCH_INIT_LIST
from model.xg_util import get_xg_feature_dfs, get_xg_label_target_dfs, get_hardcoded_feature_dfs, get_hardcoded_label_target_dfs
from model.oview import df_study_stats
from recon.viz import *
# from model.pl_np import ANP
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

# %autoreload 1
# %aimport model.np_util, model.pl_np, model.pl_generic, model.metrics_util, model.model_util

CRITICAL:root:script location: /home/kev/crunch/model/nb-model_tcn.ipynb
CRITICAL:root:using project dir: /home/kev/crunch/


## load optuna trials

In [2]:
# --- study configuration: identical for every asset, so computed once ---
model_name = 'stcn'  # model options: stcn, anp
fdata_name = 'h_pba_mzo,h_vol_mzo'
ldata_name = 'ddir'
monitor = 'val_accuracy'
# Only 'val_loss' is treated as a metric to minimize; anything else is maximized.
# Defined outside the loop so later cells can read it even if ASSETS is empty.
optimize_min = {
    'val_loss': True
}.get(monitor, False)
interval = INTERVAL_YEARS
data_name = (f'{interval[0]}_{interval[1]}'
    f'_{ldata_name}_{fdata_name}').replace(',', '_')

# Resolve the lightning wrapper / torch model pair for the chosen model.
# The imports stay conditional so only the selected model's modules are loaded.
if (model_name in ('stcn', 'StackedTCN', 'GenericModel_StackedTCN')):
    from model.pl_generic import GenericModel
    from model.model_util import StackedTCN
    pl_model_fn, pt_model_fn = GenericModel, StackedTCN
elif (model_name in ('anp', 'AttentiveNP', 'NPModel_AttentiveNP')):
    from model.pl_np import NPModel
    from model.np_util import AttentiveNP
    pl_model_fn, pt_model_fn = NPModel, AttentiveNP
else:
    # Fail loudly instead of hitting a NameError below (fresh kernel) or
    # silently reusing classes left over from an earlier run (stale kernel).
    raise ValueError(f'unknown model_name: {model_name!r}')
# Canonical name used in the study directory layout and the study name.
model_name = f'{pl_model_fn.__name__}_{pt_model_fn.__name__}'

# --- load one optuna study per asset as a trials DataFrame ---
studies = {}
for asset_name in ASSETS:
    study_dir = MODEL_DIR \
        +sep.join(['olog', model_name, asset_name, data_name, monitor]) +sep
    study_name = ','.join([model_name, asset_name, data_name, monitor])
    study_db_path = f'sqlite:///{study_dir}{OPTUNA_DB_FNAME}'

    print(f'study name:  {study_name}')
    print(f'study dir:   {study_dir}')
    print(f'study db:    {study_db_path}')
    print()

    # Trials frame sorted ascending by objective value.
    studies[asset_name] = optuna.load_study(storage=study_db_path, study_name=study_name) \
        .trials_dataframe().sort_values(by='value')

study name:  GenericModel_StackedTCN,sp_500,2007_2018_ddir_h_pba_mzo_h_vol_mzo,val_accuracy
study dir:   /home/kev/crunch/model/olog/GenericModel_StackedTCN/sp_500/2007_2018_ddir_h_pba_mzo_h_vol_mzo/val_accuracy/
study db:    sqlite:////home/kev/crunch/model/olog/GenericModel_StackedTCN/sp_500/2007_2018_ddir_h_pba_mzo_h_vol_mzo/val_accuracy/trials.db

study name:  GenericModel_StackedTCN,russell_2000,2007_2018_ddir_h_pba_mzo_h_vol_mzo,val_accuracy
study dir:   /home/kev/crunch/model/olog/GenericModel_StackedTCN/russell_2000/2007_2018_ddir_h_pba_mzo_h_vol_mzo/val_accuracy/
study db:    sqlite:////home/kev/crunch/model/olog/GenericModel_StackedTCN/russell_2000/2007_2018_ddir_h_pba_mzo_h_vol_mzo/val_accuracy/trials.db

study name:  GenericModel_StackedTCN,nasdaq_100,2007_2018_ddir_h_pba_mzo_h_vol_mzo,val_accuracy
study dir:   /home/kev/crunch/model/olog/GenericModel_StackedTCN/nasdaq_100/2007_2018_ddir_h_pba_mzo_h_vol_mzo/val_accuracy/
study db:    sqlite:////home/kev/crunch/model/olog/Ge

## analyze trials

In [3]:
def f(study_df):
    """Placeholder with no behaviour yet: ignores `study_df` and returns None."""
    return None

In [4]:
#stats = df_study_stats(studies['sp_500'], optimize_min)

In [5]:
# Show the asset names; these are the keys of the `studies` dict built above.
print(ASSETS)

('sp_500', 'russell_2000', 'nasdaq_100', 'dow_jones')


In [5]:
# Select the first asset's trials frame.
# NOTE(review): the `for asset_name, study_df in studies.items()` loop in the
# next cell rebinds `study_df`, so this assignment looks unused — confirm.
study_df = studies[ASSETS[0]]
# Number of best trials to show per asset in the next cell; that cell passes
# keep='all', so ties at the cutoff can make the displayed table longer than n.
n = 10

In [6]:
# For every asset, rank its completed trials by objective value and show the best `n`.
for asset_name, study_df in studies.items():
    trial_state = study_df['state']
    completed_trials = study_df.loc[trial_state == 'COMPLETE']
    pruned_trials = study_df.loc[trial_state == 'PRUNED']  # not used further in this cell
    # Smallest values win when minimizing, largest otherwise; keep='all'
    # retains every trial tied at the cutoff.
    pick_best = completed_trials.nsmallest if (optimize_min) else completed_trials.nlargest
    topn = pick_best(n, 'value', keep='all')

    print(asset_name)
    display(topn)

sp_500


Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_class_weights[0],params_depth,params_epochs,params_global_dropout,params_input_dropout,params_kernel_sizes,params_lr,params_output_dropout,params_size,state
61,61,0.552632,2020-12-31 06:12:19.853341,2020-12-31 06:14:37.946183,0 days 00:02:18.092842,512,0.54,4,400,0.04,0.01,15,1e-06,0.02,31,COMPLETE
385,385,0.552632,2021-01-01 18:29:14.443177,2021-01-01 18:30:00.759563,0 days 00:00:46.316386,256,0.54,2,200,0.35,0.1,47,1e-06,0.45,11,COMPLETE
492,492,0.550607,2021-01-01 20:42:18.572605,2021-01-01 20:43:44.527055,0 days 00:01:25.954450,128,0.51,2,400,0.31,0.28,9,3e-06,0.03,6,COMPLETE
1376,1376,0.548583,2021-01-09 16:13:11.002207,2021-01-09 16:19:15.643853,0 days 00:06:04.641646,384,0.58,4,600,0.37,0.62,39,2e-06,0.98,25,COMPLETE
355,355,0.546559,2021-01-01 17:49:14.774573,2021-01-01 17:49:53.619662,0 days 00:00:38.845089,512,0.4,2,200,0.93,0.66,39,5e-06,0.22,6,COMPLETE
732,732,0.544534,2021-01-04 07:55:30.745398,2021-01-04 07:56:14.824801,0 days 00:00:44.079403,128,0.44,3,200,0.35,0.01,15,2e-06,0.3,8,COMPLETE
1305,1305,0.544534,2021-01-09 14:16:14.649808,2021-01-09 14:17:09.880983,0 days 00:00:55.231175,128,0.47,3,200,0.02,0.59,31,7e-06,0.76,19,COMPLETE
1341,1341,0.544534,2021-01-09 15:13:57.976231,2021-01-09 15:17:29.934257,0 days 00:03:31.958026,256,0.6,3,600,0.41,0.43,39,2e-06,0.82,30,COMPLETE
881,881,0.54251,2021-01-05 15:40:46.318609,2021-01-05 15:41:30.477928,0 days 00:00:44.159319,256,0.58,2,200,0.11,0.3,41,1.1e-05,0.8,11,COMPLETE
166,166,0.54251,2020-12-31 07:59:04.106851,2020-12-31 08:01:03.869250,0 days 00:01:59.762399,384,0.42,3,500,0.53,0.7,23,7.6e-05,0.77,4,COMPLETE


russell_2000


Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_class_weights[0],params_depth,params_epochs,params_global_dropout,params_input_dropout,params_kernel_sizes,params_lr,params_output_dropout,params_size,state
1010,1010,0.561866,2021-01-08 09:57:56.236964,2021-01-08 09:59:53.092934,0 days 00:01:56.855970,256,0.54,5,400,0.08,0.0,7,0.00015,0.15,6,COMPLETE
1177,1177,0.561866,2021-01-08 13:45:13.431492,2021-01-08 13:46:22.636905,0 days 00:01:09.205413,512,0.5,3,200,0.23,1.0,39,1e-06,0.74,26,COMPLETE
1420,1420,0.557809,2021-01-10 01:08:47.940836,2021-01-10 01:09:59.499286,0 days 00:01:11.558450,256,0.4,5,300,0.49,0.32,25,0.001115,0.42,2,COMPLETE
1042,1042,0.555781,2021-01-08 10:31:55.466394,2021-01-08 10:33:26.214691,0 days 00:01:30.748297,512,0.53,3,300,0.05,0.21,41,5e-06,0.76,22,COMPLETE
1007,1007,0.549696,2021-01-08 09:55:46.901070,2021-01-08 09:57:06.010054,0 days 00:01:19.108984,256,0.48,5,300,0.27,0.0,7,0.000295,0.22,2,COMPLETE
1410,1410,0.547667,2021-01-10 00:50:04.548193,2021-01-10 00:51:17.823651,0 days 00:01:13.275458,512,0.52,4,300,0.3,0.87,7,8e-06,0.74,30,COMPLETE
281,281,0.547667,2021-01-03 04:41:40.515240,2021-01-03 04:43:49.178506,0 days 00:02:08.663266,512,0.46,2,600,0.03,0.57,33,0.00086,0.69,2,COMPLETE
364,364,0.545639,2021-01-03 15:48:04.774826,2021-01-03 15:49:59.176321,0 days 00:01:54.401495,128,0.44,2,500,0.2,0.61,49,0.001641,0.07,4,COMPLETE
751,751,0.545639,2021-01-05 19:39:31.281760,2021-01-05 19:41:55.638144,0 days 00:02:24.356384,128,0.43,4,500,0.75,0.01,9,0.000162,0.26,31,COMPLETE
783,783,0.543611,2021-01-05 21:37:50.735784,2021-01-05 21:40:20.658861,0 days 00:02:29.923077,384,0.49,2,600,0.29,0.52,47,5e-06,0.29,6,COMPLETE


nasdaq_100


Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_class_weights[0],params_depth,params_epochs,params_global_dropout,params_input_dropout,params_kernel_sizes,params_lr,params_output_dropout,params_size,state
278,278,0.577236,2021-01-02 04:32:26.218989,2021-01-02 04:34:09.025161,0 days 00:01:42.806172,512,0.44,4,400,0.46,0.21,17,0.000108,0.82,5,COMPLETE
1457,1457,0.573171,2021-01-10 12:20:26.837070,2021-01-10 12:23:27.616805,0 days 00:03:00.779735,256,0.5,4,200,0.47,0.63,47,6.7e-05,0.69,19,COMPLETE
1264,1264,0.571138,2021-01-10 03:59:41.920958,2021-01-10 04:01:08.860723,0 days 00:01:26.939765,128,0.42,3,400,0.68,0.07,23,0.000211,0.96,3,COMPLETE
964,964,0.569106,2021-01-06 10:32:27.734282,2021-01-06 10:35:30.360117,0 days 00:03:02.625835,512,0.44,3,600,0.27,0.51,33,1.4e-05,0.69,16,COMPLETE
1263,1263,0.569106,2021-01-10 03:39:58.589113,2021-01-10 03:59:41.888934,0 days 00:19:43.299821,512,0.42,5,600,0.16,0.36,47,5e-06,0.79,17,COMPLETE
1241,1241,0.569106,2021-01-08 22:25:42.809816,2021-01-08 22:28:04.122696,0 days 00:02:21.312880,512,0.49,2,600,0.03,0.26,33,6e-06,0.66,14,COMPLETE
1371,1371,0.569106,2021-01-10 09:02:51.612983,2021-01-10 09:04:27.291415,0 days 00:01:35.678432,256,0.45,3,400,1.0,0.14,23,9.7e-05,0.92,1,COMPLETE
1199,1199,0.569106,2021-01-08 21:18:47.531716,2021-01-08 21:21:08.865508,0 days 00:02:21.333792,384,0.46,5,500,0.92,0.54,7,0.000201,0.79,3,COMPLETE
813,813,0.567073,2021-01-06 06:33:52.138125,2021-01-06 06:35:47.166104,0 days 00:01:55.027979,256,0.44,3,500,0.61,0.96,9,0.000119,0.87,6,COMPLETE
430,430,0.567073,2021-01-02 07:50:45.307131,2021-01-02 07:51:55.376076,0 days 00:01:10.068945,128,0.4,3,300,0.69,0.08,23,4.7e-05,1.0,13,COMPLETE


dow_jones


Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_class_weights[0],params_depth,params_epochs,params_global_dropout,params_input_dropout,params_kernel_sizes,params_lr,params_output_dropout,params_size,state
496,496,0.558045,2021-01-04 03:04:09.728299,2021-01-04 03:06:21.669779,0 days 00:02:11.941480,512,0.5,4,400,0.13,0.77,15,2.3e-05,0.64,27,COMPLETE
428,428,0.556008,2021-01-03 22:48:13.734913,2021-01-03 22:50:45.292490,0 days 00:02:31.557577,512,0.45,3,600,0.0,0.33,15,0.000426,0.18,15,COMPLETE
397,397,0.549898,2021-01-03 15:08:17.587929,2021-01-03 15:11:08.687863,0 days 00:02:51.099934,128,0.56,4,600,0.15,0.17,7,0.000154,0.82,10,COMPLETE
658,658,0.549898,2021-01-05 10:52:45.769397,2021-01-05 10:55:18.440244,0 days 00:02:32.670847,256,0.43,5,400,0.07,0.28,41,0.000148,0.24,4,COMPLETE
1441,1441,0.549898,2021-01-10 21:22:49.704712,2021-01-10 21:24:56.394988,0 days 00:02:06.690276,384,0.52,3,400,0.76,0.38,33,2e-06,0.33,15,COMPLETE
1402,1402,0.547862,2021-01-10 19:59:24.363512,2021-01-10 20:02:15.387254,0 days 00:02:51.023742,256,0.52,5,500,0.04,0.81,25,0.000232,0.4,5,COMPLETE
1186,1186,0.543788,2021-01-09 05:40:24.190958,2021-01-09 05:43:27.314136,0 days 00:03:03.123178,384,0.5,4,300,0.49,0.33,33,2e-06,0.3,27,COMPLETE
1360,1360,0.543788,2021-01-10 17:29:19.807248,2021-01-10 17:34:29.041737,0 days 00:05:09.234489,384,0.55,4,500,0.48,0.93,49,1e-06,0.65,16,COMPLETE
743,743,0.543788,2021-01-06 12:09:17.821085,2021-01-06 12:11:24.808526,0 days 00:02:06.987441,512,0.52,5,400,0.07,0.78,15,3e-06,0.68,30,COMPLETE
337,337,0.543788,2021-01-03 12:28:46.683391,2021-01-03 12:31:25.285477,0 days 00:02:38.602086,512,0.45,4,400,0.56,0.87,17,1.4e-05,0.85,31,COMPLETE
