# nb-model_tcn

In [2]:
import sys
import os
from os import sep
from os.path import dirname, realpath, exists
from pathlib import Path
from functools import partial
from itertools import permutations
import logging

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Build the path string of a file located under the crunch directory.

    The three pieces are concatenated as-is (crunch_dir, then subdir, then fname);
    no separators are inserted, so crunch_dir and subdir must already end with one.
    The default crunch_dir is '<home>/crunch/' and is computed once, when the
    function is defined.
    """
    parent_dir = crunch_dir + subdir
    return parent_dir + fname

def fix_path(cwd):
    """
    Make a notebook session look like a script run from inside crunch.

    Overwrites sys.argv[0] with cwd and makes sure the absolute path of the parent
    of the current working directory is on sys.path (appended only if missing).
    """
    sys.argv[0] = cwd
    parent_dir = os.path.abspath(os.pardir)
    if parent_dir in sys.path:
        return
    sys.path.append(parent_dir)

# Point argv at this notebook's location under crunch and extend sys.path before
# the project imports below are attempted.
dir_name = 'model'
fname = 'nb-model_tcn-1.ipynb'
fix_path(get_cwd(fname, f'{dir_name}{sep}'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import pytorch_lightning as pl
from verification.batch_norm import BatchNormVerificationCallback
from verification.batch_gradient import BatchGradientVerificationCallback
# from pytorch_lightning.callbacks.early_stopping import EarlyStopping
# from pytorch_lightning import loggers as pl_loggers

from ipywidgets import interact, interactive, fixed
from IPython.display import display

# Raise the pandas display limits so wider/longer frames are shown in cell output.
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 50)

from common_util import MODEL_DIR, NestedDefaultDict, str_now, is_valid, isnt, makedir_if_not_exists, load_df, load_json, dump_json, rectify_json
from model.common import ASSETS, INTERVAL_YEARS, OPTUNA_DB_FNAME, OPTUNA_N_TRIALS, OPTUNA_TIMEOUT_HOURS, INTRADAY_LEN
from model.common import PYTORCH_ACT1D_LIST, PYTORCH_INIT_LIST
from model.xg_util import get_xg_feature_dfs, get_xg_label_target_dfs, get_hardcoded_feature_dfs, get_hardcoded_label_target_dfs
from model.pl_xgdm import XGDataModule
# Configure root logging to write to stdout at DEBUG level.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)


ImportError: attempted relative import with no known parent package

## run trials

In [2]:
def run_trial(asset_name, fdata_name, ldata_name):
    """
    Train one model on a single asset/feature/label combination and log the run.

    Model and training parameters start from each class's suggest_params(trial=None, ...)
    output and are then overridden with the hard-coded values below. The run writes:
      * benchmark.json (once per study directory) under
        MODEL_DIR/log/<model name>/<asset name>/<data module name>/
      * params_m.json / params_t.json, CSV and TensorBoard logs and checkpoints
        under a timestamped trial directory inside that study directory.

    Args:
        asset_name: asset identifier, passed to XGDataModule and used in the log path.
        fdata_name: feature data name, passed to XGDataModule.
        ldata_name: label data name, passed to XGDataModule.

    Raises:
        ValueError: if the hard-coded model_name does not match a known model.
    """
    max_epochs = None
    min_epochs = 20
    model_name = 'stcn'
    monitor = 'val_accuracy'
    num_classes = 2

    # Only the loss is minimized; any other monitored metric is maximized.
    optimize_dir = {
        'val_loss': 'minimize'
    }.get(monitor, 'maximize')

    # model options: stcn, anp, ffn
    if (model_name in ('stcn', 'StackedTCN', 'GenericModel_StackedTCN')):
        from model.pl_generic import GenericModel
        from model.model_util import StackedTCN
        pl_model_fn, pt_model_fn = GenericModel, StackedTCN
    elif (model_name in ('anp', 'AttentiveNP', 'NPModel_AttentiveNP')):
        from model.pl_np import NPModel
        from model.np_util import AttentiveNP
        pl_model_fn, pt_model_fn = NPModel, AttentiveNP
    elif (model_name in ('ffn', 'FFN', 'GenericModel_FFN', 'GenericModel_FNN')):
        # 'GenericModel_FNN' is kept for backward compatibility; the name composed
        # below for this branch is 'GenericModel_FFN'.
        from model.pl_generic import GenericModel
        from model.model_util import FFN
        pl_model_fn, pt_model_fn = GenericModel, FFN
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError(f'unknown model_name: {model_name!r}')
    model_name = f'{pl_model_fn.__name__}_{pt_model_fn.__name__}'

    m_params = pt_model_fn.suggest_params(trial=None, num_classes=num_classes, add_ob=True)
    t_params = pl_model_fn.suggest_params(trial=None, num_classes=num_classes)
    logging.getLogger("lightning").setLevel(logging.ERROR)
    # Escapes are U+2713 (check mark) and U+1F7A9 (cross); written as escapes so the
    # glyphs survive re-encoding of this file.
    print('cuda status: {}'.format('\u2713' if (torch.cuda.is_available()) else '\U0001F7A9'))

    # Hard-coded model hyperparameters for this experiment.
    m_params['size'] = 1
    m_params['depth'] = 1
    m_params['kernel_sizes'] = 8
    m_params['input_dropout'] = 0
    m_params['output_dropout'] = 0
    m_params['global_dropout'] = 0
    m_params['block_act'] ='relu'
    m_params['block_init'] = 'kaiming_uniform'
    m_params['out_act'] = 'relu'
    m_params['out_init'] = 'kaiming_uniform'
    m_params['ob_act'] = 'relu'
    m_params['ob_init'] = 'kaiming_uniform'
    m_params['ob_out_shapes'] = [2]
    m_params['pad_mode'] = 'full'

    # Optional class weighting; currently not passed on (see t_params['class_weights']).
    class_weights = torch.zeros(num_classes, dtype=torch.float32, device='cpu', requires_grad=False)
    class_weights[0] = .52
    class_weights[1] = 1-class_weights[0]
    opt = {
        'name': 'adam',
        'kwargs': {
            'lr': 1e-3,
            'betas': (0.9, 0.999),
            'weight_decay': 0,
            'amsgrad': False
        }
    }
    # Hard-coded training hyperparameters for this experiment.
    t_params['loss'] = 'ce'
    t_params['class_weights'] = None #class_weights
    t_params['train_shuffle'] = False
    t_params['batch_size'] = 64
    t_params['epochs'] = 100
    t_params['opt'] = opt
    t_params['window_size'] = 1

    dm = XGDataModule(t_params, asset_name, fdata_name, ldata_name, fret=None, overwrite_cache=False)
    dm.prepare_data()
    dm.setup()
    trial_time = str_now().replace(' ', '_').replace(':', '-')
    study_dir = MODEL_DIR +sep.join(['log', model_name, asset_name, dm.name]) +sep
    makedir_if_not_exists(study_dir)
    trial_dir = f'{study_dir}{trial_time}{sep}'
    bench_fname = 'benchmark.json'
    # Benchmarks are shared by all trials of a study, so only write them once.
    if (not exists(f'{study_dir}{bench_fname}')):
        bench = dm.get_benchmarks()
        dump_json(bench, bench_fname, study_dir)
    print('trial dir:', trial_dir)
    csv_log = pl.loggers.csv_logs.CSVLogger(trial_dir, name='', version='')
    tb_log = pl.loggers.tensorboard.TensorBoardLogger(trial_dir, name='', version='', log_graph=False)
    # optimize_dir[:3] yields 'min' or 'max', the mode string passed to ModelCheckpoint.
    chk_callback = pl.callbacks.ModelCheckpoint(f'{trial_dir}chk{sep}', monitor=monitor, mode=optimize_dir[:3])
    ver_callbacks = (BatchNormVerificationCallback(), BatchGradientVerificationCallback())
    mdl = pl_model_fn(pt_model_fn, m_params, t_params, dm.fobs)

    makedir_if_not_exists(trial_dir)
    dump_json(rectify_json(m_params), 'params_m.json', trial_dir)
    dump_json(rectify_json(t_params), 'params_t.json', trial_dir)

    # max_epochs (when set) takes precedence over t_params['epochs'].
    trainer = pl.Trainer(max_epochs=max_epochs or t_params['epochs'],
            min_epochs=min_epochs, logger=[csv_log, tb_log],
            callbacks=[chk_callback, *ver_callbacks],
            limit_val_batches=1.0, gradient_clip_val=0., #track_grad_norm=2,
            auto_lr_find=False, amp_level='O1', precision=16,
            default_root_dir=trial_dir, weights_summary=None,
            #overfit_batches=1,
            gpus=-1 if (torch.cuda.is_available()) else None)
    trainer.fit(mdl, datamodule=dm)

In [3]:
# perms = list('hpomzug')
# n = 10

# for asset_name in ASSETS:
#     for src in ('pba', 'vol', 'buzz'):
#         for c in perms:
#             for i in range(n):
#                 run_trial(asset_name, f'h_{src}_{c}', 'ddir')

In [None]:
# Sweep every asset, data source and ordering of the 'm', 'o', 'z' feature codes,
# repeating each configuration n times.
srcs = ('pba', 'vol', 'buzz')
perms = list(map(''.join, permutations('moz')))
n = 10

for asset_name in ASSETS:
    print(asset_name)
    for src in srcs:
        print(src)
        for combo in perms:
            print(combo)
            for _rep in range(n):
                run_trial(asset_name, f'h_{src}_{combo}', 'ddir')