# Model Debug

In [1]:
import sys
import os
from os import sep
from os.path import dirname, realpath
from pathlib import Path
import logging

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Build the path string that identifies the current notebook/script inside crunch.

    The three pieces are concatenated verbatim (no separator is inserted between
    them), so `crunch_dir` and `subdir` must each already end with a path separator.

    The result is meant to be written into sys.argv[0]: Jupyter Notebook under
    Anaconda invokes the Python interpreter from Anaconda's own subdirectory, so
    argv has to be overridden by hand. A better mechanism should be preferred in
    the future.

    Args:
        fname: File name of the current notebook/script.
        subdir: Directory (relative to `crunch_dir`) that holds the file, including
            its trailing separator.
        crunch_dir: Root directory, including its trailing separator. The default
            is `<real path of the user's home>/crunch/`, evaluated once when the
            function is defined.

    Returns:
        The concatenation crunch_dir + subdir + fname.
    """
    parent_dir = crunch_dir +subdir
    return parent_dir +fname
    
def fix_path(cwd):
    """
    Make a Jupyter notebook look like a regular crunch script to the code it imports.

    Two pieces of interpreter state are modified:
      * sys.argv[0] is overwritten with `cwd`.
      * The absolute path of the parent of the process's current working directory
        ('..') is appended to sys.path, unless it is already listed.

    Note that the sys.path entry is derived from the working directory of the
    kernel process, not from `cwd`.

    Args:
        cwd: Path string to store in sys.argv[0] (see get_cwd).
    """
    sys.argv[0] = cwd
    parent_dir = os.path.abspath('..')
    if parent_dir in sys.path:
        # Already importable; do not add a duplicate entry on re-runs of the cell.
        return
    sys.path.append(parent_dir)

# Identity of this notebook inside the crunch tree; edit both values when the
# notebook is copied or moved.
fname = 'model_debug.ipynb'   # FILL
dir_name = 'model'   # FILL

# get_cwd concatenates its pieces verbatim, so the separator is appended to the
# directory name here before the result is installed via fix_path.
notebook_path = get_cwd(fname, dir_name +sep)
fix_path(notebook_path)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dask import delayed, compute

from ipywidgets import interact, interactive, fixed
from IPython.display import display

# Cap how much of a DataFrame pandas renders in cell output: at most 100 rows
# and 50 columns before the display is truncated.
pd.set_option("display.max_rows", 100)
pd.set_option('display.max_columns', 50)

from common_util import RECON_DIR, JSON_SFX_LEN, DT_CAL_DAILY_FREQ, set_loglevel, chained_filter, get_variants, dump_df, load_json, gb_transpose, reindex_on_time_mask, pd_common_index_rows, filter_cols_below, inner_join, outer_join, ser_shift, list_get_dict, window_iter, benchmark
from model.common import DATASET_DIR, EXPECTED_NUM_HOURS, default_dataset
from model.data_util import datagen, prepare_transpose_data, prepare_label_data
from data.data_api import DataAPI
from data.access_util import df_getters as dg, col_subsetters2 as cs2
from recon.dataset_util import prep_dataset, gen_group
from recon.split_util import get_train_test_split, gen_time_series_split

Using TensorFlow backend.


In [2]:
# Select the log verbosity for the rest of the notebook via the project helper
# imported from common_util. NOTE(review): presumably this configures the root
# logger (later outputs show INFO:root lines and no DEBUG lines) — confirm in
# common_util.
set_loglevel('info')

In [3]:
# Dataset selection: the dataset spec file to load and the asset group(s) to keep.
dataset_name = default_dataset
assets_str = 'sp_500'
# Comma-separated asset names -> list of names with surrounding whitespace removed.
assets = list(map(str.strip, assets_str.split(',')))

# Load the dataset spec from DATASET_DIR and let prep_dataset resolve it for the
# chosen assets.
dataset_dict = load_json(dataset_name, dir_path=DATASET_DIR)
dataset = prep_dataset(dataset_dict, assets=assets)

# Logging uses lazy %-style arguments so the message is only formatted when the
# record is actually emitted; the rendered text is unchanged.
# NOTE(review): len(dataset['features']) counts the top-level entries of the
# 'features' mapping, while the frames themselves are read from
# dataset['features']['dfs'] below — confirm that this is the intended "df(s)" count.
logging.info('dataset: %s %s df(s)', len(dataset['features']), dataset_name)
# Identity comparison is the correct test for None (== can be overridden).
logging.info('assets: %s', 'all' if assets is None else ', '.join(assets))
logging.debug('fpaths: %s', list(dataset['features']['dfs'].keys()))
logging.debug('lpaths: %s', list(dataset['labels']['dfs'].keys()))

INFO:root:dataset: 2 mvp_dnorm_raw_pba_avgprice.json df(s)
INFO:root:assets: sp_500


In [7]:
# Iterate over every (feature, label) pairing that datagen produces for the
# dataset in 'ser_to_ser' mode and log which pairing is being visited.
xy_pairs = datagen(
    dataset,
    feat_prep_fn=prepare_transpose_data,
    label_prep_fn=prepare_label_data,
    how='ser_to_ser',
)
for i, xy_pair in enumerate(xy_pairs):
    # Each item is an 8-tuple: feature/label paths, records, column names and data.
    fpath, lpath, frec, lrec, fcol, lcol, feature, label = xy_pair
    logging.info('(X, y) -> ({fdesc}, {ldesc})'.format(fdesc=frec.desc, ldesc=lrec.desc))
    logging.info('(X, y) -> ({fcol}, {lcol})'.format(fcol=fcol, lcol=lcol))
    # Debug aids, disabled by default: printing whole frames floods the cell output.
#     print(feature)
#     print(label)

INFO:root:(X, y) -> (raw_pba_dmx, raw_pba_oa_retxeod_direod)
INFO:root:(X, y) -> (pba_avgPrice, pba_oa)


index             8         9         10        11        12        13  \
1998-01-02  0.253259 -0.797306 -0.426221 -0.204441 -0.227237 -1.000000   
1998-01-05 -0.064573  0.503539  0.916682  1.000000  0.103837 -1.000000   
1998-01-06  1.000000  0.398773 -0.097617 -0.708027 -0.175780 -0.120505   
1998-01-07  1.000000  0.330649 -0.246504 -0.564115 -1.000000 -1.000000   
1998-01-08  1.000000 -0.160591  0.825262  0.142098 -0.149437  0.512964   
1998-01-09  1.000000  0.687234  0.471789  0.172331  0.221367 -0.283046   
1998-01-12 -1.000000 -0.378521  0.190147  0.250093  0.568414  0.148465   
1998-01-13 -0.920261 -0.876623 -1.000000 -0.420022 -0.586936  0.000731   
1998-01-14 -0.276345 -0.644027 -0.953910 -1.000000 -0.492781 -0.224276   
1998-01-15  0.651377  0.334816  1.000000 -0.051536 -0.092874 -0.040457   
1998-01-16 -1.000000 -0.677311  0.310660  0.758621  1.000000  0.686537   
1998-01-20 -1.000000 -0.783760 -0.503782 -0.415803 -0.456941  0.024162   
1998-01-21  0.856563  1.000000  0.5445

KeyboardInterrupt: 