# nb-model_xg-data-template

In [9]:
import sys
import os
from os import sep
from os.path import dirname, realpath
from pathlib import Path
from functools import partial, reduce
import logging

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Build the path string of a file that lives under the crunch directory.

    The three pieces are concatenated verbatim and no separator is inserted,
    so `crunch_dir` and `subdir` must each end with their own path separator.

    fname:      file name placed at the end of the path.
    subdir:     subdirectory of `crunch_dir` that holds the file.
    crunch_dir: root directory; the default is `<home>/crunch/`, resolved once
                when this function is defined.

    Returns `crunch_dir + subdir + fname`.
    """
    containing_dir = crunch_dir + subdir
    return containing_dir + fname

def fix_path(cwd):
    """
    Make a notebook session look like a script run from inside crunch.

    Overwrites `sys.argv[0]` with `cwd`, then makes sure the absolute path of
    the parent of the process's current working directory is on `sys.path`.
    The entry is appended at most once, so repeated calls do not duplicate it.

    cwd: value to store in `sys.argv[0]`.
    """
    sys.argv[0] = cwd
    # '..' is resolved against the working directory of the running process.
    parent_dir = os.path.abspath('..')
    if parent_dir in sys.path:
        return
    sys.path.append(parent_dir)

# File name of this notebook and the crunch subdirectory it is stored in.
fname = 'nb-model_xg-data-template.ipynb'
dir_name = 'model'
# Point sys.argv[0] at this notebook's path and put the parent of the working
# directory on sys.path. This runs before the project imports further down
# (common_util, model, recon), which presumably rely on that sys.path entry.
fix_path(get_cwd(fname, dir_name +sep))

import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
from dask import delayed, compute
from torch.utils.data import TensorDataset, DataLoader
import torch

from ipywidgets import interact, interactive, fixed
from IPython.display import display

# Widen the pandas display limits so larger frames are shown without truncation.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 50

from common_util import RECON_DIR, JSON_SFX_LEN, DT_CAL_DAILY_FREQ, is_type, pd_common_idx_rows, remove_dups_list, NestedDefaultDict, set_loglevel, search_df, chained_filter, get_variants, load_df, dump_df, load_json, gb_transpose, pd_common_index_rows, filter_cols_below, inner_join, outer_join, ser_shift, list_get_dict, window_iter, benchmark
from common_util import midx_get_level, pd_rows, midx_intersect, pd_common_idx_rows, midx_split, pd_midx_to_arr, window_iter, np_is_ndim, identity_fn
from model.common import DATASET_DIR, XG_PROCESS_DIR, XG_DATA_DIR, XG_DIR, EXPECTED_NUM_HOURS, default_dataset
from model.xg_util import xgload
from recon.dataset_util import GEN_GROUP_CONSTRAINTS, gen_group
# Send log records of level DEBUG and above to stdout so they show up in the
# cell output. basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

## Load Data

In [10]:
# Asset names; each one is used as the first element of the key paths passed
# to childkeys() in the ddir/dret cells of this notebook.
assets = ['sp_500', 'russell_2000', 'nasdaq_100', 'dow_jones']

In [11]:
# Load the features (f), labels (l) and targets (t) stores from their
# subdirectories of XG_DATA_DIR.
# NOTE(review): xgload is a project helper whose return type is not visible
# here; later cells iterate these objects and call .childkeys() on l and t.
f = xgload(XG_DATA_DIR +'features' +sep)
l = xgload(XG_DATA_DIR +'labels' +sep)
t = xgload(XG_DATA_DIR +'targets' +sep)

In [12]:
# Report how many items iterating each loaded store yields.
print('num f: {}'.format(sum(1 for _item in f)))
print('num l: {}'.format(sum(1 for _item in l)))
print('num t: {}'.format(sum(1 for _item in t)))

num f: 2520
num l: 912
num t: 1360


### ddir / dret

In [13]:
# Per asset: the child keys the labels store reports under each 'ddir' key path.
ddir_pba_hoc = {
    asset: [*l.childkeys([asset, 'ddir', 'ddir', 'pba_hoc_hdxret_ddir'])]
    for asset in assets
}
ddir_vol_hoc = {
    asset: [*l.childkeys([asset, 'ddir', 'ddir', 'vol_hoc_hdxret_ddir'])]
    for asset in assets
}

In [14]:
# Per asset: the child keys the targets store reports under each 'dret' key path.
dret_pba_hoc = {
    asset: [*t.childkeys([asset, 'dret', 'dret', 'pba_hoc_hdxret_dret'])]
    for asset in assets
}
dret_vol_hoc = {
    asset: [*t.childkeys([asset, 'dret', 'dret', 'vol_hoc_hdxret_dret'])]
    for asset in assets
}

### ddir1 / dret1

In [15]:
# Per asset: the child keys the labels store reports under each 'ddir1' key
# path, for the 'pba'/'vol' sources crossed with the '_lin'/'_log' variants.
ddir1_pba_hoc_lin = {
    asset: [*l.childkeys([asset, 'ddir1', 'ddir1_lin', 'pba_hoc_hdxret1_ddir1'])]
    for asset in assets
}
ddir1_pba_hoc_log = {
    asset: [*l.childkeys([asset, 'ddir1', 'ddir1_log', 'pba_hoc_hdxret1_ddir1'])]
    for asset in assets
}
ddir1_vol_hoc_lin = {
    asset: [*l.childkeys([asset, 'ddir1', 'ddir1_lin', 'vol_hoc_hdxret1_ddir1'])]
    for asset in assets
}
ddir1_vol_hoc_log = {
    asset: [*l.childkeys([asset, 'ddir1', 'ddir1_log', 'vol_hoc_hdxret1_ddir1'])]
    for asset in assets
}

In [16]:
# Per asset: the child keys the targets store reports under each 'dret1' key
# path, for the 'pba'/'vol' sources crossed with the '_lin'/'_log' variants.
dret1_pba_hoc_lin = {
    asset: [*t.childkeys([asset, 'dret1', 'dret1_lin', 'pba_hoc_hdxret1_dret1'])]
    for asset in assets
}
dret1_pba_hoc_log = {
    asset: [*t.childkeys([asset, 'dret1', 'dret1_log', 'pba_hoc_hdxret1_dret1'])]
    for asset in assets
}
dret1_vol_hoc_lin = {
    asset: [*t.childkeys([asset, 'dret1', 'dret1_lin', 'vol_hoc_hdxret1_dret1'])]
    for asset in assets
}
dret1_vol_hoc_log = {
    asset: [*t.childkeys([asset, 'dret1', 'dret1_log', 'vol_hoc_hdxret1_dret1'])]
    for asset in assets
}

## Play