# nb-model_xg-mdl

In [1]:
import sys
import os
from os import sep
from os.path import dirname, realpath
from pathlib import Path
from functools import partial, reduce
import logging

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Build the path string of a file that lives inside the crunch project directory.

    The three pieces are concatenated verbatim in the order crunch_dir, subdir, fname.
    No separators are inserted, so crunch_dir and subdir must already end with one.
    The default crunch_dir is '<home>/crunch/' and is computed once, at definition time.

    Args:
        fname (str): file name
        subdir (str): subdirectory below crunch_dir, including its trailing separator
        crunch_dir (str): project root, including its trailing separator

    Returns:
        The concatenated path string
    """
    parent = crunch_dir +subdir
    return parent +fname

def fix_path(cwd):
    """
    Patch interpreter state so a notebook session resembles a script run from inside crunch.

    Overwrites sys.argv[0] with cwd, then appends the absolute path of the parent of the
    current working directory to sys.path unless it is already listed there.

    Args:
        cwd (str): path to store in sys.argv[0]
    """
    sys.argv[0] = cwd
    parent_dir = os.path.abspath(os.pardir)
    if parent_dir in sys.path:
        return
    sys.path.append(parent_dir)

# File name of this notebook and the crunch subdirectory it is stored in.
fname = 'nb-model_xg-mdl.ipynb'
dir_name = 'model'
# Set sys.argv[0] to this notebook's path under crunch and put the parent directory on sys.path
# so the project imports below resolve.
fix_path(get_cwd(fname, dir_name +sep))

import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
from dask import delayed, compute
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import weight_norm

from ipywidgets import interact, interactive, fixed
from IPython.display import display

# Raise the pandas display limits so wider/longer frames are shown in cell output.
pd.set_option("display.max_rows", 100)
pd.set_option('display.max_columns', 50)

from common_util import MODEL_DIR, RECON_DIR, JSON_SFX_LEN, DT_CAL_DAILY_FREQ, is_type, pd_common_idx_rows, remove_dups_list, NestedDefaultDict, set_loglevel, search_df, chained_filter, get_variants, load_df, dump_df, load_json, gb_transpose, pd_common_index_rows, filter_cols_below, inner_join, outer_join, ser_shift, list_get_dict, window_iter, benchmark
from common_util import np_assert_identical_len_dim, midx_get_level, pd_rows, midx_intersect, pd_common_idx_rows, midx_split, pd_midx_to_arr, window_iter, np_at_least_nd, np_is_ndim, identity_fn
from model.common import DATASET_DIR, XG_PROCESS_DIR, XG_DATA_DIR, XG_DIR, PYTORCH_MODELS_DIR, ERROR_CODE, TEST_RATIO, VAL_RATIO, EXPECTED_NUM_HOURS, default_dataset
from model.common import PYTORCH_ACT_MAPPING, PYTORCH_OPT_MAPPING, PYTORCH_LOSS_MAPPING
from model.xg_util import xgload
from model.train_util import get_obs_shape, pd_get_np_tvt
#from model.model_util import CLF_MAP
from recon.dataset_util import GEN_GROUP_CONSTRAINTS, gen_group
# Send root-logger records to stdout at DEBUG level so they appear in the cell output.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

CRITICAL:root:script location: /home/kev/crunch/model/nb-model_xg-mdl.ipynb
CRITICAL:root:using project dir: /home/kev/crunch/


## Load Data

In [2]:
# Asset names used as the first element of every key chain below.
assets = ['sp_500', 'russell_2000', 'nasdaq_100', 'dow_jones']
# The asset the rest of the notebook works with (first entry: 'sp_500').
chosen_asset = assets[0]

In [3]:
# Load the features (f), labels (l) and targets (t) stores from their XG_DATA_DIR subdirectories.
# The returned objects are indexed by key chains and expose childkeys() in later cells;
# their exact type is defined in model.xg_util and not visible here.
f = xgload(XG_DATA_DIR +'features' +sep)
l = xgload(XG_DATA_DIR +'labels' +sep)
t = xgload(XG_DATA_DIR +'targets' +sep)

In [4]:
# Report how many items iterating each store yields.
print('num f: {}'.format(len(list(f))))
print('num l: {}'.format(len(list(l))))
print('num t: {}'.format(len(list(t))))

num f: 2520
num l: 1008
num t: 1504


### ddir / dret

In [5]:
# Per-asset lists of child key chains in the label store under the 'ddir' prefixes
# (presumably direction labels for the pba and vol series — confirm against the dataset docs).
ddir_pba_hoc = {a: list(l.childkeys([a, 'ddir', 'ddir', 'pba_hoc_hdxret_ddir'])) for a in assets}
ddir_vol_hoc = {a: list(l.childkeys([a, 'ddir', 'ddir', 'vol_hoc_hdxret_ddir'])) for a in assets}

In [6]:
# Per-asset lists of child key chains in the target store under the 'dret' prefixes
# (presumably return targets matching the ddir labels — confirm against the dataset docs).
dret_pba_hoc = {a: list(t.childkeys([a, 'dret', 'dret', 'pba_hoc_hdxret_dret'])) for a in assets}
dret_vol_hoc = {a: list(t.childkeys([a, 'dret', 'dret', 'vol_hoc_hdxret_dret'])) for a in assets}

### ddir1 / dret1

In [7]:
# Per-asset lists of child key chains in the label store under the 'ddir1' prefixes,
# split into the 'ddir1_lin' and 'ddir1_log' variants for the pba and vol series.
ddir1_pba_hoc_lin = {a: list(l.childkeys([a, 'ddir1', 'ddir1_lin', 'pba_hoc_hdxret1_ddir1'])) for a in assets}
ddir1_pba_hoc_log = {a: list(l.childkeys([a, 'ddir1', 'ddir1_log', 'pba_hoc_hdxret1_ddir1'])) for a in assets}
ddir1_vol_hoc_lin = {a: list(l.childkeys([a, 'ddir1', 'ddir1_lin', 'vol_hoc_hdxret1_ddir1'])) for a in assets}
ddir1_vol_hoc_log = {a: list(l.childkeys([a, 'ddir1', 'ddir1_log', 'vol_hoc_hdxret1_ddir1'])) for a in assets}

In [8]:
# Per-asset lists of child key chains in the target store under the 'dret1' prefixes,
# split into the 'dret1_lin' and 'dret1_log' variants for the pba and vol series.
dret1_pba_hoc_lin = {a: list(t.childkeys([a, 'dret1', 'dret1_lin', 'pba_hoc_hdxret1_dret1'])) for a in assets}
dret1_pba_hoc_log = {a: list(t.childkeys([a, 'dret1', 'dret1_log', 'pba_hoc_hdxret1_dret1'])) for a in assets}
dret1_vol_hoc_lin = {a: list(t.childkeys([a, 'dret1', 'dret1_lin', 'vol_hoc_hdxret1_dret1'])) for a in assets}
dret1_vol_hoc_log = {a: list(t.childkeys([a, 'dret1', 'dret1_log', 'vol_hoc_hdxret1_dret1'])) for a in assets}

### Features

In [9]:
# Distinct values found at position 1 of the feature key chains under the first asset.
list(set([k[1] for k in f.childkeys([assets[0]])]))

['dwrod',
 'dwrxmx',
 'dlogret',
 'hdgau',
 'hdzn',
 'dwrzn',
 'hdmx',
 'hohlca',
 'dwrpt',
 'dc',
 'dohlca',
 'hdpt',
 'hdod',
 'dffd',
 'ddiff',
 'hduni',
 'dwrmx']

In [10]:
# Key-chain suffix (appended after the asset name) of the feature group to use.
kc_end = ['ddiff', 'ddiff_pba_vol']
# Per-asset list of feature key chains found under that suffix.
ft_all = {a: list(f.childkeys([a, *kc_end])) for a in assets}
# Feature key chains of the chosen asset.
feat = ft_all[chosen_asset]

In [11]:
# Display the selected feature key chains.
feat

[['sp_500',
  'ddiff',
  'ddiff_pba_vol',
  'pba_dohlca_ddiff',
  'pba_dohlca_ddiff(1)'],
 ['sp_500',
  'ddiff',
  'ddiff_pba_vol',
  'vol_dohlca_ddiff',
  'vol_dohlca_ddiff(1)']]

## Select Data

In [12]:
# Join the two selected feature frames into one.
features_df = inner_join(f[feat[0]], f[feat[1]])
# Restrict features, the first pba ddir label frame and the first pba dret target frame to their
# common index rows (per the helper's name; its implementation lives in common_util).
feature_df, label_df, target_df = pd_common_idx_rows(features_df, l[ddir_pba_hoc[chosen_asset][0]], t[dret_pba_hoc[chosen_asset][0]])
# Sanity check: all three frames have the same number of rows.
assert(feature_df.shape[0]==label_df.shape[0]==target_df.shape[0])

In [13]:
# Split each frame into three numpy arrays (train / val / test, going by the target names) and pass
# each through np_at_least_nd; labels and targets are handled with axis=-1.
# NOTE(review): the exact split ratios and the effect of np_at_least_nd are defined in
# model.train_util / common_util and are not visible here.
ftrain, fval, ftest = map(np_at_least_nd, pd_get_np_tvt(feature_df, as_midx=False))
ltrain, lval, ltest = map(partial(np_at_least_nd, axis=-1), pd_get_np_tvt(label_df, as_midx=False))
ttrain, tval, ttest = map(partial(np_at_least_nd, axis=-1), pd_get_np_tvt(target_df, as_midx=False))
# Check the training arrays are mutually consistent before modelling.
np_assert_identical_len_dim(ftrain, ltrain, ttrain)

## Mdl

In [20]:
from common_util import window_iter

class TemporalMixin:
	"""
	Mixin for models that consume the whole effective history of an observation in one shot,
	as opposed to sequence models (e.g. RNNs) that are fed a group of timesteps step by step.
	"""

	def preproc(self, params, data):
		"""
		Moving-window reshape of temporal data.

		The first element of data (the features) is walked with window_iter using a window of
		params['input_windows'] items; the items of each window are concatenated along their
		last axis and the results are stacked into a new array. Every remaining element of data
		(label/target vectors) has its first input_windows-1 observations dropped so that it
		lines up with the windowed features.

		example with input_windows of '2':
			0 | a b c
			1 | d e f ---> 1 | a b c d e f
			2 | g h i      2 | d e f g h i
			3 | j k l      3 | g h i j k l

		NOTE(review): no singleton "channel" dimension is inserted here; if the downstream
		model needs one, it has to be added elsewhere - TODO confirm intended behaviour.

		Args:
			params (dict): params dictionary, must contain 'input_windows'
			data (tuple): tuple of numpy data with features as first element

		Returns:
			Tuple of reshaped data, features first
		"""
		np_assert_identical_len_dim(*data)
		num_windows = params['input_windows']

		# Features: one concatenated row per moving window
		stacked = []
		for window in window_iter(data[0], n=num_windows):
			stacked.append(np.concatenate(window, axis=-1))
		f = np.array(stacked)

		# Everything else: drop the observations that precede the first complete window
		first_kept = num_windows-1
		rest = [vec[first_kept:] for vec in data[1:]]

		np_assert_identical_len_dim(f, *rest)
		return (f, *rest)

In [71]:
class Chomp1d(nn.Module):
	"""
	Chops trailing elements off the last dimension of a 3d tensor.

	Meant to remove the extra right-hand elements produced by symmetric 1d convolution padding,
	i.e. (kernel_size-1)*dilation elements. Only guaranteed to work for this purpose if the
	stride is 1.

	Args:
		chomp_size (int): number of trailing elements to remove; 0 leaves the input unchanged
	"""
	def __init__(self, chomp_size):
		super(Chomp1d, self).__init__()
		self.chomp_size = chomp_size

	def forward(self, x):
		"""
		Return x without its last chomp_size elements along the final dimension.
		"""
		if (self.chomp_size == 0):
			# x[:, :, :-0] is x[:, :, :0], which would drop everything instead of nothing
			return x.contiguous()
		return x[:, :, :-self.chomp_size].contiguous()

In [72]:
class TemporalBlock(nn.Module):
	"""
	TCN Block Class

	DC: Dilated Causal Convolution (weight normalized, right-hand padding chomped off)
	ACT: ELU activation
	DO: Dropout
	RU: ReLU applied after the residual sum

	----| DC |-->| ACT |-->| DO |--(+)-->| RU |-->
		|---|1x1 Conv (optional)|---|

	Only a single convolution stage is implemented. The residual path is the identity, or a
	1x1 convolution when the number of input and output channels differ.

	May not work correctly for a stride greater than 1

	Args:
		n_inputs (int): number of input channels
		n_outputs (int): number of output channels
		kernel_size (int): convolution kernel size
		stride (int): convolution stride
		dilation (int): convolution dilation
		padding (int): convolution padding, also the number of trailing elements chomped off
		dropout (float [0, 1]): dropout probability applied after the activation
	"""
	def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout):
		super(TemporalBlock, self).__init__()
		self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size, stride=stride, padding=padding, dilation=dilation))
		self.chomp1 = Chomp1d(padding)
		self.act1 = nn.ELU()
		self.dropout1 = nn.Dropout(dropout)

		# dropout1 is part of the pipeline so that the dropout argument actually takes effect
		self.net = nn.Sequential(self.conv1, self.chomp1, self.act1, self.dropout1)
		self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if (n_inputs != n_outputs) else None
		self.relu = nn.ReLU()
		self.init_weights()

	def init_weights(self, init_method='xavier_uniform'):
		"""
		Initialize convolutional layer weights.
		TCN networks are influenced heavily by weight initialization.

		NOTE(review): conv1 is wrapped in weight_norm; depending on the torch version its
		'weight' may be recomputed from the normalization parameters on every forward pass,
		in which case this initialization of conv1.weight would not persist - TODO confirm.

		Args:
			init_method (str): 'normal', 'xavier_uniform' or 'glorot_uniform'

		Raises:
			ValueError: if init_method is not one of the supported names
		"""
		if (init_method == 'normal'):
			self.conv1.weight.data.normal_(0, 0.01)
			if (self.downsample is not None):
				self.downsample.weight.data.normal_(0, 0.01)

		elif (init_method in ('xavier_uniform', 'glorot_uniform')):
			nn.init.xavier_uniform_(self.conv1.weight, gain=np.sqrt(2))
			if (self.downsample is not None):
				nn.init.xavier_uniform_(self.downsample.weight, gain=np.sqrt(2))

		else:
			# Previously an unknown name silently left the default initialization in place
			raise ValueError('unknown init_method: {}'.format(init_method))

	def forward(self, x):
		"""
		Run the convolution stage and add the residual connection.

		Raises:
			RuntimeError: if the convolution output and the residual cannot be added; the
				message includes both shapes and the original error is chained as the cause
		"""
		out = self.net(x)
		res = x if (self.downsample is None) else self.downsample(x)
		try:
			comb = self.relu(out + res)
		except RuntimeError as e:
			# Re-raise with the shapes instead of printing and exiting the interpreter with status 0
			raise RuntimeError('could not add residual: out.shape: {}, res.shape: {}'.format(
				tuple(out.shape), tuple(res.shape))) from e
		return comb


In [73]:
class TemporalConvNet(nn.Module):
	"""
	Stack of TemporalBlocks with exponentially growing dilation.

	Level i uses a dilation of 2**i and a padding of (kernel_size-1) * 2**i, always with stride 1.
	The first level takes num_input_channels channels, each later level takes the output width
	of the level before it. This class can be used to build classifiers and regressors.

	The attention and max_attn_len arguments are accepted but not used by this implementation.

	Args:
		num_input_channels (int): number of input channels
		channels (list): output channel sizes, one per level, first to last
		kernel_size (int): CNN kernel size shared by all levels
		dropout (float [0, 1]): dropout probability handed to every level
		attention (bool): unused
		max_attn_len (int): unused
	"""
	def __init__(self, num_input_channels, channels, kernel_size, dropout, attention, max_attn_len):
		super().__init__()
		blocks = []
		width_in = num_input_channels
		for level, width_out in enumerate(channels):
			dilation_size = 2 ** level
			blocks.append(TemporalBlock(width_in, width_out, kernel_size=kernel_size, stride=1, dilation=dilation_size,
				padding=(kernel_size-1) * dilation_size, dropout=dropout))
			width_in = width_out	# The next level consumes what this one produces
		self.network = nn.Sequential(*blocks)

	def forward(self, x):
		"""
		Feed x through all levels in order.
		"""
		return self.network(x)

In [74]:
class TCN(nn.Module):
	"""
	TCN Based Network: a TemporalConvNet followed by a linear layer and a log-softmax.

	NOTE(review): with the default num_outputs=1 the log-softmax over dim 1 has a single entry
	and is therefore constant; callers in this file pass num_outputs=2 - TODO confirm the default.

	Args:
		num_input_channels (int): number of input channels
		channels (list): list of output channel sizes in order from first to last
		num_outputs (int): number of outputs, usually the number of classes to predict
		kernel_size (int > 1): CNN kernel size
		dropout (float [0, 1]): dropout probability, probability of an element to be zeroed during training
		attention (bool): whether or not to include attention block after each tcn block
		max_attn_len (int > 0): max length of attention (only relevant if attention is set to True)
	"""
	def __init__(self, num_input_channels, channels, num_outputs=1, kernel_size=2, dropout=0.2, attention=False, max_attn_len=80):
		super().__init__()
		self.tcn = TemporalConvNet(num_input_channels, channels, kernel_size=kernel_size, dropout=dropout, attention=attention, max_attn_len=max_attn_len)
		# TODO - verify correctness of using max_attn_len as input size to output layer
		head_width = max_attn_len if (attention) else channels[-1]
		self.out = nn.Linear(head_width, num_outputs)
		self.logprob = nn.LogSoftmax(dim=1)

	def forward(self, x):
		"""
		Input must have have shape (N, C_in, L_in) where
			N: batch size
			C_in: number of input channels
			L_in: length of input sequence

		Output shape will be (N, C_out) where
			N: batch size
			C_out: number of classes

		Only the last position of the TCN output sequence is fed to the output layer; the
		result holds log-probabilities over dim 1.
		"""
		last_step = self.tcn(x)[:, :, -1]
		return self.logprob(self.out(last_step))

In [75]:
def make_model(params, feat_shape, num_outputs=2):
	"""
	Build the top level Temporal CNN classifier from a params dictionary.

	The last two entries of feat_shape are read as (input channels, window size). Each entry
	of params['topology'] is multiplied by the window size, clipped to at least 1 and cast to
	int to obtain the channel width of the corresponding TCN level.

	Side effect: the integer-valued entries of params ('epochs', 'batch_size', 'input_windows',
	'kernel_size', 'max_attn_len') are converted to int in place.

	Parameters:
		epochs, batch_size: only coerced to int here, not otherwise used
		input_windows (int > 0): Number of aggregation windows in the input layer
		topology (list): Topology of the TCN divided by the window size
		kernel_size (int > 1): CNN kernel size
		dropout (float [0, 1]): dropout probability, probability of an element to be zeroed
		attention (bool): whether or not to include attention block after each tcn block
		max_attn_len (int > 0): max length of attention (only relevant if attention is set to True)

	Returns:
		The constructed TCN module
	"""
	# Fix params
	for int_key in ('epochs', 'batch_size', 'input_windows', 'kernel_size', 'max_attn_len'):
		params[int_key] = int(params[int_key])

	input_channels, window_size = feat_shape[-2:]
	# Scale topology by the window size and make sure that layer outputs are always greater than zero
	scaled = window_size * np.array(params['topology'])
	widths = np.clip(scaled, a_min=1, a_max=None).astype(int).tolist()
	return TCN(
		num_input_channels=input_channels,
		channels=widths,
		num_outputs=num_outputs,
		kernel_size=params['kernel_size'],
		dropout=params['dropout'],
		attention=params['attention'],
		max_attn_len=params['max_attn_len'],
	)

In [76]:
# Small parameter set used to smoke-test model construction with make_model.
test_par = {
    'epochs': 100,
    'batch_size': 64,
    'input_windows': 3,
    'topology': [5],  # one TCN level; make_model multiplies this by the window size
    'kernel_size': 3,
    'dropout': .8,
    'attention': False,
    'max_attn_len': 90,  # only relevant when attention is True
}

In [77]:
# Build a two-output model from the smoke-test parameters.
# NOTE(review): f_shape is not defined in any cell shown in this notebook - presumably the
# shape of the preprocessed feature array; define it before re-running this cell.
make_model(test_par, f_shape, num_outputs=2)

TypeError: torch.nn.modules.activation.ELU is not a Module subclass

In [36]:
# NOTE(review): clf is not defined in any cell shown in this notebook (see the NameError below);
# the cell that created it must be restored before this one can run.
clf.space

NameError: name 'clf' is not defined

In [37]:
# Parameter set passed to the objective function built below.
params = {
    'epochs': 300,
    'batch_size': 256,
    'loss': 'nll',
    'opt': {'name': 'Adam', 'lr': 1e-3},
    'input_windows': 5,
    'topology': [5],  # one TCN level; make_model multiplies this by the window size
    'kernel_size': 3,
    'dropout': 0,
    'attention': False,
    'max_attn_len': 120  # only relevant when attention is True
}

In [39]:
# Build an objective function over the fixed feature/label frames, configured for a binary
# classifier, the last 'val_loss' value as the objective, minimisation, and no shuffling.
# NOTE(review): clf is not defined in any cell shown in this notebook; the semantics of the
# keyword arguments are defined by its class and cannot be confirmed from here.
obj_fn = clf.make_const_data_objective(feature_df, label_df, exp_logdir=None, exp_meta=None, clf_type='binary',
									meta_obj='val_loss', obj_agg='last', obj_mode='min', meta_var=None,
									val_ratio=VAL_RATIO, test_ratio=TEST_RATIO, shuffle=False)

In [40]:
# Evaluate the objective once with the parameter set above.
# NOTE(review): the recorded run failed with a CUDA device-side assert; the cause cannot be
# determined from this notebook - re-running on CPU usually surfaces the underlying error.
res = obj_fn(params)

RuntimeError: CUDA error: device-side assert triggered