# nb-model_xg-mdl-tcn-1

In [1]:
import sys
import os
from os import sep
from os.path import dirname, realpath
from pathlib import Path
from collections import OrderedDict
from functools import partial, reduce
import logging

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Build the path string of a notebook/script inside the crunch project.

    The result is the plain concatenation crunch_dir + subdir + fname; no path
    separators are inserted, so crunch_dir and subdir must already end with one.
    The default crunch_dir (<real path of the home directory>/crunch/) is evaluated
    once, when this function is defined.

    Args:
        fname (str): file name, e.g. 'nb-model_xg-mdl.ipynb'
        subdir (str): sub directory of the project, ending with a separator
        crunch_dir (str): project root directory, ending with a separator

    Returns:
        str: the concatenated path
    """
    parent = crunch_dir +subdir
    return parent +fname

def fix_path(cwd):
    """
    Make a notebook session look like a script launched from the project.

    Overwrites sys.argv[0] with cwd and makes sure the absolute path of the
    parent of the current working directory ('..') is on sys.path, appending it
    only if it is not already there.

    Args:
        cwd (str): value to store in sys.argv[0]
    """
    sys.argv[0] = cwd
    parent_dir = os.path.abspath('..')
    if parent_dir in sys.path:
        return
    sys.path.append(parent_dir)

fname = 'nb-model_xg-mdl.ipynb'
dir_name = 'model'
fix_path(get_cwd(fname, dir_name +sep))

import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
from dask import delayed, compute
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import weight_norm
import torchfunc
import pytorch_lightning as pl

from ipywidgets import interact, interactive, fixed
from IPython.display import display

pd.set_option("display.max_rows", 100)
pd.set_option('display.max_columns', 50)

from common_util import MODEL_DIR, RECON_DIR, JSON_SFX_LEN, DT_CAL_DAILY_FREQ, is_type, pd_common_idx_rows, remove_dups_list, NestedDefaultDict, set_loglevel, search_df, chained_filter, get_variants, load_df, dump_df, load_json, gb_transpose, pd_common_index_rows, filter_cols_below, inner_join, outer_join, ser_shift, list_get_dict, window_iter, benchmark
from common_util import isnt, window_iter, np_assert_identical_len_dim, midx_get_level, pd_rows, midx_intersect, pd_common_idx_rows, midx_split, pd_midx_to_arr, window_iter, np_at_least_nd, np_is_ndim, identity_fn
from model.common import DATASET_DIR, XG_PROCESS_DIR, XG_DATA_DIR, XG_DIR, PYTORCH_MODELS_DIR, ERROR_CODE, TEST_RATIO, VAL_RATIO, EXPECTED_NUM_HOURS, default_dataset
from model.common import PYTORCH_ACT_MAPPING, PYTORCH_OPT_MAPPING, PYTORCH_SCH_MAPPING, PYTORCH_LOSS_MAPPING
from model.xg_util import xgload
from model.train_util import pd_get_np_tvt, batchify
from model.model_util import *
from recon.dataset_util import GEN_GROUP_CONSTRAINTS, gen_group
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

CRITICAL:root:script location: /home/kev/crunch/model/nb-model_xg-mdl.ipynb
CRITICAL:root:using project dir: /home/kev/crunch/


## Load Data

In [2]:
assets = ['sp_500', 'russell_2000', 'nasdaq_100', 'dow_jones']
chosen_asset = assets[0]

In [3]:
f = xgload(XG_DATA_DIR +'features' +sep)
l = xgload(XG_DATA_DIR +'labels' +sep)
t = xgload(XG_DATA_DIR +'targets' +sep)

In [4]:
# Number of entries yielded by iterating each loaded store (features, labels, targets)
print('\n'.join('num {}: {}'.format(tag, len(list(store))) for tag, store in (('f', f), ('l', l), ('t', t))))

num f: 2520
num l: 1008
num t: 1504


### ddir / dret

In [5]:
# {asset: [key chains]} under each 'ddir' series of the label store `l`
ddir_pba_hoc, ddir_vol_hoc = (
    {a: list(l.childkeys([a, 'ddir', 'ddir', series])) for a in assets}
    for series in ('pba_hoc_hdxret_ddir', 'vol_hoc_hdxret_ddir')
)

In [6]:
# {asset: [key chains]} under each 'dret' series of the target store `t`
dret_pba_hoc, dret_vol_hoc = (
    {a: list(t.childkeys([a, 'dret', 'dret', series])) for a in assets}
    for series in ('pba_hoc_hdxret_dret', 'vol_hoc_hdxret_dret')
)

### ddir1 / dret1

In [7]:
groups = ['lin', 'log']
fmt3, fmt4 = '{}_{}', '{}_hdxret1_{}'

In [8]:
e = 'ddir1'
# One {asset: {group: [key chains]}} lookup per base series, read from the label store `l`
ddir1_pba_hoc, ddir1_pba_hlh, ddir1_vol_hoc, ddir1_vol_hlh = (
    {a: {g: list(l.childkeys([a, e, fmt3.format(e, g), fmt4.format(b, e)])) for g in groups} for a in assets}
    for b in ('pba_hoc', 'pba_hlh', 'vol_hoc', 'vol_hlh')
)
b = 'vol_hlh'  # keep the module-level `b` bound to the last base name

In [9]:
e = 'dret1'
# One {asset: {group: [key chains]}} lookup per base series, read from the target store `t`
dret1_pba_hoc, dret1_pba_hlh, dret1_vol_hoc, dret1_vol_hlh = (
    {a: {g: list(t.childkeys([a, e, fmt3.format(e, g), fmt4.format(b, e)])) for g in groups} for a in assets}
    for b in ('pba_hoc', 'pba_hlh', 'vol_hoc', 'vol_hlh')
)
b = 'vol_hlh'  # keep the module-level `b` bound to the last base name

### ddir2 / dret2

In [10]:
scalars = ['0.5', '1', '2']
stats = ['avg', 'std', 'mad', 'max', 'min']
fmt4, fmt5 = '{}_hdxret2_{}', '{}_hdxret2({}*{},1)_{}'

In [11]:
e = 'ddir2'
# One {asset: {stat: [key chain per scalar]}} lookup per base series; key chains are built
# from the format strings alone (no store access)
ddir2_pba_hoc, ddir2_pba_hlh, ddir2_vol_hoc, ddir2_vol_hlh = (
    {a: {d: [[a, e, e, fmt4.format(b, e), fmt5.format(b, c, d, e)] for c in scalars] for d in stats} for a in assets}
    for b in ('pba_hoc', 'pba_hlh', 'vol_hoc', 'vol_hlh')
)
b = 'vol_hlh'  # keep the module-level `b` bound to the last base name

In [12]:
e = 'dret2'
# One {asset: {stat: [key chain per scalar]}} lookup per base series; key chains are built
# from the format strings alone (no store access)
dret2_pba_hoc, dret2_pba_hlh, dret2_vol_hoc, dret2_vol_hlh = (
    {a: {d: [[a, e, e, fmt4.format(b, e), fmt5.format(b, c, d, e)] for c in scalars] for d in stats} for a in assets}
    for b in ('pba_hoc', 'pba_hlh', 'vol_hoc', 'vol_hlh')
)
b = 'vol_hlh'  # keep the module-level `b` bound to the last base name

### dxfbdir1 / dxfbcret1

In [13]:
groups = ['lin', 'log']
fmt3, fmt4 = '{}_{}', '{}_hdxcret1_{}'

In [14]:
e = 'dxfbdir1'
# One {asset: {group: [key chains]}} lookup per base series, read from the label store `l`
dxfbdir1_pba_hoc, dxfbdir1_pba_hlh, dxfbdir1_vol_hoc, dxfbdir1_vol_hlh = (
    {a: {g: list(l.childkeys([a, e, fmt3.format(e, g), fmt4.format(b, e)])) for g in groups} for a in assets}
    for b in ('pba_hoc', 'pba_hlh', 'vol_hoc', 'vol_hlh')
)
b = 'vol_hlh'  # keep the module-level `b` bound to the last base name

In [15]:
e = 'dxfbcret1'
# One {asset: {group: [key chains]}} lookup per base series, read from the target store `t`
dxfbcret1_pba_hoc, dxfbcret1_pba_hlh, dxfbcret1_vol_hoc, dxfbcret1_vol_hlh = (
    {a: {g: list(t.childkeys([a, e, fmt3.format(e, g), fmt4.format(b, e)])) for g in groups} for a in assets}
    for b in ('pba_hoc', 'pba_hlh', 'vol_hoc', 'vol_hlh')
)
b = 'vol_hlh'  # keep the module-level `b` bound to the last base name

### dxfbdir2 / dxfbcret2

In [16]:
scalars = ['0.5', '1', '2']
stats = ['avg', 'std', 'mad', 'max', 'min']
fmt4, fmt5 = '{}_hdxcret2_{}', '{}_hdxcret2({}*{},1)_{}'

In [17]:
e = 'dxfbdir2'
# One {asset: {stat: [key chain per scalar]}} lookup per base series; key chains are built
# from the format strings alone (no store access)
dxfbdir2_pba_hoc, dxfbdir2_pba_hlh, dxfbdir2_vol_hoc, dxfbdir2_vol_hlh = (
    {a: {d: [[a, e, e, fmt4.format(b, e), fmt5.format(b, c, d, e)] for c in scalars] for d in stats} for a in assets}
    for b in ('pba_hoc', 'pba_hlh', 'vol_hoc', 'vol_hlh')
)
b = 'vol_hlh'  # keep the module-level `b` bound to the last base name

In [18]:
e = 'dxfbcret2'
# One {asset: {stat: [key chain per scalar]}} lookup per base series; key chains are built
# from the format strings alone (no store access)
dxfbcret2_pba_hoc, dxfbcret2_pba_hlh, dxfbcret2_vol_hoc, dxfbcret2_vol_hlh = (
    {a: {d: [[a, e, e, fmt4.format(b, e), fmt5.format(b, c, d, e)] for c in scalars] for d in stats} for a in assets}
    for b in ('pba_hoc', 'pba_hlh', 'vol_hoc', 'vol_hlh')
)
b = 'vol_hlh'  # keep the module-level `b` bound to the last base name

### Features

In [19]:
list(set([k[1] for k in f.childkeys([assets[0]])]))

['dc',
 'dwrxmx',
 'hdmx',
 'hohlca',
 'hdzn',
 'hdgau',
 'ddiff',
 'dohlca',
 'dwrod',
 'dlogret',
 'dwrpt',
 'dffd',
 'hdpt',
 'dwrzn',
 'dwrmx',
 'hdod',
 'hduni']

In [20]:
kc_end = ['ddiff', 'ddiff_pba_vol']
ft_all = {a: list(f.childkeys([a, *kc_end])) for a in assets}
feat = ft_all[chosen_asset]

In [21]:
feat

[['sp_500',
  'ddiff',
  'ddiff_pba_vol',
  'pba_dohlca_ddiff',
  'pba_dohlca_ddiff(1)'],
 ['sp_500',
  'ddiff',
  'ddiff_pba_vol',
  'vol_dohlca_ddiff',
  'vol_dohlca_ddiff(1)']]

## Select Data

In [22]:
features_df = inner_join(f[feat[0]], f[feat[1]])


def no_zero(df):
    """Keep only the rows of df whose values do not sum to zero across columns."""
    return df[df.values.sum(axis=1) != 0]


def to_bin(df):
    """Apply the affine map (df+1)*.5, which sends -1 to 0 and 1 to 1."""
    return (df+1)*.5


# Features, binarised non-zero labels and targets, passed together through pd_common_idx_rows
feature_df, label_df, target_df = pd_common_idx_rows(
    features_df,
    to_bin(no_zero(l[ddir_pba_hoc[chosen_asset][0]])),
    t[dret_pba_hoc[chosen_asset][0]],
)
assert feature_df.shape[0] == label_df.shape[0] == target_df.shape[0]

In [35]:
#ftrain, fval, ftest = map(np_at_least_nd, pd_get_np_tvt(feature_df, as_midx=False))
#ltrain, lval, ltest = map(partial(np_at_least_nd, axis=-1), pd_get_np_tvt(label_df, as_midx=False))
#ttrain, tval, ttest = map(partial(np_at_least_nd, axis=-1), pd_get_np_tvt(target_df, as_midx=False))
#np_assert_identical_len_dim(ftrain, ltrain, ttrain)
#np_assert_identical_len_dim(fval, lval, tval)
#np_assert_identical_len_dim(ftest, ltest, ttest)

## Model

In [77]:
# Hyperparameters handed to TCNModel (and to pl.Trainer for 'epochs')
test_par = {
    'epochs': 200,                  # passed to pl.Trainer as max_nb_epochs
    'batch_size': 64,
    'input_windows': 10,            # number of windows concatenated per sample in preproc
    'topology': [40, 20, 10],       # per-layer channel counts before scaling by the window size
    'activation': 'elu',            # key into PYTORCH_ACT_MAPPING
    'opt': {                        # optimizer: key into PYTORCH_OPT_MAPPING plus its kwargs
        'name': 'adam',
        'kwargs': {
            'lr': 0.001
        }
    },
    'sch': {                        # scheduler settings; the scheduler code in configure_optimizers is commented out
        'name': 'rpl',
        'kwargs': {
            'mode': 'min',
            'factor': 0.1,
            'patience': 10,
            'threshold': 0.0001,
            'threshold_mode': 'rel',
            'cooldown': 0,
            'min_lr': 0
        }
    },
    'loss': 'nll',                  # key into PYTORCH_LOSS_MAPPING
    'kernel_size': 10,
    'dropout': 0.8,
    'attention': False,
    'max_attn_len': 90,
}

In [78]:
class TCNModel(pl.LightningModule):
    """
    Top level Temporal CNN Classifier.
    Note: Receptive Field Size = Number TCN Blocks * Kernel Size * Last Layer Dilation Size

    Parameters (keys read from the params dict):
        loss (str): key into PYTORCH_LOSS_MAPPING
        activation (str): key into PYTORCH_ACT_MAPPING
        opt (dict): {'name': key into PYTORCH_OPT_MAPPING, 'kwargs': optimizer keyword arguments}
        input_windows (int > 0): Number of aggregation windows in the input layer
        topology (list): Topology of the TCN divided by the window size
        kernel_size (int > 1): CNN kernel size
        dropout (float [0, 1]): dropout probability, probability of an element to be zeroed
        attention (bool): whether or not to include attention block after each tcn block (default False)
        max_attn_len (int > 0): max length of attention (only relevant if attention is set to True, default 100)
    """
    def __init__(self, params, data, class_weights=None):
        """
        Init method

        Args:
            params (dict): dictionary of model (hyper)parameters
            data (tuple): tuple of pd.DataFrames (features, labels, targets)
            class_weights: class weighting scheme; when given it is passed unchanged to the
                loss constructor as `weight=` (torch losses such as NLLLoss expect a tensor there)
        """
        # init superclass
        super(TCNModel, self).__init__()
        self.params = params
        #self.batch_size = params['batch_size']
        loss_fn = PYTORCH_LOSS_MAPPING.get(self.params['loss'])
        self.loss = loss_fn() if (isnt(class_weights)) else loss_fn(weight=class_weights)
        ## if you specify an example input, the summary will show input/output for each layer
        #self.example_input_array = torch.rand(5, 28 * 28)
        # Data must be set up first: the model is sized from self.feat_shape and self.num_outputs
        self.__setup_data__(data)
        self.__build_model__()

    def __build_model__(self):
        """
        Build the TCN based network from self.params and the data shape.

        Reads self.feat_shape and self.num_outputs (set by __setup_data__) and the params keys
        'topology', 'activation', 'kernel_size', 'dropout', 'attention' and 'max_attn_len'.
        Creates:
            self.tcn: TemporalConvNet whose layer widths are the topology scaled by the window size
            self.out: linear layer from the last TCN channel count to self.num_outputs
            self.logprob: LogSoftmax over dim 1
        """
        # The last two feature dims are taken as (input channels, window size).
        # Effective history seen per sample = window_size * input_windows (informational only).
        input_channels, window_size = self.feat_shape[-2:]
        scaled_topology = window_size * np.array(self.params['topology'])               # Scale topology by the window size
        channels = np.clip(scaled_topology, a_min=1, a_max=None).astype(int).tolist()   # Make sure layer outputs are always greater than zero
        # Attention settings come from params; the defaults are the values that used to be hard-coded here
        self.tcn = TemporalConvNet(PYTORCH_ACT_MAPPING.get(self.params['activation']), num_input_channels=input_channels,
            channels=channels, kernel_size=self.params['kernel_size'], dropout=self.params['dropout'],
            attention=self.params.get('attention', False), max_attn_len=self.params.get('max_attn_len', 100))
        self.out = nn.Linear(channels[-1], self.num_outputs)
        self.logprob = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """
        Input must have shape (N, C_in, L_in) where
            N: number of batches
            C_in: number of input channels
            L_in: length of input sequence

        Output shape will be (N, C_out) where
            N: number of batches
            C_out: number of classes

        The output holds log-probabilities (LogSoftmax over the class dimension).
        """
        out_embedding = self.tcn(x)
        # Only the last position of the sequence dimension feeds the output layer
        out_score = self.out(out_embedding[:, :, -1])
        out_prob = self.logprob(out_score)
        return out_prob

    def training_step(self, batch, batch_idx):
        """
        Lightning calls this inside the training loop

        Uses the first two items of the batch as (x, y) and returns an OrderedDict with the
        loss under 'loss' and the same value as 'train_loss' under 'progress_bar' and 'log'.
        """
        x, y = batch[:2]
        y_hat = self.forward(x)
        loss_val = self.loss(y_hat, y)

        # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
        if (self.trainer.use_dp or self.trainer.use_ddp2):
            loss_val = loss_val.unsqueeze(0)

        tqdm_dict = {'train_loss': loss_val}
        output = OrderedDict({
            'loss': loss_val,
            'progress_bar': tqdm_dict,
            'log': tqdm_dict
        })

        return output # can also return a scalar (loss val) instead of a dict

    def validation_step(self, batch, batch_idx):
        """
        Lightning calls this inside the validation loop

        Uses the first two items of the batch as (x, y) and returns an OrderedDict with the
        batch loss ('val_loss') and the batch accuracy ('val_acc').
        """
        x, y = batch[:2]
        y_hat = self.forward(x)
        loss_val = self.loss(y_hat, y)

        # acc: fraction of samples whose arg-max class equals the label
        labels_hat = torch.argmax(y_hat, dim=1)
        val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
        val_acc = torch.tensor(val_acc)

        if (self.on_gpu):
            val_acc = val_acc.cuda(loss_val.device.index)

        # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
        if (self.trainer.use_dp or self.trainer.use_ddp2):
            loss_val = loss_val.unsqueeze(0)
            val_acc = val_acc.unsqueeze(0)

        output = OrderedDict({
            'val_loss': loss_val,
            'val_acc': val_acc,
        })

        return output # can also return a scalar (loss val) instead of a dict

    def validation_end(self, outputs):
        """
        Called at the end of validation to aggregate outputs

        Averages 'val_loss' and 'val_acc' over the validation steps (unweighted by batch size).

        :param outputs: list of individual outputs of each validation step
        :return: dict with 'progress_bar', 'log' and 'val_loss'
        """
        # if returned a scalar from validation_step, outputs is a list of tensor scalars
        # we return just the average in this case (if we want)
        # return torch.stack(outputs).mean()

        val_loss_mean = 0
        val_acc_mean = 0
        for output in outputs:
            val_loss = output['val_loss']

            # reduce manually when using dp
            if (self.trainer.use_dp or self.trainer.use_ddp2):
                val_loss = torch.mean(val_loss)
            val_loss_mean += val_loss

            # reduce manually when using dp
            val_acc = output['val_acc']
            if (self.trainer.use_dp or self.trainer.use_ddp2):
                val_acc = torch.mean(val_acc)

            val_acc_mean += val_acc

        val_loss_mean /= len(outputs)
        val_acc_mean /= len(outputs)
        tqdm_dict = {'val_loss': val_loss_mean, 'val_acc': val_acc_mean}
        result = {'progress_bar': tqdm_dict, 'log': tqdm_dict, 'val_loss': val_loss_mean}
        return result

    def configure_optimizers(self):
        """
        construct and return optimizers

        Only the optimizer described by params['opt'] is returned; params['sch'] is not used
        while the scheduler code below stays commented out.
        """
        opt_fn = PYTORCH_OPT_MAPPING.get(self.params['opt']['name'])
        opt = opt_fn(self.parameters(), **self.params['opt']['kwargs'])
        return opt
        #sch_fn = PYTORCH_SCH_MAPPING.get(self.params['sch']['name'])
        #sch = sch_fn(opt, **self.params['sch']['kwargs'])
        #return [opt], [sch]

    def __setup_data__(self, data):
        """
        Split the (feature, label, target) DataFrames and store the splits on the model.

        Each frame goes through pd_get_np_tvt(..., as_midx=False), whose three results are used
        as the train / val / test splits. Sets self.flt_train, self.flt_val, self.flt_test
        (each a (features, labels, targets) tuple), self.feat_shape and self.num_outputs.
        """
        feature_df, label_df, target_df = data
        ftrain, fval, ftest = map(np_at_least_nd, pd_get_np_tvt(feature_df, as_midx=False))
        ltrain, lval, ltest = map(partial(np_at_least_nd, axis=-1), pd_get_np_tvt(label_df, as_midx=False))
        ttrain, tval, ttest = map(partial(np_at_least_nd, axis=-1), pd_get_np_tvt(target_df, as_midx=False))
        self.flt_train = (ftrain, ltrain, ttrain)
        self.flt_val = (fval, lval, tval)
        self.flt_test = (ftest, ltest, ttest)
        np_assert_identical_len_dim(*self.flt_train)
        np_assert_identical_len_dim(*self.flt_val)
        np_assert_identical_len_dim(*self.flt_test)
        # Needed for model building:
        self.feat_shape = ftrain.shape
        # Number of classes = the largest count of distinct label values found in any split
        self.num_outputs = max(map(lambda a: len(np.unique(a)), (ltrain, lval, ltest)))

    def preproc(self, params, data):
        """
        Reshaping transform for temporal data. All data must have same number of dimensions and observations.

        Runs a "moving window unstack" operation through the first data such that each row of the result contains the history
        of the original up to and including that row based on an input_windows parameter in params. The input_windows
        determines how far back the history each row will record; an input_windows of '1' results in no change.
        The windows of each sample are concatenated along the last axis; no extra dimension is added.

        example with input_windows of '2':
                                                    0 | a b c
                                                    1 | d e f ---> 1 | a b c d e f
                                                    2 | g h i      2 | d e f g h i
                                                    3 | j k l      3 | g h i j k l

        All data after the first tuple item are assumed to be label/target vectors and are reshaped to align with the new first
        tuple item.

        Args:
            params (dict): params dictionary
            data (tuple): tuple of numpy data with features as first element

        Returns:
            Tuple of reshaped data
        """
        np_assert_identical_len_dim(*data)
        # Reshape features into overlapping moving window samples
        f = np.array([np.concatenate(vec, axis=-1) for vec in window_iter(data[0], n=params['input_windows'])])
        rest = [vec[params['input_windows']-1:] for vec in data[1:]]  # Realign by dropping observations prior to the first step
        np_assert_identical_len_dim(f, *rest)
        return (f, *rest)

    # NOTE(review): all three loaders pass False as the third positional argument of batchify
    # (presumably a shuffle flag) -- confirm against model.train_util.batchify.
    @pl.data_loader
    def train_dataloader(self):
        """Batched, windowed training split."""
        logging.info('train_dataloader called')
        return batchify(self.params, self.preproc(self.params, self.flt_train), False)

    @pl.data_loader
    def val_dataloader(self):
        """Batched, windowed validation split."""
        logging.info('val_dataloader called')
        return batchify(self.params, self.preproc(self.params, self.flt_val), False)

    @pl.data_loader
    def test_dataloader(self):
        """Batched, windowed test split."""
        logging.info('test_dataloader called')
        return batchify(self.params, self.preproc(self.params, self.flt_test), False)

    @staticmethod
    def add_model_specific_args(parent_parser, root_dir):  # pragma: no cover
        """
        Parameters you define here will be available to your model through self.params
        (currently a no-op placeholder that returns None)
        """
        pass

In [79]:
params = test_par
mdl = TCNModel(params, (feature_df, label_df, target_df))

In [82]:
mdl

TCNModel(
  (loss): NLLLoss()
  (tcn): TemporalConvNet(
    (network): Sequential(
      (0): TemporalBlock(
        (conv1): Conv1d(10, 40, kernel_size=(10,), stride=(1,), padding=(9,))
        (chomp1): Chomp1d()
        (act1): ELU(alpha=1.0)
        (dropout1): Dropout(p=0.8, inplace=False)
        (net): Sequential(
          (0): Conv1d(10, 40, kernel_size=(10,), stride=(1,), padding=(9,))
          (1): Chomp1d()
          (2): ELU(alpha=1.0)
          (3): Dropout(p=0.8, inplace=False)
        )
        (downsample): Conv1d(10, 40, kernel_size=(1,), stride=(1,))
        (relu): ReLU()
      )
      (1): TemporalBlock(
        (conv1): Conv1d(40, 20, kernel_size=(10,), stride=(1,), padding=(18,), dilation=(2,))
        (chomp1): Chomp1d()
        (act1): ELU(alpha=1.0)
        (dropout1): Dropout(p=0.8, inplace=False)
        (net): Sequential(
          (0): Conv1d(40, 20, kernel_size=(10,), stride=(1,), padding=(18,), dilation=(2,))
          (1): Chomp1d()
          (2): ELU(

In [56]:
def tcn_setup_data(data):
    """
    Return the first (training) split of the feature, label and target frames as arrays.

    Each frame is split with pd_get_np_tvt(..., as_midx=False); all three resulting
    splits are passed through np_at_least_nd (labels and targets with axis=-1), but
    only the first split of each frame is returned.

    Args:
        data (tuple): (feature_df, label_df, target_df)

    Returns:
        tuple: (ftrain, ltrain, ttrain)
    """
    feature_df, label_df, target_df = data
    at_least_nd_last = partial(np_at_least_nd, axis=-1)
    ftrain, _fval, _ftest = map(np_at_least_nd, pd_get_np_tvt(feature_df, as_midx=False))
    ltrain, _lval, _ltest = map(at_least_nd_last, pd_get_np_tvt(label_df, as_midx=False))
    ttrain, _tval, _ttest = map(at_least_nd_last, pd_get_np_tvt(target_df, as_midx=False))
    return ftrain, ltrain, ttrain

In [57]:
def preproc(params, data):
    """
    Moving-window reshape for temporal data (same logic as TCNModel.preproc).

    Every group of params['input_windows'] items yielded by window_iter over the
    features (data[0]) is concatenated along the last axis into one sample, so each
    sample carries the history up to and including its own row. An input_windows
    of 1 leaves the features unchanged.

    example with input_windows of '2':
                                                0 | a b c
                                                1 | d e f ---> 1 | a b c d e f
                                                2 | g h i      2 | d e f g h i
                                                3 | j k l      3 | g h i j k l

    Every item after the first is treated as a label/target vector: its first
    input_windows-1 observations are dropped so it lines up with the new features.

    Args:
        params (dict): params dictionary, must contain 'input_windows'
        data (tuple): tuple of numpy data with features as first element

    Returns:
        Tuple of reshaped data, features first
    """
    np_assert_identical_len_dim(*data)
    # Features: one concatenated sample per moving window
    windowed = []
    for window in window_iter(data[0], n=params['input_windows']):
        windowed.append(np.concatenate(window, axis=-1))
    f = np.array(windowed)
    # Labels/targets: drop the observations that precede the first complete window
    rest = []
    for vec in data[1:]:
        rest.append(vec[params['input_windows']-1:])
    np_assert_identical_len_dim(f, *rest)
    return (f, *rest)

In [58]:
d = preproc(params, tcn_setup_data((feature_df, label_df, target_df)))

In [61]:
d[0].shape

(1337, 10, 20)

In [99]:
# NOTE(review): scratch cell. d[0].shape was reported as (1337, 10, 20) above, so d[0][0][0]
# is 1-D and indexing it with three indices raises the IndexError recorded below.
# Presumably the slice was meant for the 3-D array d[0] itself -- confirm or delete this cell.
d[0][0][0][:, :, :-4]

IndexError: too many indices for array

In [51]:
# NOTE(review): scratch cell. `x` is not defined in any cell shown in this notebook; the
# recorded TypeError shows it was a tuple when this ran, which cannot take a tuple index.
# Presumably an array element of it was meant to be sliced -- confirm or delete this cell.
chomp_size = 10
x[:, :, :-chomp_size]

TypeError: tuple indices must be integers or slices, not tuple

In [29]:
# Train on one GPU with 16-bit mixed precision at optimization level O2
trainer = pl.Trainer(
    max_nb_epochs=params['epochs'],
    gpus=1,
    amp_level='O2',
    use_amp=True,
)
trainer.fit(mdl)

gpu available: True, used: True
VISIBLE GPUS: 0
using 16bit precision
Selected optimization level O2:  FP16 training with FP32 batchnorm and FP32 master weights.

Defaults for this optimization level are:
enabled                : True
opt_level              : O2
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : True
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O2
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : True
loss_scale             : dynamic


  0%|          | 0/5 [00:00<?, ?it/s]

                        Name             Type Params
0                       loss          NLLLoss    0  
1                        tcn  TemporalConvNet   15 K
2                tcn.network       Sequential   15 K
3              tcn.network.0    TemporalBlock    4 K
4        tcn.network.0.conv1           Conv1d    4 K
5       tcn.network.0.chomp1          Chomp1d    0  
6         tcn.network.0.act1              ELU    0  
7     tcn.network.0.dropout1          Dropout    0  
8          tcn.network.0.net       Sequential    4 K
9   tcn.network.0.downsample           Conv1d  440  
10        tcn.network.0.relu             ReLU    0  
11             tcn.network.1    TemporalBlock    8 K
12       tcn.network.1.conv1           Conv1d    8 K
13      tcn.network.1.chomp1          Chomp1d    0  
14        tcn.network.1.act1              ELU    0  
15    tcn.network.1.dropout1          Dropout    0  
16         tcn.network.1.net       Sequential    8 K
17  tcn.network.1.downsample           Conv1d 

 73%|███████▎  | 11/15 [00:00<00:03,  1.32it/s, batch_nb=9, epoch=0, gpu=0, loss=4.367, train_loss=2.2, v_nb=5]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0


100%|██████████| 15/15 [00:00<00:00, 43.07it/s, batch_nb=10, epoch=199, gpu=0, loss=0.610, train_loss=0.515, v_nb=5, val_acc=0.491, val_loss=0.785]

1

100%|██████████| 15/15 [00:12<00:00, 43.07it/s, batch_nb=10, epoch=199, gpu=0, loss=0.610, train_loss=0.515, v_nb=5, val_acc=0.491, val_loss=0.785]

In [91]:
#PYTORCH_ACT_MAPPING, PYTORCH_OPT_MAPPING, PYTORCH_LOSS_MAPPING

In [55]:
#print(torchfunc.performance.tips(mdl))