# Pyro Model Testing

In [1]:
import sys
import os
from os import sep
from os.path import dirname, realpath
from pathlib import Path
from functools import partial, reduce
import logging

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Convenience function to make a directory string for the current file based on inputs.
    Jupyter Notebook in Anaconda invokes the Python interpreter in Anaconda's subdirectory
    which is why changing sys.argv[0] is necessary. In the future a better way to do this
    should be preferred..
    """
    return crunch_dir +subdir +fname

def fix_path(cwd):
    """
    Convenience function to fix argv and python path so that jupyter notebook can run the same as
    any script in crunch.
    """
    sys.argv[0] = cwd
    module_path = os.path.abspath(os.path.join('..'))
    if module_path not in sys.path:
        sys.path.append(module_path)

fname = 'model_test.ipynb'      # FILL
dir_name = 'model'              # FILL
fix_path(get_cwd(fname, dir_name +sep))

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import torch
# from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
from torch.distributions import constraints
import pyro
from pyro.contrib.autoguide import AutoMultivariateNormal
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from pyro.distributions import Normal, Categorical, Bernoulli
pyro.enable_validation(True)
pyro.set_rng_seed(37)
from dask import delayed, compute
import matplotlib.pyplot as plt

from ipywidgets import interact, interact_manual, interactive, fixed
from IPython.display import display

pd.set_option("display.max_rows", 100)
pd.set_option('display.max_columns', 50)

from common_util import RECON_DIR, JSON_SFX_LEN, DT_CAL_DAILY_FREQ, is_type, pd_common_idx_rows, remove_dups_list, set_loglevel, chained_filter, get_variants, dump_df, load_json, gb_transpose, np_inner, pd_common_index_rows, filter_cols_below, inner_join, outer_join, ser_shift, list_get_dict, window_iter, pyt_unsqueeze_to, pyt_reverse_dim_order, benchmark
from common_util import isnt, midx_get_level, midx_intersect, str_to_list, pd_common_idx_rows, midx_split, pd_midx_to_arr, window_iter, np_is_ndim, get_class_name, get0
from model.common import DATASET_DIR, HOPT_WORKER_BIN, INTRADAY_LEN, default_model, default_backend, default_dataset, default_trials_count
from model.data_util import datagen, align_first_last_cols, prune_nulls, prepare_transpose_data, prepare_label_data, prepare_target_data
from model.model_util import BINARY_CLF_MAP
from recon.dataset_util import prep_dataset, gen_group
from recon.split_util import get_train_test_split, gen_time_series_split, index_three_split, pd_binary_clip

Using TensorFlow backend.


In [2]:
set_loglevel('info')

### List Datasets

In [3]:
os.listdir(DATASET_DIR)

['labels_eod.json',
 'mvp_dnorm_raw.json',
 'drl.json',
 'mvp_labels_eod.json',
 'dnorm_raw_pba_ohlca.json',
 'mvp_labels_fbxeod.json',
 'mvp_dnorm_raw_pba_avgprice.json',
 'dma.json',
 'dnorm_raw.json',
 'raw_pba_ohlca.json',
 'targets_eod.json',
 'mvp_targets_eod.json',
 'row_masks.json',
 'ddiff.json',
 'dnorm_sym.json',
 'mvp_targets_fbxeod.json',
 'sym_raw.json',
 'dnorm_dmx_raw_pba_ohlca.json']

### Set Fixed Experiment Parameters ("Commandline" Arguments)

In [4]:
logdir = None
cmd_input = {
    'model=': 'BinTCN',
    'backend=': 'pytorch',
    'dataset=': 'dnorm_dmx_raw_pba_ohlca.json',
    'assets=': 'sp_500', # 'russell_2000'
    'trials_count=': 50,
}

In [5]:
model_code = cmd_input['model='] if (cmd_input['model='] is not None) else default_model
backend_name = cmd_input['backend='] if (cmd_input['backend='] is not None) else default_backend
dataset_fname = cmd_input['dataset='] if (cmd_input['dataset='] is not None) else default_dataset
assets = str_to_list(cmd_input['assets=']) if (cmd_input['assets='] is not None) else None
trials_count = int(cmd_input['trials_count=']) if (cmd_input['trials_count='] is not None) else default_trials_count

### Load Dataset

In [6]:
mod_obj = BINARY_CLF_MAP[backend_name][model_code]()
mod_name = get_class_name(mod_obj)
dataset_name = dataset_fname[:-JSON_SFX_LEN]
dataset_dict = load_json(dataset_fname, dir_path=DATASET_DIR)

In [7]:
dataset_dict

{'features': [['norm', 'dnorm_dmx_raw_pba_ohlca']],
 'labels': 'mvp_labels_eod.json',
 'targets': 'mvp_targets_eod.json',
 'row_masks': 'row_masks.json'}

In [8]:
dataset = prep_dataset(dataset_dict, assets=assets, filters_map=None)

In [9]:
logging.info('model: {}'.format(mod_name))
logging.info('backend: {}'.format(backend_name))
logging.info('dataset: {} {} df(s)'.format(len(dataset['features']['dfs']), dataset_name))
logging.info('assets: {}'.format(str('all' if (assets==None) else ', '.join(assets))))

INFO:root:model: BinaryTCN
INFO:root:backend: pytorch
INFO:root:dataset: 1 dnorm_dmx_raw_pba_ohlca df(s)
INFO:root:assets: sp_500


### Show Data Options

In [10]:
flts_data = []
flts_choices = {}
for i, (fpath, lpath, tpath, frec, lrec, trec, fcol, lcol, tcol, flt) in enumerate(datagen(dataset, feat_prep_fn=prepare_transpose_data, label_prep_fn=prepare_label_data, target_prep_fn=prepare_target_data, how='df_to_df', delayed=True)):
    ident = '{fdesc}[{fcol}], {ldesc}[{lcol}], {tdesc}[{tcol}])'.format(fdesc=frec.desc, fcol=fcol, ldesc=lrec.desc, lcol=lcol, tdesc=trec.desc, tcol=tcol)
    logging.info('{data_idx} (X, y, z) -> ({data_id})'.format(data_idx=i, data_id=ident))
    flts_data.append(flt)
    flts_choices[ident] = i

INFO:root:0 (X, y, z) -> (raw_pba_dmx[:], raw_pba_oc_retxeod_direod[:], raw_pba_oc_retxeod_reteod[:]))
INFO:root:1 (X, y, z) -> (raw_pba_dmx[:], raw_pba_oc_retxeod(0.25%)_direod[:], raw_pba_oc_retxeod(0.25%)_reteod[:]))
INFO:root:2 (X, y, z) -> (raw_pba_dmx[:], raw_pba_oc_retxeod(0.5*avg,1)_direod[:], raw_pba_oc_retxeod(0.5*avg,1)_reteod[:]))
INFO:root:3 (X, y, z) -> (raw_pba_dmx[:], raw_pba_oc_retxeod(0.5*max,1)_direod[:], raw_pba_oc_retxeod(0.5*max,1)_reteod[:]))
INFO:root:4 (X, y, z) -> (raw_pba_dmx[:], raw_pba_oc_retxeod(0.5*min,1)_direod[:], raw_pba_oc_retxeod(0.5*min,1)_reteod[:]))
INFO:root:5 (X, y, z) -> (raw_pba_dmx[:], raw_pba_oc_retxeod(0.5*std,1)_direod[:], raw_pba_oc_retxeod(0.5*std,1)_reteod[:]))
INFO:root:6 (X, y, z) -> (raw_pba_dmx[:], raw_pba_oc_retxeod(0.50%)_direod[:], raw_pba_oc_retxeod(0.50%)_reteod[:]))
INFO:root:7 (X, y, z) -> (raw_pba_dmx[:], raw_pba_oc_retxeod(1*avg,1)_direod[:], raw_pba_oc_retxeod(1*avg,1)_reteod[:]))
INFO:root:8 (X, y, z) -> (raw_pba_dmx[:], 

### Select Feature and Label/Target

In [11]:
feature, label, target = flts_data[1].compute()

  labels = getattr(columns, 'labels', None) or [
  return pd.MultiIndex(levels=new_levels, labels=labels, names=columns.names)
  labels, = index.labels


In [12]:
f = feature
pos_l, neg_l = pd_binary_clip(label) # Clip Label by Side
l = pos_l
t = target

### Set Device

In [13]:
dev = torch.device('cuda') if (torch.cuda.is_available()) else torch.device('cpu')

### Split Data into Ndarrays

In [14]:
val_ratio = .2
test_ratio = .2
train_ratio = 1-(val_ratio+test_ratio)
f_train_idx, f_val_idx, f_test_idx = midx_split(f.index, train_ratio, val_ratio, test_ratio)
l_train_idx, l_val_idx, l_test_idx = midx_split(l.index, train_ratio, val_ratio, test_ratio)
t_train_idx, t_val_idx, t_test_idx = midx_split(t.index, train_ratio, val_ratio, test_ratio)

In [15]:
f_train_pd, f_val_pd, f_test_pd = f.loc[f_train_idx], f.loc[f_val_idx], f.loc[f_test_idx]
l_train_pd, l_val_pd, l_test_pd = l.loc[l_train_idx], l.loc[l_val_idx], l.loc[l_test_idx]
t_train_pd, t_val_pd, t_test_pd = t.loc[t_train_idx], t.loc[t_val_idx], t.loc[t_test_idx]

In [16]:
if (is_type(f.index, pd.core.index.MultiIndex)):
    f_train_np, f_val_np, f_test_np = map(pd_midx_to_arr, [f_train_pd.stack(), f_val_pd.stack(), f_test_pd.stack()])
else:
    f_train_np, f_val_np, f_test_np = f_train_pd.values, f_val_pd.values, f_test_pd.values
l_train_np, l_val_np, l_test_np = l_train_pd.values, l_val_pd.values, l_test_pd.values
t_train_np, t_val_np, t_test_np = t_train_pd.values, t_val_pd.values, t_test_pd.values

In [17]:
val_tar = torch.tensor(t_val_np, dtype=torch.float32, device=dev, requires_grad=False).squeeze()

### Set Input Shape

In [18]:
input_shape = tuple(f_train_np.shape[-2:]) if (len(f_train_np.shape) > 2) else (1, f_train_np.shape[-1])

### Set Data

In [19]:
train_data = (f_train_np, l_train_np)
val_data = (f_val_np, l_val_np)

### Hyperparameter List

In [20]:
mod_obj.get_space()

{'epochs': <hyperopt.pyll.base.Apply at 0x7fc48842fc50>,
 'batch_size': <hyperopt.pyll.base.Apply at 0x7fc3f48bb940>,
 'loss': <hyperopt.pyll.base.Apply at 0x7fc48842f828>,
 'opt': <hyperopt.pyll.base.Apply at 0x7fc48842fac8>,
 'input_windows': <hyperopt.pyll.base.Apply at 0x7fc48842bb00>,
 'topology': <hyperopt.pyll.base.Apply at 0x7fc48842b4a8>,
 'kernel_size': <hyperopt.pyll.base.Apply at 0x7fc48842f358>,
 'dropout': <hyperopt.pyll.base.Apply at 0x7fc48842f438>,
 'attention': <hyperopt.pyll.base.Apply at 0x7fc48842f588>,
 'max_attn_len': <hyperopt.pyll.base.Apply at 0x7fc48842f710>}

### Set Hyperparameters

In [21]:
pmf = list(reversed(l_train_pd.value_counts(normalize=True)))

In [63]:
params = {
    'epochs': 100,
    'batch_size': 128, #256
    'loss': 'nll',
    'cw': pmf,
    'cw': None,
    'opt': {
        'name': 'Adam',
        'lr': .0001
    },
    'input_windows': 5,
    'topology': [10],
    'kernel_size': 8,
    'dropout': 0,
    'attention': False,
    'max_attn_len': 80,
}

### Final Preprocessing and Batchificaton

In [64]:
def preproc(params, data):
    """
    Reshaping transform for temporal data.

    Runs a "moving window unstack" operation through the first data such that each row of the result contains the history
    of the original up to and including that row based on a input_windows parameter in params. The input_windows
    determines how far back the history each row will record; a input_windows of '1' results in no change.
    This method also adds a singleton dimension between the first and second after the moving window unstack; this is to
    denote the "number of channels" for CNN based learning algorithms.

    example with input_windows of '2':
                                                0 | a b c 
                                                1 | d e f ---> 1 | a b c d e f
                                                2 | g h i      2 | d e f g h i
                                                3 | j k l      3 | g h i j k l

    All data after the first tuple item are assumed to be label/target vectors and are reshaped to align with the new first
    tuple item.
    """
    # Reshape features into overlapping moving window samples
    f = np.array([np.concatenate(vec, axis=-1) for vec in window_iter(data[0], n=params['input_windows'])])

    if (len(f.shape) < 3):
        f = np.expand_dims(f, 1) # Add a singleton dimension for single channel data if needed

    l = []
    for vec in data[1:]:
        r = vec[params['input_windows']-1:]								# Realign by dropping lables prior to the first step
        m = np.expand_dims(r, 1) if (np_is_ndim(vec)) else r 			# Make array vector of vectors if it is one dimensional
        l.append(m)

    return (f, *l)

In [65]:
def batchify(params, data, device, override_batch_size=None, shuffle_batches=False):
    """
    Takes in final numpy data and returns torch DataLoader over torch tensor minibatches of specified torch device.
    """
    f = torch.tensor(data[0], dtype=torch.float32, device=device, requires_grad=True)
    if (params['loss'] in ['bce', 'bcel']):
        l = [torch.tensor(d, dtype=torch.float32, device=device, requires_grad=False) for d in data[1:]]
    elif (params['loss'] in ['ce', 'nll']):
        l = [torch.tensor(d, dtype=torch.float32, device=device, requires_grad=False).squeeze() for d in data[1:]]
    ds = TensorDataset(f, *l)
    dl = DataLoader(ds, batch_size=params['batch_size'] if (isnt(override_batch_size)) else override_batch_size, shuffle=shuffle_batches)
    return dl

In [83]:
train_data = (f_train_np, l_train_np)
val_data = (f_val_np, l_val_np)
train_dl = batchify(params, preproc(params, train_data), dev, shuffle_batches=True)
val_dl = batchify(params, preproc(params, val_data), dev, override_batch_size=val_data[-1].size, shuffle_batches=False)

In [68]:
for Xb, yb in train_dl:
    print('Xb: {}, yb: {}'.format(Xb.shape, yb.shape))
    break

Xb: torch.Size([128, 5, 40]), yb: torch.Size([128])


### Define Embedding Model

In [69]:
class Net(torch.nn.Module):

    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.h1 = torch.nn.Linear(input_size, hidden_size)
        self.out = torch.nn.Linear(hidden_size, output_size)
        self.relu = torch.nn.ReLU()
        self.log_softmax = torch.nn.LogSoftmax(dim=1)

    def forward(self, x):
        out_embedding = self.h1(x)
        out_score = self.relu(self.out(out_embedding))
        out_prob = self.log_softmax(out_score)
        return out_score

In [70]:
chunk = train_data[0].shape[1] * train_data[0].shape[2]
channel_chunk = chunk
batch_size = params['batch_size']
input_size = chunk * params['input_windows']
hidden_size = chunk * params['topology'][0]
output_size = 1
print(batch_size, input_size, hidden_size, output_size)
net = Net(input_size, hidden_size, output_size).to(dev)

128 200 400 1


### Embedding Model Test

In [119]:
for j in range(params['epochs']):
    epoch_loss = 0.0
    for batch_id, data_batch in enumerate(train_dl):
        data_batch[0] = data_batch[0].reshape(-1, input_size)
#         data_batch[0] = pyt_reverse_dim_order(data_batch[0])
        print(data_batch[0].shape)
        print(net(data_batch[0]).squeeze())
        break
    break

torch.Size([128, 200])
tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0598, 0.0000, 0.4629,
        0.0000, 0.0849, 0.0000, 0.2883, 0.1847, 0.1470, 0.0000, 0.0000, 0.0000,
        0.0000, 0.2478, 0.0000, 0.4654, 0.0000, 0.0000, 0.0000, 0.0123, 0.0000,
        0.1761, 0.0000, 0.0000, 0.0000, 0.0792, 0.0074, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0420, 0.0730, 0.0000, 0.0000, 0.2345, 0.0000, 0.2320,
        0.0000, 0.2375, 0.1283, 0.0000, 0.2735, 0.0000, 0.2961, 0.0000, 0.0000,
        0.0000, 0.1338, 0.0119, 0.0000, 0.0000, 0.0813, 0.0205, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0944, 0.1692, 0.1305, 0.0000, 0.0501,
        0.0000, 0.4059, 0.0000, 0.0760, 0.0000, 0.2544, 0.0000, 0.1592, 0.0530,
        0.0231, 0.1915, 0.0000, 0.1715, 0.0000, 0.3514, 0.0000, 0.0000, 0.1123,
        0.0000, 0.2179, 0.0875, 0.0000, 0.1536, 0.0000, 0.1482, 0.2847, 0.0000,
        0.6408, 0.0000, 0.2461, 0.1544, 0.2377, 0.0000, 0.0000, 0.2728, 0.0000,
        0.2316, 0

In [72]:
net

Net(
  (h1): Linear(in_features=200, out_features=400, bias=True)
  (out): Linear(in_features=400, out_features=1, bias=True)
  (relu): ReLU()
  (log_softmax): LogSoftmax()
)

### Define Generative Model
Samples an embedding model using priors $P(A)$

In [120]:
pyro.clear_param_store()
softplus = torch.nn.Softplus()

In [121]:
def model(data):
    h1w_prior = Normal(loc=torch.zeros_like(net.h1.weight, device=dev), scale=torch.ones_like(net.h1.weight, device=dev))
    h1b_prior = Normal(loc=torch.zeros_like(net.h1.bias, device=dev), scale=torch.ones_like(net.h1.bias, device=dev))
    outw_prior = Normal(loc=torch.zeros_like(net.out.weight, device=dev), scale=torch.ones_like(net.out.weight, device=dev))
    outb_prior = Normal(loc=torch.zeros_like(net.out.bias, device=dev), scale=torch.ones_like(net.out.bias, device=dev))

    priors = {
                'h1.weight': h1w_prior.independent(2),
                'h1.bias': h1b_prior.independent(1),
                'out.weight': outw_prior.independent(2),
                'out.bias': outb_prior.independent(1)
    }
    lifted_module = pyro.random_module('module', net, priors)
    lifted_model = lifted_module().to(dev)

#     with pyro.plate('plate', batch_size, device=dev):
#         pyro.sample('obs', Bernoulli(logits=lifted_model(data[0]).squeeze()).independent(1), obs=data[1])
        
    with pyro.plate('plate', batch_size, device=dev):
        pyro.sample('obs', Categorical(logits=lifted_model(data[0])).independent(1), obs=data[1])

### Define Variational Inference Posterior

In [122]:
def guide(data):
    # Hidden layer weight and bias distribution priors
    h1w_mu_param = pyro.param('h1w_mu', torch.randn_like(net.h1.weight, device=dev))
    h1w_sigma_param = softplus(pyro.param('h1w_sigma', torch.randn_like(net.h1.weight, device=dev)))
    h1b_mu_param = pyro.param('h1b_mu', torch.randn_like(net.h1.bias, device=dev))
    h1b_sigma_param = softplus(pyro.param('h1b_sigma', torch.randn_like(net.h1.bias, device=dev)))
    h1w_prior = Normal(loc=h1w_mu_param, scale=h1w_sigma_param)
    h1b_prior = Normal(loc=h1b_mu_param, scale=h1b_sigma_param)

    # Output layer weight and bias distribution priors
    outw_mu_param = pyro.param('outw_mu', torch.randn_like(net.out.weight, device=dev))
    outw_sigma_param = softplus(pyro.param('outw_sigma', torch.randn_like(net.out.weight, device=dev)))
    outb_mu_param = pyro.param('outb_mu', torch.randn_like(net.out.bias, device=dev))
    outb_sigma_param = softplus(pyro.param('outb_sigma', torch.randn_like(net.out.bias, device=dev)))
    outw_prior = Normal(loc=outw_mu_param, scale=outw_sigma_param)
    outb_prior = Normal(loc=outb_mu_param, scale=outb_sigma_param)

    priors = {
                'h1.weight': h1w_prior.independent(2),
                'h1.bias': h1b_prior.independent(1),
                'out.weight': outw_prior.independent(2),
                'out.bias': outb_prior.independent(1)
    }

    lifted_module = pyro.random_module('module', net, priors)
    return lifted_module().to(dev)

In [123]:
# guide = AutoMultivariateNormal(model)

### Define SVI Optimization Problem

In [124]:
optim = Adam({"lr": 0.001})
svi = SVI(model, guide, optim, loss=Trace_ELBO())

### Train SVI

In [125]:
iterations = 1000
for j in range(iterations):
    epoch_loss = 0.0
    for batch_id, data_batch in enumerate(train_dl):
        data_batch[0] = data_batch[0].reshape(-1, input_size)
#         data_batch[0] = pyt_reverse_dim_order(data_batch[0])
#         data_batch[1] = pyt_unsqueeze_to(data_batch[1], data_batch[0].dim())
#         data_batch[1] = pyt_reverse_dim_order(data_batch[1])
#         print(data_batch[0].shape)
#         print(data_batch[1].shape)
        # calculate the loss and take a gradient step
        epoch_loss += svi.step(data_batch) / len(data_batch)
    
    if (j % 100 == 0):
        print("Epoch {} loss: {} ".format(j, epoch_loss))

ValueError: Error while computing log_prob at site 'obs':
The value argument must be within the support
           Trace Shapes:              
            Param Sites:              
           Sample Sites:              
 module$$$h1.weight dist     | 400 200
                   value     | 400 200
                log_prob     |        
   module$$$h1.bias dist     | 400    
                   value     | 400    
                log_prob     |        
module$$$out.weight dist     |   1 400
                   value     |   1 400
                log_prob     |        
  module$$$out.bias dist     |   1    
                   value     |   1    
                log_prob     |        
                obs dist 128 | 128    
                   value     | 128    

In [105]:
num_samples = 10
def predict(x):
    sampled_models = [guide(None) for _ in range(num_samples)]
    yhats = [model(x).data for model in sampled_models]
    mean = torch.mean(torch.stack(yhats), 0)
    return np.argmax(mean.cpu().numpy(), axis=1)

In [107]:
print('Prediction when network is forced to predict')
correct = 0
total = 0
for j, data_batch in enumerate(val_dl):
    data_batch[0] = data_batch[0].reshape(-1, input_size)
    predicted = predict(data_batch[0])
    total += data_batch[1].size(0)
    correct += (predicted == data_batch[1]).sum().item()
print("accuracy: %d %%" % (100 * correct / total))

Prediction when network is forced to predict


TypeError: eq() received an invalid combination of arguments - got (numpy.ndarray), but expected one of:
 * (Tensor other)
      didn't match because some of the arguments have invalid types: ([31;1mnumpy.ndarray[0m)
 * (Number other)
      didn't match because some of the arguments have invalid types: ([31;1mnumpy.ndarray[0m)


In [88]:
preds = []
for i in range(100):
    for data_batch in val_dl:
        data_batch[0] = data_batch[0].reshape(-1, input_size)
        sampled_model = guide(data_batch[0])
        pred = sampled_model(data_batch[0]).data.cpu().numpy().flatten()
        preds.append(pred)

In [50]:
for name in pyro.get_param_store().get_all_param_names():
    print((name, pyro.param(name).data.cpu().numpy()))

('h1w_mu', array([[ 0.06061935,  0.78333384,  0.9648807 , ..., -0.49211693,
        -0.39323905, -1.3315772 ],
       [-1.8267834 , -0.58437175, -0.30872568, ...,  0.262584  ,
        -1.2409006 ,  0.607822  ],
       [ 1.800545  , -0.30165407,  1.1286403 , ...,  0.5021332 ,
         0.22980645,  0.49681905],
       ...,
       [-0.25249648, -1.8088641 , -0.08010844, ...,  2.4268255 ,
         0.35542777, -0.23396581],
       [ 2.1971266 , -0.93359876,  1.3790072 , ...,  0.73735726,
        -2.3852098 , -1.2463719 ],
       [-1.0418869 , -2.4348843 , -0.8035702 , ..., -1.7562288 ,
        -1.2917346 , -0.9114405 ]], dtype=float32))
('h1w_sigma', array([[-0.40544316, -2.3487933 , -0.93532467, ..., -0.28864723,
        -0.9305421 ,  0.1803772 ],
       [ 0.4386679 ,  1.2234645 ,  0.7307406 , ...,  1.6464808 ,
        -0.49710417,  0.9930913 ],
       [ 0.44662485,  0.22739658,  0.9912317 , ..., -0.3423796 ,
         0.9007679 , -0.35563305],
       ...,
       [ 0.04292707,  1.6384434 , 

### Results

In [33]:
train_range = l_train_pd.value_counts(normalize=True, sort=True, ascending=True).values
val_range = l_val_pd.value_counts(normalize=True, sort=True, ascending=True).values

In [34]:
display('      #0         #1')
display('train {}'.format(train_range))
display('val {}'.format(val_range))

'      #0         #1'

'train [0.46604215 0.53395785]'

'val [0.47188755 0.52811245]'

In [35]:
results['last']

{'loss': 0.0029391172548962965,
 'val_loss': 2.336294651031494,
 'acc': 1.0,
 'val_acc': 0.5015353121801432}

In [36]:
vt = val_tar[val_tar.size()[0]-pred_dir.size()[0]:]

In [37]:
pred_dir @ vt

tensor(0.3285, device='cuda:0')

In [38]:
(pred_conf * pred_dir) @ vt

tensor(0.3652, device='cuda:0')

In [39]:
vt.sum()

tensor(0.5334, device='cuda:0')

In [None]:
# def model(data):
#     w_tensor_shape, b_tensor_shape = (hidden_size, input_size), (hidden_size, )
#     h1w_prior = Normal(loc=torch.zeros(w_tensor_shape, device=dev), scale=torch.ones(w_tensor_shape, device=dev))
#     h1b_prior = Normal(loc=torch.zeros(b_tensor_shape, device=dev), scale=torch.ones(b_tensor_shape, device=dev))
#     w_tensor_shape, b_tensor_shape = (output_size, hidden_size), (output_size, )
#     outw_prior = Normal(loc=torch.zeros(w_tensor_shape, device=dev), scale=torch.ones(w_tensor_shape, device=dev))
#     outb_prior = Normal(loc=torch.zeros(b_tensor_shape, device=dev), scale=torch.ones(b_tensor_shape, device=dev))

#     h1w_prior = Normal(loc=torch.zeros_like(net.h1.weight, device=dev), scale=torch.ones_like(net.h1.weight, device=dev))
#     h1b_prior = Normal(loc=torch.zeros_like(net.h1.bias, device=dev), scale=torch.ones_like(net.h1.bias, device=dev))
#     outw_prior = Normal(loc=torch.zeros_like(net.out.weight, device=dev), scale=torch.ones_like(net.out.weight, device=dev))
#     outb_prior = Normal(loc=torch.zeros_like(net.out.bias, device=dev), scale=torch.ones_like(net.out.bias, device=dev))

#     priors = {
#                 'h1.weight': h1w_prior,
#                 'h1.bias': h1b_prior,
#                 'out.weight': outw_prior,
#                 'out.bias': outb_prior
#     }
#     print('model prior shape - e: {}, b: {}'.format(priors['h1.weight'].event_shape,  priors['h1.weight'].batch_shape))
    
#     lifted_module = pyro.random_module('module', net, priors)
#     lifted_model = lifted_module().to(dev)

#     with pyro.plate('plate', size=batch_size, device=dev) as smp:
#         pyro.sample('obs', Categorical(logits=lifted_model(data[0])), obs=data[1])

In [None]:
def guide(data):
    dist_batch_shape = (data[0].shape[-1],)

    # First layer weight distribution priors
    h1w_mu_param = pyro.param('h1w_mu', torch.randn_like(net.h1.weight, device=dev))
    h1w_sigma_param = softplus(pyro.param('h1w_sigma', torch.randn_like(net.h1.weight, device=dev)))
    h1w_prior = Normal(loc=h1w_mu_param, scale=h1w_sigma_param)

    # First layer bias distribution priors
    h1b_mu_param = pyro.param('h1b_mu', torch.randn_like(net.h1.bias, device=dev))
    h1b_sigma_param = softplus(pyro.param('h1b_sigma', torch.randn_like(net.h1.bias, device=dev)))
    h1b_prior = Normal(loc=h1b_mu_param, scale=h1b_sigma_param)

    # Output layer weight distribution priors
    outw_mu_param = pyro.param('outw_mu', torch.randn_like(net.out.weight, device=dev))
    outw_sigma_param = softplus(pyro.param('outw_sigma', torch.randn_like(net.out.weight, device=dev)))
    outw_prior = Normal(loc=outw_mu_param, scale=outw_sigma_param)

    # Output layer bias distribution priors
    outb_mu_param = pyro.param('outb_mu', torch.randn_like(net.out.bias, device=dev))
    outb_sigma_param = softplus(pyro.param('outb_sigma', torch.randn_like(net.out.bias, device=dev)))
    outb_prior = Normal(loc=outb_mu_param, scale=outb_sigma_param)

    priors = {
                'h1.weight': h1w_prior.to_event(event_dims),
                'h1.bias': h1b_prior.to_event(event_dims),
                'out.weight': outw_prior.to_event(event_dims),
                'out.bias': outb_prior.to_event(event_dims)
    }
    print('guide prior shape - e: {}, b: {}'.format(priors['h1.weight'].event_shape,  priors['h1.weight'].batch_shape))

    lifted_module = pyro.random_module('module', net, priors)
    return lifted_module()