# Modeling with Deep Inference Pipelines: Demo

### Preparations

In [1]:
import os
import sys
sys.path.append(os.path.abspath('../..'))
from nowoe.data.api.smartwatch.utilities.preparations import *
import pandas
import plotly_express as px
import numpy
import json
import pickle
import gzip
import matplotlib.pyplot as plt
from copy import deepcopy
from functools import reduce
from tqdm import tqdm
from datetime import datetime, timezone
from typing import Dict, List, Tuple, Union, Any, Iterator
import torch.utils.data.dataloader
from nowoe.data.api.smartwatch.utilities.timestamp import get_utc_date_from_utc_timestamp
from nowoe.data.api.smartwatch.data_manager.module import SmartwatchDataManager
from nowoe.deep_learning.data.dataset.smartwatch_study.single_slice import get_dataloaders, SmartwatchStudySingleSliceDataset, single_slice_collate_fn
from nowoe.deep_learning.data.preprocessing.single_slice.normalization import MinMaxSingleSliceNormalization, ZScoreSingleSliceNormalization
from nowoe.deep_learning.data.tensorizer.single_slice import CustomTensorizer
from nowoe.deep_learning.data.augmentation.single_slice.sample_from_distribution.gaussian_mixtures import GaussianMixturesSingleSliceAugmentation
from nowoe.deep_learning.pipeline.model.slice import LateFusedSeparateRNNSliceModel

Getting the dataloaders:

In [2]:
dataloaders = get_dataloaders(
    batch_size=50,
    root_dir='../../resources/warrior_wellness/Analysis/local_repo/',
    subject_splits={
        "train": [f'SWS_{i:02d}' for i in range(0,10)],
        "test": [f'SWS_{i:02d}' for i in range(10,15)]},
    dataset_config={
        'slice_lengths': [3600],
        'slice_time_step': (5 * 60),
        'label_milestone_per_window': 1.0,
        'metadata_cache_filepath': './dataset_cache/dataset-cache-2.pkl.gz',
        'no_cache': False,
        'parallel_threads': 10
    },
    sampler_configs=dict(
       train=dict(
           negative_sample_count=100,
            positive_sample_count=50,
            target_variable='overall_quantized_stress_value',
           split_name="train"
       ),
       test=dict(
               negative_sample_count=100,
            positive_sample_count=50,
            target_variable='overall_quantized_stress_value',
           split_name="test"
       )
        )
)

2022-02-26 11:01:27,637 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.interface - INFO - initializing data manager...
2022-02-26 11:04:02,725 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.interface - INFO - preparing the dataset...
2022-02-26 11:04:03,005 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.dataset - INFO - Loaded metadata from cache file: /home/shayan/phoenix/nowoe_framework/nowoe_framework/notebooks/smartwatch/dataset_cache/dataset-cache-2.pkl.gz
2022-02-26 11:04:03,006 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.interface - INFO - preparing samplers...


Preparing a min-max normalizer:

In [3]:
normalizer = MinMaxSingleSliceNormalization(
    feature_names_per_data_source=dict(
        daily=[
            'heart_rate_tsvalue', 
            'distanceInMeters', 
            'floorsClimbed', 
            'bmrKilocalories', 
            'durationInSeconds',
            'activeTimeInSeconds', 
            'activityStressDurationInSeconds',
            'minHeartRateInBeatsPerMinute',
            'stressDurationInSeconds',
            'highStressDurationInSeconds',
            'maxStressLevel',
            'averageHeartRateInBeatsPerMinute',
        ],
        pulseOx=[
            'durationInSeconds',
            'spo2_tsvalue'
        ],
        respiration=[
           'durationInSeconds',
           'epoch_to_breath_tsvalue'
        ],
        stress=[
            'durationInSeconds',
            'stress_level_tsvalue'
        ]
    )
)
normalizer.learn_normalizers(dataloaders['train'])

2022-02-26 11:04:03,162 - nowoe.deep_learning.data.preprocessing.single_slice.normalization.base - INFO - [learning normalizers] sampling from the given dataloader...
100%|███████████████████████████████████████████| 16/16 [00:07<00:00,  2.21it/s]
2022-02-26 11:04:10,395 - nowoe.deep_learning.data.preprocessing.single_slice.normalization.base - INFO - [learning distributions] fitting models (of class <class 'sklearn.preprocessing._data.MinMaxScaler'>) to the distributions...
2022-02-26 11:04:10,403 - nowoe.deep_learning.data.preprocessing.single_slice.normalization.base - INFO - [learning distributions] completed.


Preparing the tensorizer:

In [4]:
tensorizer = CustomTensorizer(
       config=dict(
            timestamp_column='utc_timestamp',
           value_config=dict(
               daily=dict(
                    embed=dict(
                        columns=['activityType'],
                        embedding_dim=[8],
                    ),
                    bring=[
                        'durationInSeconds',
                        'heart_rate_tsvalue', 
                        'distanceInMeters', 
                        'floorsClimbed', 
                        'bmrKilocalories', 
                        'activeTimeInSeconds', 
                        'activityStressDurationInSeconds',
                        'minHeartRateInBeatsPerMinute',
                        'stressDurationInSeconds',
                        'highStressDurationInSeconds',
                        'maxStressLevel',
                        'averageHeartRateInBeatsPerMinute',
                    ]
               ),
               pulseOx=dict(
                   bring=[
                       'durationInSeconds',
                       'spo2_tsvalue'
                   ]
               ),
               respiration=dict(
                   bring=[
                       'durationInSeconds',
                       'epoch_to_breath_tsvalue'
                   ]
               ),
               stress=dict(
                   bring=[
                       'durationInSeconds',
                       'stress_level_tsvalue'
                   ]
               )
           )
       )
)
tensorizer.learn_embeddings(dataloaders['train'])
tensorizer.build_embeddings()

100%|███████████████████████████████████████████| 16/16 [00:06<00:00,  2.51it/s]


In [23]:
def count_iter(dl):
    c = 0
    for _ in tqdm(dl):
        c += 1
    print(c)

class A:
    def __init__(self):
        pass
    
    def a(self, dl):
        count_iter(dl)

Getting the labels_layout:

In [5]:
labels_layout = set()
for b in tqdm(dataloaders['train']):
    labels_layout = labels_layout.union([e['overall_quantized_stress_value'] for e in b['meta']])
labels_layout = sorted(list(labels_layout))

100%|███████████████████████████████████████████| 16/16 [00:06<00:00,  2.48it/s]


## Demo: Late Fusion Recurrent Neural Network - A Single Slice Model

The following configurations allow us to effectively define an instance of `LateFusedSeparateRNNSliceModel`, which is the model-family for single-slice models allowing the specific definition of a recurrent network for processing each data source separately:

In [6]:
model = LateFusedSeparateRNNSliceModel(
    config=dict(
        branches=dict(
            daily=dict(
                rnn_model="LSTM",
                rnn_args=dict(
                    input_size=20,
                    hidden_size=32,
                    bidirectional=True,
                    batch_first=True,
                    bias=False,
                    dropout=0.2
                ), #torch args
                project_args=dict(
                    input_dim=64,
                    projection_dim=32
                ),  # will be projected to this dimension if not None.
            ),
            respiration=dict(
                rnn_model="LSTM",
                rnn_args=dict(
                    input_size=2,
                    hidden_size=4,
                    bidirectional=True,
                    batch_first=True,
                    bias=False,
                    dropout=0.2
                ), #torch args
                project_args=dict(
                    input_dim=8,
                    projection_dim=4
                ),  # will be projected to this dimension if not None.
            ),
            pulseOx=dict(
                rnn_model="LSTM",
                rnn_args=dict(
                    input_size=2,
                    hidden_size=4,
                    bidirectional=True,
                    batch_first=True,
                    bias=False,
                    dropout=0.2
                ), #torch args
                project_args=dict(
                    input_dim=8,
                    projection_dim=4
                ),  # will be projected to this dimension if not None.
            ),
            stress=dict(
                rnn_model="LSTM",
                rnn_args=dict(
                    input_size=2,
                    hidden_size=4,
                    bidirectional=True,
                    batch_first=True,
                    bias=False,
                    dropout=0.2
                ), #torch args
                project_args=dict(
                    input_dim=8,
                    projection_dim=4
                ),  # will be projected to this dimension if not None.
            )
        ),
        aggregation=dict(
            method="concatenate", # options are `mean` (this means all the branch reps have to be the same), `concatenate`
            project_args=dict(
                input_dim=44,
                projection_dim=50), # the output of the given `method` will be projected to it (if not None).
        ),
        task=dict(
            target_in_meta='overall_quantized_stress_value',
            task_type='classification',
            label_layout=[0.0,
                          0.2571428571428571,
                          0.5142857142857142,
                          0.7714285714285714,
                          1.0285714285714285,
                          1.2857142857142856,
                          1.5428571428571427,
                          1.7999999999999998,
                          2.057142857142857,
                          2.314285714285714,
                          2.571428571428571,
                          2.8285714285714283,
                          3.0857142857142854,
                          3.3428571428571425,
                          3.5999999999999996],
            loss_class='CrossEntropyLoss',
            loss_args=dict(),
        )
    )
)

  "num_layers={}".format(dropout, num_layers))


# Single batch demonstration

Now let's read, process, and make inferences using the batch:

In [7]:
my_iter = iter(dataloaders['train'])
batch = next(my_iter)

In [8]:
processed_batch = tensorizer(normalizer(batch))
# packed_batch = pack_single_slice_batch_for_rnn(processed_batch, processed_batch['slice'][0].keys())

In [9]:
out = model(processed_batch, mode='train')
print(out.keys())

dict_keys(['model_outputs', 'loss_outputs'])


In [10]:
out = model(processed_batch, mode='test')
print(out.keys())

dict_keys(['model_outputs', 'loss_outputs'])


In [11]:
out['model_outputs'].keys()

dict_keys(['latent_representations', 'logits', 'y_hat', 'loss_eval'])

In [12]:
out['model_outputs']['y_hat']

array([11,  6, 11,  6, 11, 11, 11, 11, 11, 11,  7, 11, 11, 11,  7, 11, 11,
       11, 11, 11, 11, 11,  7, 11, 11, 11,  7, 11, 11,  6, 11,  7, 11, 11,
       11,  7, 11,  7, 11, 11, 11, 11, 11, 11,  7, 11, 11, 11, 11,  7])

In [13]:
for batch in tqdm(dataloaders['train']):
    processed_batch = tensorizer(normalizer(batch))
    out = model(processed_batch, mode='train')

100%|███████████████████████████████████████████| 16/16 [00:11<00:00,  1.38it/s]


In [14]:
for batch in tqdm(dataloaders['test']):
    processed_batch = tensorizer(normalizer(batch))
    out = model(processed_batch, mode='test')

100%|█████████████████████████████████████████████| 9/9 [00:09<00:00,  1.03s/it]
