# Pipelining: Demo

In [1]:
import os
import sys
sys.path.append(os.path.abspath('../..'))
from nowoe.data.api.smartwatch.utilities.preparations import *
import pandas
import plotly_express as px
import numpy
import json
import pickle
import gzip
import matplotlib.pyplot as plt
from copy import deepcopy
from functools import reduce
from tqdm import tqdm
from datetime import datetime, timezone
from typing import Dict, List, Tuple, Union, Any, Iterator
import torch.utils.data.dataloader
from nowoe.data.api.smartwatch.utilities.timestamp import get_utc_date_from_utc_timestamp
from nowoe.data.api.smartwatch.data_manager.module import SmartwatchDataManager
from nowoe.deep_learning.data.dataset.smartwatch_study.single_slice import get_dataloaders, SmartwatchStudySingleSliceDataset, single_slice_collate_fn
from nowoe.deep_learning.data.preprocessing.single_slice.normalization import MinMaxSingleSliceNormalization, ZScoreSingleSliceNormalization
from nowoe.deep_learning.data.tensorizer.single_slice import CustomTensorizer
from nowoe.deep_learning.data.augmentation.single_slice.sample_from_distribution.gaussian_mixtures import GaussianMixturesSingleSliceAugmentation

In [2]:
# Build the dataloaders for the smartwatch study: subjects SWS_00..SWS_09 form
# the "train" split and SWS_10..SWS_14 form the "test" split.
dataloaders = get_dataloaders(
    batch_size=50,
    root_dir='../../resources/warrior_wellness/Analysis/local_repo/',
    subject_splits=dict(
        train=[f'SWS_{i:02d}' for i in range(10)],
        test=[f'SWS_{i:02d}' for i in range(10, 15)],
    ),
    dataset_config=dict(
        slice_lengths=[3600],
        slice_time_step=5 * 60,
        label_milestone_per_window=1.0,
        # Dataset metadata is read from this cache file when caching is enabled.
        metadata_cache_filepath='./dataset_cache/dataset-cache-2.pkl.gz',
        no_cache=False,
        parallel_threads=10,
    ),
    # Both splits share the same sampler settings; only `split_name` differs.
    sampler_configs={
        split: {
            'negative_sample_count': 100,
            'positive_sample_count': 50,
            'target_variable': 'overall_quantized_stress_value',
            'split_name': split,
        }
        for split in ("train", "test")
    },
)

2022-02-25 15:56:26,399 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.interface - INFO - initializing data manager...
2022-02-25 15:59:01,001 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.interface - INFO - preparing the dataset...
2022-02-25 15:59:01,294 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.dataset - INFO - Loaded metadata from cache file: /home/shayan/phoenix/nowoe_framework/nowoe_framework/notebooks/smartwatch/dataset_cache/dataset-cache-2.pkl.gz
2022-02-25 15:59:01,295 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.interface - INFO - preparing samplers...


In [3]:
# Gaussian-mixture augmentation, configured with the feature columns to use
# for each data source and a two-component, full-covariance mixture config.
augmentation = GaussianMixturesSingleSliceAugmentation(
    feature_names_per_data_source={
        'daily': [
            'heart_rate_tsvalue',
            'distanceInMeters',
            'floorsClimbed',
            'bmrKilocalories',
            'durationInSeconds',
            'activeTimeInSeconds',
            'activityStressDurationInSeconds',
            'minHeartRateInBeatsPerMinute',
            'stressDurationInSeconds',
            'highStressDurationInSeconds',
            'maxStressLevel',
            'averageHeartRateInBeatsPerMinute',
        ],
        'pulseOx': ['durationInSeconds', 'spo2_tsvalue'],
        'respiration': ['durationInSeconds', 'epoch_to_breath_tsvalue'],
        'stress': ['durationInSeconds', 'stress_level_tsvalue'],
    },
    gmm_config={
        'n_components': 2,
        'covariance_type': 'full',
    },
)

In [4]:
# Min-max normalizer over the listed feature columns of each data source.
normalizer = MinMaxSingleSliceNormalization(
    feature_names_per_data_source={
        'daily': [
            'heart_rate_tsvalue',
            'distanceInMeters',
            'floorsClimbed',
            'bmrKilocalories',
            'durationInSeconds',
            'activeTimeInSeconds',
            'activityStressDurationInSeconds',
            'minHeartRateInBeatsPerMinute',
            'stressDurationInSeconds',
            'highStressDurationInSeconds',
            'maxStressLevel',
            'averageHeartRateInBeatsPerMinute',
        ],
        'pulseOx': ['durationInSeconds', 'spo2_tsvalue'],
        'respiration': ['durationInSeconds', 'epoch_to_breath_tsvalue'],
        'stress': ['durationInSeconds', 'stress_level_tsvalue'],
    }
)

# Fit the normalizers on data drawn from the training dataloader only.
normalizer.learn_normalizers(dataloaders['train'])

2022-02-25 15:59:01,465 - nowoe.deep_learning.data.preprocessing.single_slice.normalization.base - INFO - [learning normalizers] sampling from the given dataloader...
100%|███████████████████████████████████████████| 16/16 [00:07<00:00,  2.23it/s]
2022-02-25 15:59:08,651 - nowoe.deep_learning.data.preprocessing.single_slice.normalization.base - INFO - [learning distributions] fitting models (of class <class 'sklearn.preprocessing._data.MinMaxScaler'>) to the distributions...
2022-02-25 15:59:08,660 - nowoe.deep_learning.data.preprocessing.single_slice.normalization.base - INFO - [learning distributions] completed.


In [16]:
# Tensorizer configuration. For every data source, `bring` names the columns
# to carry into the tensor; `daily` additionally lists `activityType` under
# `embed` with an embedding dimension of 8. How these are combined is defined
# by CustomTensorizer (not shown in this notebook).
tensorizer = CustomTensorizer(
    config={
        'timestamp_column': 'utc_timestamp',
        'value_config': {
            'daily': {
                'embed': {
                    'columns': ['activityType'],
                    'embedding_dim': [8],
                },
                'bring': [
                    'durationInSeconds',
                    'heart_rate_tsvalue',
                    'distanceInMeters',
                    'floorsClimbed',
                    'bmrKilocalories',
                    'activeTimeInSeconds',
                    'activityStressDurationInSeconds',
                    'minHeartRateInBeatsPerMinute',
                    'stressDurationInSeconds',
                    'highStressDurationInSeconds',
                    'maxStressLevel',
                    'averageHeartRateInBeatsPerMinute',
                ],
            },
            'pulseOx': {
                'bring': ['durationInSeconds', 'spo2_tsvalue'],
            },
            'respiration': {
                'bring': ['durationInSeconds', 'epoch_to_breath_tsvalue'],
            },
            'stress': {
                'bring': ['durationInSeconds', 'stress_level_tsvalue'],
            },
        },
    }
)

# Learn the embedding inputs from the training dataloader, then build the
# embeddings before the tensorizer is applied to any batch.
tensorizer.learn_embeddings(dataloaders['train'])
tensorizer.build_embeddings()

100%|███████████████████████████████████████████| 16/16 [00:06<00:00,  2.44it/s]


In [17]:
# Draw one batch from the test dataloader here so this cell works when the
# notebook is run top to bottom on a fresh kernel, instead of relying on a
# `batch` variable left over from out-of-order execution.
batch = next(iter(dataloaders['test']))

# Normalize the batch first, then convert the normalized batch to tensors.
processed_batch = tensorizer(normalizer(batch))

In [18]:
# Inspect the tensorized `daily` data source for slice index 0 of the processed batch.
processed_batch['slice'][0]['daily']

tensor([[0.5569, 0.3214, 0.5275,  ..., 0.9201, 0.7500, 0.6604],
        [0.5569, 0.3286, 0.5275,  ..., 0.9201, 0.7500, 0.6604],
        [0.5569, 0.3286, 0.5275,  ..., 0.9201, 0.7500, 0.6604],
        ...,
        [0.5569, 0.2929, 0.5275,  ..., 0.9201, 0.7500, 0.6604],
        [0.5569, 0.2929, 0.5275,  ..., 0.9201, 0.7500, 0.6604],
        [0.5569, 0.2929, 0.5275,  ..., 0.9201, 0.7500, 0.6604]],
       dtype=torch.float64, grad_fn=<CatBackward>)

In [21]:
# Timestamps of the `daily` source for slice index 0, cast with `.long()` so
# they display as whole integers.
processed_batch['timestamp'][0]['daily'].long()

tensor([1630765800, 1630765815, 1630765830, 1630765845, 1630765860, 1630765875,
        1630765890, 1630765905, 1630765920, 1630765935, 1630765950, 1630765965,
        1630765980, 1630765995, 1630766010, 1630766025, 1630766040, 1630766055,
        1630766070, 1630766085, 1630766100, 1630766115, 1630766130, 1630766145,
        1630766160, 1630766175, 1630766190, 1630766205, 1630766220, 1630766235,
        1630766250, 1630766265, 1630766280, 1630766295, 1630766310, 1630766325,
        1630766340, 1630766355, 1630766370, 1630766385, 1630766400, 1630766415,
        1630766430, 1630766445, 1630766460, 1630766475, 1630766490, 1630766505,
        1630766520, 1630766535, 1630766550, 1630766565, 1630766580, 1630766595,
        1630766610, 1630766625, 1630766640, 1630766655, 1630766670, 1630766685,
        1630766700, 1630766715, 1630766730, 1630766745, 1630766760, 1630766775,
        1630766790, 1630766805, 1630766820, 1630766835, 1630766850, 1630766865,
        1630766880, 1630766895, 16307669

In [19]:
# Show the `daily` table of slice index 2 from `batch`, i.e. the tabular data
# as it comes out of the dataloader.
batch['slice'][2]['daily']

Unnamed: 0,utc_timestamp,heart_rate_tsvalue,distanceInMeters,calendarDate,startTimeInSeconds,mediumStressDurationInSeconds,activityStressDurationInSeconds,durationInSeconds,user_id,lowStressDurationInSeconds,...,activeKilocalories,highStressDurationInSeconds,stressDurationInSeconds,minHeartRateInBeatsPerMinute,intensityDurationGoalInSeconds,bmrKilocalories,stressQualifier,activeTimeInSeconds,averageHeartRateInBeatsPerMinute,utc_date
60235,1623406620,61,6863.0,2021-06-11,1623387600,600,8820,48300,SWS_12,2220,...,433,60,2880,47,9000,935,unknown,3865,68,2021-06-11 10:17:00+00:00
60236,1623406635,64,6863.0,2021-06-11,1623387600,600,8820,48300,SWS_12,2220,...,433,60,2880,47,9000,935,unknown,3865,68,2021-06-11 10:17:15+00:00
60237,1623406650,64,6863.0,2021-06-11,1623387600,600,8820,48300,SWS_12,2220,...,433,60,2880,47,9000,935,unknown,3865,68,2021-06-11 10:17:30+00:00
60238,1623406665,64,6863.0,2021-06-11,1623387600,600,8820,48300,SWS_12,2220,...,433,60,2880,47,9000,935,unknown,3865,68,2021-06-11 10:17:45+00:00
60239,1623406680,64,6863.0,2021-06-11,1623387600,600,8820,48300,SWS_12,2220,...,433,60,2880,47,9000,935,unknown,3865,68,2021-06-11 10:18:00+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60470,1623410145,83,6863.0,2021-06-11,1623387600,600,8820,48300,SWS_12,2220,...,433,60,2880,47,9000,935,unknown,3865,68,2021-06-11 11:15:45+00:00
60471,1623410160,83,6863.0,2021-06-11,1623387600,600,8820,48300,SWS_12,2220,...,433,60,2880,47,9000,935,unknown,3865,68,2021-06-11 11:16:00+00:00
60472,1623410175,66,6863.0,2021-06-11,1623387600,600,8820,48300,SWS_12,2220,...,433,60,2880,47,9000,935,unknown,3865,68,2021-06-11 11:16:15+00:00
60473,1623410190,66,6863.0,2021-06-11,1623387600,600,8820,48300,SWS_12,2220,...,433,60,2880,47,9000,935,unknown,3865,68,2021-06-11 11:16:30+00:00


In [None]:
# Tensorized `daily` data for slice index 2 of the processed batch.
processed_batch['slice'][2]['daily']

In [6]:
# Create an iterator over the test dataloader so batches can be pulled one at a time.
my_iter = iter(dataloaders['test'])

In [7]:
# Pull the next batch from the test-dataloader iterator.
batch = next(my_iter)

In [None]:
# Look at the entry at position 1 of the `pulseOx` table for slice index 4
# (assumes a pandas object that supports `.iloc` — TODO confirm).
batch['slice'][4]['pulseOx'].iloc[1]

In [None]:
# Tensorize the batch directly, without passing it through the normalizer first.
tensorized_batch = tensorizer(batch)

In [None]:
# Shape of the `daily` table for slice index 0 of `batch`.
batch['slice'][0]['daily'].shape

In [None]:
# Shape of the tensorized `daily` data for slice index 0. The tensorizer
# output displayed earlier in this notebook holds a tensor directly under
# ['slice'][i][source], so there is no nested 'value' key to index into.
tensorized_batch['slice'][0]['daily'].shape

In [None]:
# Timestamps of the `daily` source for slice index 0. The tensorizer output
# displayed earlier in this notebook keeps timestamps under the top-level
# 'timestamp' key (['timestamp'][i][source]), not nested inside 'slice'.
# `.long()` casts them so they display as whole integers.
tensorized_batch['timestamp'][0]['daily'].long()