In [1]:
import os
import sys
sys.path.append(os.path.abspath('../..'))
from nowoe.data.api.smartwatch.utilities.preparations import *
import pandas
import plotly_express as px
import numpy
import json
import pickle
import gzip
import matplotlib.pyplot as plt
from copy import deepcopy
from functools import reduce
from tqdm import tqdm
from datetime import datetime, timezone
from typing import Dict, List, Tuple, Union, Any, Iterator
import torch.utils.data.dataloader
from nowoe.data.api.smartwatch.utilities.timestamp import get_utc_date_from_utc_timestamp
from nowoe.data.api.smartwatch.data_manager.module import SmartwatchDataManager
from nowoe.deep_learning.data.dataset.smartwatch_study.single_slice import get_dataloaders, SmartwatchStudySingleSliceDataset, single_slice_collate_fn

In [2]:
dataloaders = get_dataloaders(
    batch_size=50,
    root_dir='../../resources/warrior_wellness/Analysis/local_repo/',
    subject_splits={
        "train": [f'SWS_{i:02d}' for i in range(0,10)],
        "test": [f'SWS_{i:02d}' for i in range(10,15)]},
    dataset_config={
        'slice_lengths': [3600],
        'slice_time_step': (5 * 60),
        'label_milestone_per_window': 1.0,
        'metadata_cache_filepath': './dataset_cache/dataset-cache-2.pkl.gz',
        'no_cache': False,
        'parallel_threads': 10
    },
    sampler_configs=dict(
       train=dict(
           negative_sample_count=1000,
            positive_sample_count=500,
            target_variable='overall_quantized_stress_value',
           split_name="train"
       ),
       test=dict(
               negative_sample_count=200,
            positive_sample_count=100,
            target_variable='overall_quantized_stress_value',
           split_name="test"
       )
        )
)

2022-02-22 07:39:38,483 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.interface - INFO - initializing data manager...
2022-02-22 07:42:10,578 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.interface - INFO - preparing the dataset...
2022-02-22 07:42:10,847 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.dataset - INFO - Loaded metadata from cache file: /home/shayan/phoenix/nowoe_framework/nowoe_framework/notebooks/smartwatch/dataset_cache/dataset-cache-2.pkl.gz
2022-02-22 07:42:10,848 - nowoe.deep_learning.data.dataset.smartwatch_study.single_slice.interface - INFO - preparing samplers...


In [3]:
my_iter = iter(dataloaders['test'])

In [4]:
overall_quantized_stress_value_layout = list(set([e['overall_quantized_stress_value'] for e in dataloaders['train'].dataset.metadata]))

In [5]:
sighted_labels = []
for batch in tqdm(dataloaders['test']):
    sighted_labels += [overall_quantized_stress_value_layout.index(e['overall_quantized_stress_value']) for e in batch['meta']]
sighted_labels = numpy.array(sighted_labels)

100%|███████████████████████████████████████████| 18/18 [00:12<00:00,  1.43it/s]


In [14]:
slice_features = {
    'respiration': ['epoch_to_breath_tsvalue'],
    'daily': ['heart_rate_tsvalue'],
    'pulseOx': ['spo2_tsvalue'],
    'stress': ['stress_level_tsvalue']
}

In [16]:
from collections import defaultdict
distributions = defaultdict(lambda: defaultdict(lambda: []))

for batch in tqdm(dataloaders['train']):
    for data_source, features_per_ds in slice_features.items():
        for featurename in features_per_ds:
            distributions[data_source][featurename].append(numpy.concatenate([e[data_source][featurename].to_numpy() for e in batch['slice']], axis=0))
            
for data_source in distributions:
    for featurename in distributions[data_source]:
        distributions[data_source][featurename] = numpy.concatenate(distributions[data_source][featurename], axis=0)

100%|█████████████████████████████████████████| 160/160 [01:02<00:00,  2.54it/s]


NameError: name 'distribution' is not defined

In [18]:
from sklearn.mixture import GaussianMixture

In [40]:
distr_models = defaultdict(lambda: dict())

for data_source in distributions:
    for featurename in distributions[data_source]:
        distr_models[data_source][featurename] = GaussianMixture(n_components=2, random_state=None).fit(distributions[data_source][featurename].reshape(-1, 1))

In [78]:
distr_models['daily']['heart_rate_tsvalue'].sample(1)[0].item()

86.45986422487002

Number of items per label index:

In [93]:
for item_index in range(len(batch['slice'])):
    timestamps = batch['slice'][item_index]['daily'].utc_timestamp.to_numpy()
    if timestamps.shape[0] == 0:
        continue
    min_timestamp = timestamps.min()
    max_timestamp = timestamps.max()
    timestamps_left_blank = numpy.arange(min_timestamp, max_timestamp + 1)
    timestamps_left_blank = timestamps_left_blank[(timestamps - min_timestamp).astype('int')].tolist()
    
    column_names = batch['slice'][item_index]['daily'].columns.tolist()
    row = {x: None for x in column_names}
    row['heart_rate_tsvalue'] = distr_models['daily']['heart_rate_tsvalue'].sample(1)[0].item()
    row['utc_timestamp'] = numpy.random.choice(timestamps_left_blank)
    batch['slice'][item_index]['daily'].append([row, row], ignore_index=True)
    batch['slice'][item_index]['daily'].sort_values(by='utc_timestamp', inplace=True)
    batch['slice'][item_index]['daily'] = batch['slice'][item_index]['daily'].ffill().bfill()

In [94]:
batch['slice'][item_index]['daily']

Unnamed: 0,utc_timestamp,heart_rate_tsvalue,floorsClimbed,steps,averageStressLevel,maxHeartRateInBeatsPerMinute,startTimeInSeconds,activeTimeInSeconds,calendarDate,distanceInMeters,...,lowStressDurationInSeconds,bmrKilocalories,summaryId,stepsGoal,startTimeOffsetInSeconds,highStressDurationInSeconds,averageHeartRateInBeatsPerMinute,stressQualifier,vigorousIntensityDurationInSeconds,utc_date
20651,1.615479e+09,79.0,12.0,3965.0,24.0,103.0,1.615439e+09,1818.0,2021-03-11,3446.0,...,16020.0,2087.0,x38beb34-6049a3d0-11940-6,6310.0,-18000.0,240.0,65.0,balanced,0.0,2021-03-11 16:05:00+00:00
20652,1.615479e+09,81.0,12.0,3965.0,24.0,103.0,1.615439e+09,1818.0,2021-03-11,3446.0,...,16020.0,2087.0,x38beb34-6049a3d0-11940-6,6310.0,-18000.0,240.0,65.0,balanced,0.0,2021-03-11 16:05:15+00:00
20653,1.615479e+09,81.0,12.0,3965.0,24.0,103.0,1.615439e+09,1818.0,2021-03-11,3446.0,...,16020.0,2087.0,x38beb34-6049a3d0-11940-6,6310.0,-18000.0,240.0,65.0,balanced,0.0,2021-03-11 16:05:30+00:00
20654,1.615479e+09,81.0,12.0,3965.0,24.0,103.0,1.615439e+09,1818.0,2021-03-11,3446.0,...,16020.0,2087.0,x38beb34-6049a3d0-11940-6,6310.0,-18000.0,240.0,65.0,balanced,0.0,2021-03-11 16:05:45+00:00
20655,1.615479e+09,81.0,12.0,3965.0,24.0,103.0,1.615439e+09,1818.0,2021-03-11,3446.0,...,16020.0,2087.0,x38beb34-6049a3d0-11940-6,6310.0,-18000.0,240.0,65.0,balanced,0.0,2021-03-11 16:06:00+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20886,1.615482e+09,59.0,12.0,3965.0,24.0,103.0,1.615439e+09,1818.0,2021-03-11,3446.0,...,16020.0,2087.0,x38beb34-6049a3d0-11940-6,6310.0,-18000.0,240.0,65.0,balanced,0.0,2021-03-11 17:03:45+00:00
20887,1.615482e+09,59.0,12.0,3965.0,24.0,103.0,1.615439e+09,1818.0,2021-03-11,3446.0,...,16020.0,2087.0,x38beb34-6049a3d0-11940-6,6310.0,-18000.0,240.0,65.0,balanced,0.0,2021-03-11 17:04:00+00:00
20888,1.615482e+09,59.0,12.0,3965.0,24.0,103.0,1.615439e+09,1818.0,2021-03-11,3446.0,...,16020.0,2087.0,x38beb34-6049a3d0-11940-6,6310.0,-18000.0,240.0,65.0,balanced,0.0,2021-03-11 17:04:15+00:00
20889,1.615482e+09,59.0,12.0,3965.0,24.0,103.0,1.615439e+09,1818.0,2021-03-11,3446.0,...,16020.0,2087.0,x38beb34-6049a3d0-11940-6,6310.0,-18000.0,240.0,65.0,balanced,0.0,2021-03-11 17:04:30+00:00
