In [1]:
import os
import sys
# The repository root must be on sys.path before any `nowoe` import below.
sys.path.append(os.path.abspath('../..'))
# NOTE(review): the star import is kept first so that the explicit imports
# below still take precedence over any names it happens to bind.
from nowoe.data.api.smartwatch.utilities.preparations import *
import pandas
import plotly_express as px
import numpy
import json
import pickle
import gzip
import matplotlib.pyplot as plt
from copy import deepcopy
from functools import reduce
from tqdm import tqdm
from datetime import datetime, timezone
from typing import Dict, Iterator, List, Tuple, Union, Any
import torch.utils.data.dataloader
from nowoe.data.api.smartwatch.utilities.timestamp import get_utc_date_from_utc_timestamp
from nowoe.data.api.smartwatch.data_manager.module import SmartwatchDataManager
from nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice import SmartwatchStudySingleSliceDataset, single_slice_collate_fn

Building the data manager:

In [2]:
%%time
root = '../../resources/warrior_wellness/Analysis/local_repo/'
data_manager = SmartwatchDataManager(
    root_dir = root,
    subject_id_list=[f'SWS_{i:02d}' for i in range(0,5)]
)

CPU times: user 38.5 s, sys: 422 ms, total: 39 s
Wall time: 39 s


In [3]:
data_manager.get_existing_subject_id_list()

['SWS_01', 'SWS_02', 'SWS_03', 'SWS_04']

In [4]:
data_manager.get_utc_timestamp_range_for_subject(subject_id='SWS_02')

[1614816000.0, 1615754760.0]

Building the dataset (PyTorch)

In [5]:
%%time
cache_file = 'train-dataset-cache-test.pkl.gz'
# os.system(f'rm {cache_file}')
dataset = SmartwatchStudySingleSliceDataset(
    data_manager=data_manager,
    slice_lengths=[3600],
    slice_time_step=(15*60),
    label_milestone_per_window=1.0,
    metadata_cache_filepath=cache_file,
    no_cache=False
)

2022-02-18 09:38:32,879 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - 
Building dataset metadata...

2022-02-18 09:38:32,880 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - processing subjects:
  0%|                                                     | 0/4 [00:00<?, ?it/s]2022-02-18 09:38:32,883 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - processing subject_id: SWS_01
2022-02-18 09:38:32,908 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - processing slice_length_in_seconds: 3600.0




remaining timespan: 3960.0 seconds == 1.10 hours            

 25%|███████████▎                                 | 1/4 [00:51<02:34, 51.35s/it]2022-02-18 09:39:24,234 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - processing subject_id: SWS_02
2022-02-18 09:39:24,251 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - processing slice_length_in_seconds: 3600.0




remaining timespan: 5460.0 seconds == 1.52 hours           

 50%|██████████████████████▌                      | 2/4 [01:22<01:19, 39.58s/it]2022-02-18 09:39:55,568 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - processing subject_id: SWS_03
2022-02-18 09:39:55,596 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - processing slice_length_in_seconds: 3600.0




remaining timespan: 6120.0 seconds == 1.70 hours            

 75%|█████████████████████████████████▊           | 3/4 [02:17<00:46, 46.52s/it]2022-02-18 09:40:50,354 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - processing subject_id: SWS_04
2022-02-18 09:40:50,394 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - processing slice_length_in_seconds: 3600.0




remaining timespan: 4320.0 seconds == 1.20 hours            

100%|█████████████████████████████████████████████| 4/4 [03:42<00:00, 55.58s/it]
2022-02-18 09:42:15,218 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - 
Dataset metadata built.

2022-02-18 09:42:15,218 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - 
Caching dataset metadata...

2022-02-18 09:42:15,471 - nowoe.deep_learning.data.dataset.datasets.smartwatch_study.single_slice - INFO - 
Cached it. File is stored at f/home/shayan/phoenix/nowoe_framework/nowoe_framework/notebooks/smartwatch/train-dataset-cache-test.pkl.gz



CPU times: user 3min 43s, sys: 263 ms, total: 3min 43s
Wall time: 3min 42s


In [7]:
dataset.datapoint_counts

{'total': 3767,
 'overall/bin-0/0.00-0.26': 3521,
 'overall/bin-1/0.26-0.51': 62,
 'overall/bin-2/0.51-0.77': 53,
 'overall/bin-3/0.77-1.03': 64,
 'overall/bin-4/1.03-1.29': 11,
 'overall/bin-5/1.29-1.54': 7,
 'overall/bin-6/1.54-1.80': 13,
 'overall/bin-7/1.80-2.06': 15,
 'overall/bin-8/2.06-2.31': 0,
 'overall/bin-9/2.31-2.57': 2,
 'overall/bin-10/2.57-2.83': 8,
 'overall/bin-11/2.83-3.09': 3,
 'overall/bin-12/3.09-3.34': 2,
 'overall/bin-13/3.34-3.60': 3,
 'overall/bin-14/3.60-3.86': 3,
 'overall/bin-15/3.86-4.11': 0,
 'overall/bin-16/4.11-4.37': 0,
 'overall/bin-17/4.37-4.63': 0,
 'overall/bin-18/4.63-4.89': 0,
 'overall/bin-19/4.89-5.14': 0,
 'overall/bin-20/5.14-5.40': 0,
 'overall/bin-21/5.40-5.66': 0,
 'overall/bin-22/5.66-5.91': 0,
 'overall/bin-23/5.91-6.17': 0,
 'overall/bin-24/6.17-6.43': 0,
 'overall/bin-25/6.43-6.69': 0,
 'overall/bin-26/6.69-6.94': 0,
 'overall/bin-27/6.94-7.20': 0,
 'overall/bin-28/7.20-7.46': 0,
 'overall/bin-29/7.46-7.71': 0,
 'overall/bin-30/7.71-7.9

In [None]:
class BalancedStressLevelSampler(torch.utils.data.sampler.Sampler):
    """
    Index sampler that subsamples datapoints so stress-level bins contribute evenly.

    Every datapoint is assigned to one of the bins described by the keys of
    `datapoint_counts` (keys of the form ``'<category>/bin-<i>/<low>-<high>'``
    plus a ``'total'`` entry). Each bin gets a *retain probability*
    ``min(1, target / count)``, and on every pass over the data each datapoint
    is kept independently with the probability of its bin. With the default
    target (the smallest non-empty bin count) every non-empty bin yields the
    same number of datapoints in expectation.

    Caveats:
        * Bin edges are parsed from the key text, so they only carry the
          precision that was printed into the key.
        * Bin edges are assumed to be non-negative (the ``low-high`` text is
          split on ``'-'``).
    """

    def __init__(self, metadata, datapoint_counts, stress_category='overall',
                 target_count_per_bin=None, shuffle=False, seed=None):
        """
        Parameters
        ----------
        metadata:
            Sequence of per-datapoint dicts; each must hold the key
            ``f'{stress_category}_stress_value'``.
        datapoint_counts:
            Mapping from bin key to the number of datapoints in that bin,
            plus a ``'total'`` entry that is ignored here.
        stress_category:
            Which family of bins / which stress value to balance on.
        target_count_per_bin:
            Expected number of retained datapoints per non-empty bin.
            Defaults to the smallest non-empty bin count of the category.
        shuffle:
            If true, retained indices are yielded in random order; otherwise
            in ascending order.
        seed:
            Seed for the internal random generator (``None`` = unseeded).
        """
        # NOTE(review): Sampler.__init__ is deliberately not called. In older
        # torch releases it appears to require a `data_source` argument and it
        # holds no state this class needs - confirm against the pinned version.
        self.metadata = metadata
        self.datapoint_counts = datapoint_counts
        self.stress_category = stress_category
        self.target_count_per_bin = target_count_per_bin
        self.shuffle = shuffle
        self.rng = numpy.random.default_rng(seed)
        self.prepare_binner(datapoint_counts)
        self.prepare_bin_retain_probabilities()

        # Bin every datapoint once up front so iteration is a cheap vector op.
        value_key = f'{stress_category}_stress_value'
        self.index_retain_probabilities = numpy.array(
            [
                self.bin_retain_probabilities[self.binner(stress_category, meta[value_key])]
                for meta in metadata
            ],
            dtype=float,
        )

    def prepare_bin_retain_probabilities(self):
        """Fill `self.bin_retain_probabilities` for the bins of `self.stress_category`."""
        category = self.stress_category.lower()
        counts = {
            key: count
            for key, count in self.datapoint_counts.items()
            if key != 'total' and key.split('/')[0].lower() == category
        }
        non_empty_counts = [count for count in counts.values() if count > 0]

        target = self.target_count_per_bin
        if target is None:
            # Balance against the rarest bin that actually has data.
            target = min(non_empty_counts) if non_empty_counts else 0

        self.bin_retain_probabilities = {
            key: (min(1.0, target / count) if count > 0 else 0.0)
            for key, count in counts.items()
        }

    def prepare_binner(self, datapoint_counts=None):
        """
        Build `self.binner(stress_category, value)`, which maps a value to its bin key.

        `datapoint_counts` defaults to `self.datapoint_counts`, so the method
        can be called with or without the argument.
        """
        if datapoint_counts is None:
            datapoint_counts = self.datapoint_counts

        bins_per_category = dict()
        for stress_bin in [e for e in datapoint_counts.keys() if not e == 'total']:
            # Keys look like 'overall/bin-0/0.00-0.26': the range is the last part.
            bin_range = stress_bin.split('/')[-1]
            bin_low, bin_high = [float(e) for e in bin_range.split('-')]
            bins_per_category[stress_bin] = (bin_low, bin_high)

        def binner(stress_category: str, value: float):
            candidates = [
                k for k in bins_per_category.keys()
                if k.split('/')[0].lower() == stress_category.lower()
            ]
            for k in candidates:
                if value >= bins_per_category[k][0] and value < bins_per_category[k][1]:
                    return k
            if candidates:
                # Bins are half-open; the top edge of the highest bin is
                # treated as inclusive so the maximum value still has a bin.
                top = max(candidates, key=lambda k: bins_per_category[k][1])
                if value == bins_per_category[top][1]:
                    return top
            raise Exception(f"unknown bin - was not able to match value {value:.4f} for category {stress_category}")
        self.binner = binner

    def __iter__(self) -> Iterator[int]:
        """Yield the indices retained in this pass (a fresh random draw per call)."""
        draws = self.rng.random(len(self.index_retain_probabilities))
        # draws lie in [0, 1): probability 1.0 always keeps, 0.0 never keeps.
        retained = numpy.nonzero(draws < self.index_retain_probabilities)[0]
        if self.shuffle:
            self.rng.shuffle(retained)
        for i in retained:
            yield int(i)

    def __len__(self) -> int:
        """
        Expected number of indices per pass (rounded).

        The number actually yielded by one pass is random and may differ.
        """
        return int(round(float(self.index_retain_probabilities.sum())))

In [None]:
import numpy as np
import math

a = np.array([11,75])
# calculate histogram values


In [None]:
# Find the index of the 0.2-wide bin that contains x by histogramming the
# single value and locating the one non-empty bin. Only the `numpy` name from
# the top import cell is used, so the cell does not depend on an `np` alias
# bound in some other cell.
x = 2.3
vals, bins = numpy.histogram(numpy.array([x]), bins=numpy.arange(0, 3.2, 0.2))
numpy.nonzero(vals)[0][0]

In [None]:
numpy.arange(0, 3.2, 0.2)[11]

In [8]:
dataset_iter = iter(dataset)

In [9]:
item = next(dataset_iter)

In [10]:
item['meta']

{'subject_id': 'SWS_01',
 'utc_timestamp_window': (1614924000.0, 1614927600.0),
 'overall_stress_value': 0.0,
 'general_stress_value': 0.0,
 'interpersonal_stress_value': 0.0,
 'utc_timestamp_for_stress_query': 1614927600.0,
 'induced_stress_value': 0.0,
 'general_quantized_stress_value': 0.0,
 'induced_quantized_stress_value': 0.0,
 'interpersonal_quantized_stress_value': 0.0,
 'overall_quantized_stress_value': 0.0}

In [12]:
item['slice'].keys()

dict_keys(['daily', 'respiration', 'stress', 'pulseOx'])

In [13]:
item['slice']['daily'].head()

Unnamed: 0,utc_timestamp,heart_rate_tsvalue,floorsClimbed,maxHeartRateInBeatsPerMinute,averageStressLevel,user_id,averageHeartRateInBeatsPerMinute,calendarDate,distanceInMeters,stressDurationInSeconds,...,steps,intensityDurationGoalInSeconds,floorsClimbedGoal,summaryId,mediumStressDurationInSeconds,activeTimeInSeconds,stressQualifier,startTimeInSeconds,stepsGoal,utc_date
17268,1614924000,56,34,181,24,SWS_01,72,2021-03-05,14234.0,18900,...,15521,9000,10,x38a008e-6041bad0-f99c-6,8040,7088,balanced,1614920400,7480,2021-03-05 06:00:00+00:00
17269,1614924000,56,11,116,21,SWS_01,65,2021-03-05,3301.0,11580,...,4267,9000,10,x38a008e-6041bad0-c06c-6,5700,2960,unknown,1614920400,7480,2021-03-05 06:00:00+00:00
17270,1614924000,56,11,116,21,SWS_01,65,2021-03-05,3301.0,11580,...,4267,9000,10,x38a008e-6041bad0-c06c-6,5700,2960,unknown,1614920400,7480,2021-03-05 06:00:00+00:00
17271,1614924000,56,11,116,21,SWS_01,65,2021-03-05,3301.0,11520,...,4267,9000,10,x38a008e-6041bad0-c030-6,5640,2960,unknown,1614920400,7480,2021-03-05 06:00:00+00:00
17272,1614924000,56,35,181,28,SWS_01,74,2021-03-05,15196.0,27780,...,16765,9000,10,x38a008e-6041bad0-15180-6,12900,7884,balanced,1614920400,7480,2021-03-05 06:00:00+00:00


In [17]:
import numpy as np
np.digitize(np.array([2.5, 0.1, 1.9, 2., 3.]), bins=[0, 2, 3])

array([2, 1, 1, 2, 3])

{2}

In [1]:
a = set()
a.add(2)


In [None]:
# One column per metadata field (field names taken from the sample item),
# one row per entry of dataset.metadata.
meta_distributions = pandas.DataFrame({
    field: [entry[field] for entry in dataset.metadata]
    for field in item['meta'].keys()
})
meta_distributions.head()

In [None]:
fig = px.violin(meta_distributions, y='overall_stress_value', box=True)
fig.show()

The stress values are heavily skewed, so we need to bin them and subsample each bin accordingly by writing a custom sampler.

In [None]:
%%time
c = 0
for item_meta in tqdm(dataset.metadata):
    if item_meta['overall_stress_value'] > 0.5:
        c += 1
print(c)

In [None]:
item['meta']

In [None]:
item['slice'].keys()

In [None]:
item['slice']['stress']

In [None]:
from collections import defaultdict
from typing import List
import copy
import functools
from nowoe.data.api.smartwatch.utilities.timestamp import get_utc_date_from_utc_timestamp, \
    get_utc_timestamp_from_naive_datetime
d = defaultdict(lambda: [])
d['a']

In [None]:
d.keys()

In [27]:
# Raw `daily` records from the data manager, restricted to subject SWS_01.
df = pandas.DataFrame(dataset.data_manager.json_contents['daily']).loc[
    lambda frame: frame.user_id == 'SWS_01'
]

In [28]:
df.head()

Unnamed: 0,mediumStressDurationInSeconds,intensityDurationGoalInSeconds,steps,summaryId,averageStressLevel,user_id,minHeartRateInBeatsPerMinute,restStressDurationInSeconds,lowStressDurationInSeconds,averageHeartRateInBeatsPerMinute,...,highStressDurationInSeconds,vigorousIntensityDurationInSeconds,activityType,activeKilocalories,timeOffsetHeartRateSamples,stepsGoal,activityStressDurationInSeconds,stressQualifier,activeTimeInSeconds,calendarDate
336,12900.0,9000,16765,x38a008e-6041bad0-15180-6,28.0,SWS_01,45.0,33480.0,13140.0,74.0,...,1740.0,3840,WALKING,1142,"{'22365': 53, '50430': 87, '41820': 94, '40590...",7480,15060.0,balanced,7884,2021-03-05
337,5640.0,9000,4267,x38a008e-6041bad0-c030-6,21.0,SWS_01,45.0,28740.0,4620.0,65.0,...,1260.0,0,WALKING,250,"{'14895': 52, '44055': 80, '9870': 49, '20310'...",7480,6540.0,unknown,2960,2021-03-05
338,5700.0,9000,4267,x38a008e-6041bad0-c06c-6,21.0,SWS_01,45.0,28740.0,4620.0,65.0,...,1260.0,0,WALKING,250,"{'5880': 54, '7320': 54, '26175': 51, '12480':...",7480,6540.0,unknown,2960,2021-03-05
339,5760.0,9000,4267,x38a008e-6041bad0-c0a8-6,21.0,SWS_01,45.0,28740.0,4620.0,65.0,...,1260.0,0,WALKING,250,"{'33825': 70, '47145': 83, '2505': 52, '35700'...",7480,6540.0,unknown,2960,2021-03-05
340,6120.0,9000,4267,x38a008e-6041bad0-c210-6,21.0,SWS_01,45.0,28740.0,4620.0,65.0,...,1260.0,0,WALKING,250,"{'31560': 69, '18225': 52, '6195': 52, '16125'...",7480,6540.0,unknown,2960,2021-03-05


#### `daily`

In [30]:
print(item['slice']['daily'].shape[0])
item['slice']['daily'].sort_values(by='durationInSeconds')

2880


Unnamed: 0,utc_timestamp,heart_rate_tsvalue,floorsClimbed,maxHeartRateInBeatsPerMinute,averageStressLevel,user_id,averageHeartRateInBeatsPerMinute,calendarDate,distanceInMeters,stressDurationInSeconds,...,steps,intensityDurationGoalInSeconds,floorsClimbedGoal,summaryId,mediumStressDurationInSeconds,activeTimeInSeconds,stressQualifier,startTimeInSeconds,stepsGoal,utc_date
18045,1614924960,55,11,116,21,SWS_01,65,2021-03-05,3301.0,11520,...,4267,9000,10,x38a008e-6041bad0-c030-6,5640,2960,unknown,1614920400,7480,2021-03-05 06:16:00+00:00
19848,1614927225,53,11,116,21,SWS_01,65,2021-03-05,3301.0,11520,...,4267,9000,10,x38a008e-6041bad0-c030-6,5640,2960,unknown,1614920400,7480,2021-03-05 06:53:45+00:00
18018,1614924930,55,11,116,21,SWS_01,65,2021-03-05,3301.0,11520,...,4267,9000,10,x38a008e-6041bad0-c030-6,5640,2960,unknown,1614920400,7480,2021-03-05 06:15:30+00:00
18022,1614924930,55,11,116,21,SWS_01,65,2021-03-05,3301.0,11520,...,4267,9000,10,x38a008e-6041bad0-c030-6,5640,2960,unknown,1614920400,7480,2021-03-05 06:15:30+00:00
18027,1614924945,55,11,116,21,SWS_01,65,2021-03-05,3301.0,11520,...,4267,9000,10,x38a008e-6041bad0-c030-6,5640,2960,unknown,1614920400,7480,2021-03-05 06:15:45+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18449,1614925470,52,35,181,28,SWS_01,74,2021-03-05,15196.0,27780,...,16765,9000,10,x38a008e-6041bad0-15180-6,12900,7884,balanced,1614920400,7480,2021-03-05 06:24:30+00:00
19676,1614927000,52,35,181,28,SWS_01,74,2021-03-05,15196.0,27780,...,16765,9000,10,x38a008e-6041bad0-15180-6,12900,7884,balanced,1614920400,7480,2021-03-05 06:50:00+00:00
18443,1614925455,52,35,181,28,SWS_01,74,2021-03-05,15196.0,27780,...,16765,9000,10,x38a008e-6041bad0-15180-6,12900,7884,balanced,1614920400,7480,2021-03-05 06:24:15+00:00
18435,1614925455,52,35,181,28,SWS_01,74,2021-03-05,15196.0,27780,...,16765,9000,10,x38a008e-6041bad0-15180-6,12900,7884,balanced,1614920400,7480,2021-03-05 06:24:15+00:00


In [20]:
print(item['slice']['daily'].shape[0])
item['slice']['daily'].head()

2880


Unnamed: 0,utc_timestamp,heart_rate_tsvalue,floorsClimbed,maxHeartRateInBeatsPerMinute,averageStressLevel,user_id,averageHeartRateInBeatsPerMinute,calendarDate,distanceInMeters,stressDurationInSeconds,...,steps,intensityDurationGoalInSeconds,floorsClimbedGoal,summaryId,mediumStressDurationInSeconds,activeTimeInSeconds,stressQualifier,startTimeInSeconds,stepsGoal,utc_date
17268,1614924000,56,34,181,24,SWS_01,72,2021-03-05,14234.0,18900,...,15521,9000,10,x38a008e-6041bad0-f99c-6,8040,7088,balanced,1614920400,7480,2021-03-05 06:00:00+00:00
17269,1614924000,56,11,116,21,SWS_01,65,2021-03-05,3301.0,11580,...,4267,9000,10,x38a008e-6041bad0-c06c-6,5700,2960,unknown,1614920400,7480,2021-03-05 06:00:00+00:00
17270,1614924000,56,11,116,21,SWS_01,65,2021-03-05,3301.0,11580,...,4267,9000,10,x38a008e-6041bad0-c06c-6,5700,2960,unknown,1614920400,7480,2021-03-05 06:00:00+00:00
17271,1614924000,56,11,116,21,SWS_01,65,2021-03-05,3301.0,11520,...,4267,9000,10,x38a008e-6041bad0-c030-6,5640,2960,unknown,1614920400,7480,2021-03-05 06:00:00+00:00
17272,1614924000,56,35,181,28,SWS_01,74,2021-03-05,15196.0,27780,...,16765,9000,10,x38a008e-6041bad0-15180-6,12900,7884,balanced,1614920400,7480,2021-03-05 06:00:00+00:00


In [21]:
item['slice']['daily'].utc_timestamp

17268    1614924000
17269    1614924000
17270    1614924000
17271    1614924000
17272    1614924000
            ...    
20143    1614927585
20144    1614927585
20145    1614927585
20146    1614927585
20147    1614927585
Name: utc_timestamp, Length: 2880, dtype: int64

#### `respiration`

In [15]:
item['slice']['respiration'].head()

Unnamed: 0,utc_timestamp,epoch_to_breath_tsvalue,user_id,durationInSeconds,startTimeOffsetInSeconds,startTimeInSeconds,summaryId,utc_date


#### `stress`

In [17]:
item['slice']['stress'].head()

Unnamed: 0,utc_timestamp,stress_level_tsvalue,startTimeOffsetInSeconds,calendarDate,startTimeInSeconds,durationInSeconds,user_id,summaryId,utc_date,body_battery_tsvalue
960,1614924000,13,-18000,2021-03-05,1614920400,30660,SWS_01,x38a008e-6041bad0-77c4,2021-03-05 06:00:00+00:00,100.0
961,1614924000,13,-18000,2021-03-05,1614920400,49200,SWS_01,x38a008e-6041bad0-c030,2021-03-05 06:00:00+00:00,100.0
962,1614924000,13,-18000,2021-03-05,1614920400,49680,SWS_01,x38a008e-6041bad0-c210,2021-03-05 06:00:00+00:00,100.0
963,1614924000,13,-18000,2021-03-05,1614920400,49260,SWS_01,x38a008e-6041bad0-c06c,2021-03-05 06:00:00+00:00,100.0
964,1614924000,13,-18000,2021-03-05,1614920400,78780,SWS_01,x38a008e-6041bad0-133bc,2021-03-05 06:00:00+00:00,100.0


#### `pulseOx`

In [18]:
item['slice']['pulseOx'].head()

Unnamed: 0,utc_timestamp,spo2_tsvalue,summaryId,calendarDate,user_id,onDemand,startTimeOffsetInSeconds,durationInSeconds,startTimeInSeconds,utc_date


In [None]:
dataloader = torch.utils.data.dataloader.DataLoader(dataset, shuffle=True, batch_size=50, collate_fn=single_slice_collate_fn)

In [None]:
dl = iter(dataloader)
b = next(dl)

In [None]:
len(b['slice'])

In [None]:
len(dataset.metadata)

In [None]:
dataset.datapoint_counts

In [None]:
numpy.mean([e['overall_stress_value'] for e in dataset.metadata])

In [None]:
numpy.quantile([e['overall_stress_value'] for e in dataset.metadata], q=0.89)