In [197]:
import pandas as pd
import os
import numpy as np
# os.chdir('motion-and-heart-rate-from-a-wrist-worn-wearable-and-labeled-sleep-from-polysomnography-1.0.0/')
output_path = '/output_data'


pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [26]:
files = os.listdir()
files = [f for f in files if f != 'notebook.ipynb' and f[0] is not'.']
print(files)

['acceleration', 'steps', 'labeled_sleep', 'LICENSE.txt', 'heartrate', 'SHA256SUMS.txt']


In [3]:
subjects = set([s.split("_")[0] for s in os.listdir('acceleration')] +
               [s.split("_")[0] for s in os.listdir('labeled_sleep')] +
               [s.split("_")[0] for s in os.listdir('steps')] +
               [s.split("_")[0] for s in os.listdir('heartrate')])

print(subjects)

{'1066528', '8000685', '4018081', '5132496', '2598705', '5383425', '9961348', '3997827', '46343', '8258170', '8686948', '7749105', '1449548', '4426783', '1360686', '8530312', '844359', '5498603', '781756', '1818471', '8692923', '9618981', '8173033', '4314139', '2638030', '9106476', '759667', '5797046', '1455390', '3509524', '6220552'}


In [192]:
def get_df(sensor_type, participant):
    file = sensor_type + "/" + participant + "_" + sensor_type + ".txt"
    if sensor_type == 'acceleration':
        df = pd.read_csv(file, sep=" ", header=None)
        df.columns = ['timestamps', 'x', 'y', 'z']
        df.timestamps = df.timestamps * 1e+9 + 3.154e+16
        
    elif sensor_type == 'labeled_sleep':
        df = pd.read_csv(file, sep = " ", header=None)
        df.columns = ['label_start', 'label']
        df = df[["label", "label_start"]]
        df['label_end'] = df.label_start + 30
        df.label_start = df.label_start * 1e+9 + 3.154e+16
        df.label_end = df.label_end * 1e+9 + 3.154e+16
    else:
        df = pd.read_csv(file, header=None)
        df.columns = ['timestamps', sensor_type]
        if sensor_type == 'heartrate':
            df.timestamps = df.timestamps * 1e+9 + 3.154e+16

    df.insert(0, 'sensor_type', [sensor_type for i in range(len(df))])
    df.insert(1, 'device_type', ['smartwatch' for i in range(len(df))])
    return df


In [193]:
df_heartrate = get_df('heartrate', '6220552')
df_acceloration = get_df('acceleration', '6220552')
df_labels = get_df('labeled_sleep', '6220552')
df_steps = get_df('steps', "6220552")
df_steps.head()

Unnamed: 0,sensor_type,device_type,timestamps,steps
0,steps,smartwatch,-604256,0
1,steps,smartwatch,-603656,0
2,steps,smartwatch,-603056,0
3,steps,smartwatch,-602456,0
4,steps,smartwatch,-601856,0


In [199]:
df_steps

Unnamed: 0,sensor_type,device_type,timestamps,steps
0,steps,smartwatch,-604256,0
1,steps,smartwatch,-603656,0
2,steps,smartwatch,-603056,0
3,steps,smartwatch,-602456,0
4,steps,smartwatch,-601856,0
5,steps,smartwatch,-601256,0
6,steps,smartwatch,-600656,0
7,steps,smartwatch,-600056,0
8,steps,smartwatch,-599456,0
9,steps,smartwatch,-598856,0


In [7]:
df_acceloration.timestamps.describe()

count    1.826000e+06
mean     3.155477e+16
std      9.420991e+12
min      3.151404e+16
25%      3.154686e+16
50%      3.155484e+16
75%      3.156282e+16
max      3.157080e+16
Name: timestamps, dtype: float64

In [8]:
df_heartrate.timestamps.describe()

count    7.113000e+03
mean     3.150666e+16
std      1.314694e+14
min      3.098078e+16
25%      3.154280e+16
50%      3.155200e+16
75%      3.156122e+16
max      3.157038e+16
Name: timestamps, dtype: float64

In [9]:
df_labels.label_start.describe()

count    9.820000e+02
mean     3.155472e+16
std      8.508698e+12
min      3.154000e+16
25%      3.154736e+16
50%      3.155472e+16
75%      3.156207e+16
max      3.156943e+16
Name: label_start, dtype: float64

## Create first part of sinus from moment particpant went to bed at the lab

In [135]:
start_time = df_labels.label_start[0]
end_time = df_labels.label_end[len(df_labels) - 1]
step_size = (end_time - start_time) / len(df_labels)
nr_instances = len(df_labels)

sin_left = np.arange(0, 1, 1 / ((nr_instances - 1)/ 2))
sin_right = np.flip(sin_left)
sin_full = np.concatenate([sin_left, sin_right])
if nr_instances % 2 != 0:
    sin_full = sin_full[:-1]

df_sin_from_bedtime = pd.DataFrame(columns = ['sensor_type', 'device_type', 'timestamps', 'sin_from_bedtime'])
df_sin_from_bedtime.sensor_type = ['sin_from_bedtime' for i in range(nr_instances)]
df_sin_from_bedtime.device_type = ['smartwatch' for i in range(nr_instances)]
timestamps = np.arange(start_time, end_time, step_size)

df_sin_from_bedtime.timestamps = np.arange(start_time, end_time, step_size)

df_sin_from_bedtime.sin_from_bedtime = sin_full
df_sin_from_bedtime.head()

982
982
982


Unnamed: 0,sensor_type,device_type,timestamps,sin_from_bedtime
0,sin_from_bedtime,smartwatch,3.154e+16,0.0
1,sin_from_bedtime,smartwatch,3.154003e+16,0.002039
2,sin_from_bedtime,smartwatch,3.154006e+16,0.004077
3,sin_from_bedtime,smartwatch,3.154009e+16,0.006116
4,sin_from_bedtime,smartwatch,3.154012e+16,0.008155


In [137]:
df_sin_from_bedtime.to_csv("../Sinus_from_bedtime.csv", index=False)

## Create sinus every 90 mintues from moment particpant went to bed at the lab

In [181]:
miuntes_22half = 5.4e+12 / 4
start_time = df_sin_from_bedtime.timestamps[0]
instances_per_quarter = len(df_sin_from_bedtime[df_sin_from_bedtime.timestamps <= (start_time + miuntes_22half)])
sin_first_quarter = np.arange(0, 1, (1 / instances_per_quarter))
sin_second_quarter = np.flip(sin_first_quarter)
sin_first_half = np.concatenate([sin_first_quarter, sin_second_quarter])
sin_second_half = np.negative(sin_first_half)
sin_90_minutes = np.concatenate([sin_first_half, sin_second_half])
repeats = int(len(df_sin_from_bedtime)/ (len(sin_90_minutes)) + 1)
for i in range(repeats):
    sin_90_minutes = np.concatenate([sin_90_minutes, sin_90_minutes])

print(len(sin_90_minutes))
df_90_minutes_from_bedtime = df_sin_from_bedtime.copy(deep=True)
length = len(df_90_minutes_from_bedtime)
print(length)
df_90_minutes_from_bedtime.columns = ['sensor_type', 'device_type', 'timestamps', '90minutes_from_bedtime']
df_90_minutes_from_bedtime.sensor_type = ['90_mintues_from_bedtime' for i in range(length)]
df_90_minutes_from_bedtime['90minutes_from_bedtime'] = sin_90_minutes[:(length)]


11776
982


In [187]:
df_90_minutes_from_bedtime.to_csv("../90minutes_from_bedtime.csv", index=False)

In [284]:
def find_avg_bed_time_diffence(df_steps):
    hours = []
    hour_in_seconds = 3600
    hour_indexes = []
    start_time = df_steps.timestamps[0]
    while start_time < 0:
        end_time_hour = start_time + hour_in_seconds
        temp_df = df_steps.copy(deep=True)
        df_hour = temp_df[temp_df.timestamps >= start_time]
        df_hour = df_hour[df_hour.timestamps < end_time_hour]
        hours.append([start_time, end_time_hour, df_hour.steps.sum()])
        if len(hour_indexes) > 0:
            hour_indexes.append(hour_indexes[-1] + len(df_hour))
        else:
            hour_indexes.append(len(df_hour))
        start_time = start_time + hour_in_seconds
    
    sleeping_hours = []
    for i, hour in enumerate(hours):
        if hour[2] < 10 and hours[i+1][2] < 10 and (len(hours) - i) > 10:
            sleeping_hours.append(len(hours) - i)
    
    bed_times = []
    for i, hour in enumerate(sleeping_hours):
        if hour + 1 not in sleeping_hours:
            bed_times.append(hour)
    
    bed_time_differences = []
    for i, bed_time in enumerate(bed_times[::-1]):
        bed_time_differences.append((i+1)*24 - bed_time)
    
    avg_bed_time_diffence = sum(bed_time_differences)/len(bed_time_differences)
    return avg_bed_time_diffence







Unnamed: 0,sensor_type,device_type,timestamps,sin_from_avg_bedtime
0,sin_from_bedtime,smartwatch,3.154e+16,-0.627931
1,sin_from_bedtime,smartwatch,3.154003e+16,-0.625892
2,sin_from_bedtime,smartwatch,3.154006e+16,-0.623853
3,sin_from_bedtime,smartwatch,3.154009e+16,-0.621814
4,sin_from_bedtime,smartwatch,3.154012e+16,-0.619776


982


## Create part of sinus and 90mintues from avg moment particpant went to bed past days

In [297]:
avg_bed_time_diffence = find_avg_bed_time_diffence(df_steps)
avg_bed_time_diffence_ns = avg_bed_time_diffence * 3.6e+12

start_time = df_labels.label_start[0]
end_time = df_labels.label_end[len(df_labels) - 1]
step_size = (end_time - start_time) / len(df_labels)
nr_instances = len(df_labels)

sin_left = np.arange(0, 1, 1 / ((nr_instances - 1)/ 2))
sin_right = np.flip(sin_left)
sin_full = np.concatenate([np.negative(sin_left), np.negative(sin_right), sin_left, sin_right])

df_sin_from_avg_bedtime = df_sin_from_bedtime.copy(deep=True)
df_90_minutes_avg_bedtime = df_sin_from_bedtime.copy(deep=True)


df_sin_from_avg_bedtime.columns = ['sensor_type', 'device_type', 'timestamps', 'sin_from_avg_bedtime']
df_sin_from_avg_bedtime.sensor_type = ['sin_from_avg_bedtime' for i in range(len(df_sin_from_avg_bedtime))]
df_90_minutes_avg_bedtime.columns = ['sensor_type', 'device_type', 'timestamps', '90minutes_from_avg_bedtime']
df_90_minutes_avg_bedtime.sensor_type = ['90_minutes_from_avg_bedtime' for i in range(len(df_sin_from_avg_bedtime))]

relative_normal_bedtime = start_time + avg_bed_time_diffence_ns
avg_bed_time_index = len(df_sin_from_avg_bedtime[df_sin_from_avg_bedtime.timestamps < relative_normal_bedtime])


start = int(len(sin_full)/2 - avg_bed_time_index)
sin_full = sin_full[start:]

sin_first_quarter = np.arange(0, 1, (1 / instances_per_quarter))
sin_second_quarter = np.flip(sin_first_quarter)
sin_first_half = np.concatenate([sin_first_quarter, sin_second_quarter])
sin_second_half = np.negative(sin_first_half)
sin_90_minutes = np.concatenate([sin_first_half, sin_second_half])
repeats = int(len(df_90_minutes_avg_bedtime)/ (len(sin_90_minutes)) + 1)
for i in range(repeats):
    sin_90_minutes = np.concatenate([sin_90_minutes, sin_90_minutes])

sin_90_minutes_full = sin_90_minutes[start:]

df_sin_from_avg_bedtime.sin_from_avg_bedtime = sin_full[:len(df_sin_from_avg_bedtime)]
df_90_minutes_avg_bedtime['90minutes_from_avg_bedtime'] = sin_90_minutes_full[:len(df_90_minutes_avg_bedtime)]

[-0.63043478 -0.65217391 -0.67391304 ... -0.04347826 -0.02173913
 -0.        ]


In [299]:
df_90_minutes_avg_bedtime.to_csv("../90minutes_from_avg_bedtime.csv", index=False)
df_sin_from_avg_bedtime.to_csv("../clock_from_avg_bedtime.csv", index=False)

2.5714285714285716
