In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path

In [11]:
# Subject ids are the integer prefix (the text before the first "_") of every
# *.txt file name in the labels directory, sorted ascending.
sids = sorted(
    int(label_file.stem.partition("_")[0])
    for label_file in Path("../data/labels").glob("*.txt")
)
sids

[46343,
 759667,
 781756,
 844359,
 1066528,
 1360686,
 1449548,
 1455390,
 1818471,
 2598705,
 2638030,
 3509524,
 3997827,
 4018081,
 4314139,
 4426783,
 5132496,
 5383425,
 5498603,
 5797046,
 6220552,
 7749105,
 8000685,
 8173033,
 8258170,
 8530312,
 8686948,
 8692923,
 9106476,
 9618981,
 9961348]

In [21]:
# Load the heart-rate recording of the first subject. The file has no header
# row, so the two comma-separated columns are named here.
sid = sids[0]
hrate = pd.read_csv(f"../data/heart_rate/{sid}_heartrate.txt", header=None, names=["tssec", "hrate"])
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
hrate.info()
hrate

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4878 entries, 0 to 4877
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   tssec   4878 non-null   float64
 1   hrate   4878 non-null   int64  
dtypes: float64(1), int64(1)
memory usage: 76.3 KB
None


Unnamed: 0,tssec,hrate
0,-556410.36066,57
1,-556408.36062,56
2,-556403.36062,56
3,-556399.36062,57
4,-556389.36062,59
...,...,...
4873,16959.47229,72
4874,16964.47229,74
4875,16970.47229,75
4876,16975.47229,76


In [25]:
# Load the sleep labels of the current subject. The file has no header row and
# is space-separated, so the two columns are named here.
sleep = pd.read_csv(f"../data/labels/{sid}_labeled_sleep.txt", header=None, names=["tssec", "sleep"], sep=" ")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
sleep.info()
sleep

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 567 entries, 0 to 566
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   tssec   567 non-null    int64
 1   sleep   567 non-null    int64
dtypes: int64(2)
memory usage: 9.0 KB
None


Unnamed: 0,tssec,sleep
0,0,-1
1,30,-1
2,60,-1
3,90,-1
4,120,-1
...,...,...
562,16860,0
563,16890,0
564,16920,0
565,16950,0


In [31]:
# Load the accelerometer recording of the current subject. The file has no
# header row and is space-separated, so the four columns are named here.
accel = pd.read_csv(f"../data/motion/{sid}_acceleration.txt", header=None, names=["tssec", "acc_x", "acc_y", "acc_z"], sep=" ")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
accel.info()
accel

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 982000 entries, 0 to 981999
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   tssec   982000 non-null  float64
 1   acc_x   982000 non-null  float64
 2   acc_y   982000 non-null  float64
 3   acc_z   982000 non-null  float64
dtypes: float64(4)
memory usage: 30.0 MB
None


Unnamed: 0,tssec,acc_x,acc_y,acc_z
0,-124489.161050,0.017487,-0.586700,-0.805771
1,-124489.116395,0.018982,-0.589676,-0.809158
2,-124489.115548,0.020966,-0.580887,-0.815048
3,-124489.114691,0.019485,-0.580872,-0.813583
4,-124489.097700,0.016998,-0.587204,-0.806259
...,...,...,...,...
981995,17642.967007,0.505249,0.425720,-0.748764
981996,17642.987576,0.508194,0.429138,-0.750717
981997,17643.006946,0.508179,0.427200,-0.747284
981998,17643.026865,0.505737,0.427673,-0.749741


In [59]:
def _read_table(path, names, sep):
    """Read one header-less delimited file, reject missing values, sort by ``tssec``."""
    frame = pd.read_csv(path, header=None, names=names, sep=sep)
    # Validate the raw values: a later transformation such as ``label > 0``
    # turns NaN into False, after which a missing value can no longer be seen.
    assert not frame.isna().any().any(), f"missing values in {path}"
    return frame.sort_values(by="tssec")


def read_data(sid, data_dir="../data"):
    """Load the heart-rate, sleep-label and acceleration tables of one subject.

    Parameters
    ----------
    sid : int or str
        Subject id; it is the prefix of the three file names.
    data_dir : str or path-like, default "../data"
        Directory holding the ``heart_rate``, ``labels`` and ``motion``
        sub-directories. The default is the previously hard-coded location.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(hrate, sleep, accel)``, each sorted by ``tssec``. The original row
        index is kept (it is not reset after sorting). ``sleep["sleep"]`` is
        boolean: True where the raw label is greater than 0.

    Raises
    ------
    AssertionError
        If any of the three files contains a missing value.
    """
    root = Path(data_dir)
    hrate = _read_table(root / "heart_rate" / f"{sid}_heartrate.txt", ["tssec", "hrate"], sep=",")
    sleep = _read_table(root / "labels" / f"{sid}_labeled_sleep.txt", ["tssec", "sleep"], sep=" ")
    accel = _read_table(
        root / "motion" / f"{sid}_acceleration.txt",
        ["tssec", "acc_x", "acc_y", "acc_z"],
        sep=" ",
    )
    # Collapse the raw label codes into a binary target: any code above 0 is True.
    sleep["sleep"] = sleep["sleep"] > 0
    return hrate, sleep, accel

In [102]:
# Load every subject up front: one (hrate, sleep, accel) tuple per id, in the
# same order as `sids`.
data = list(map(read_data, sids))

In [127]:
def gen_row(row, hrate, accel, rng, nwin=5):
    """Build one feature record for a single sleep-label row.

    Parameters
    ----------
    row : object with ``tssec`` and ``sleep`` attributes
        The label row (e.g. an ``itertuples`` tuple or a pandas Series).
    hrate : pandas.DataFrame
        Columns ``tssec`` and ``hrate`` (in that order), sorted by ``tssec``.
    accel : pandas.DataFrame
        Columns ``tssec``, ``acc_x``, ``acc_y``, ``acc_z`` (in that order),
        sorted by ``tssec``.
    rng : numpy.random.Generator
        Source of the random look-back offsets; it is advanced ``nwin`` times.
    nwin : int, default 5
        Number of heart-rate samples to draw while walking back in time.

    Returns
    -------
    dict
        ``tssec``, ``acc_x``, ``acc_y``, ``acc_z``, then ``hrate{k}`` /
        ``hdiff{k}`` for ``k`` in ``range(nwin)``, and finally ``sleep``.
        ``hdiff{k}`` is the time between the previous anchor (the label time
        for ``k == 0``, otherwise the previous heart-rate sample) and the
        sample picked for window ``k``.
    """
    t = row.tssec
    # First accelerometer sample at or after t; fall back to the last sample
    # when t lies beyond the end of the recording.
    i = min(accel["tssec"].searchsorted(t), len(accel) - 1)
    acc_x, acc_y, acc_z = accel.iloc[i, 1:]
    r = {"tssec": t, "acc_x": acc_x, "acc_y": acc_y, "acc_z": acc_z}

    hrate_ts = hrate["tssec"]
    last = len(hrate) - 1
    prev_t, prev_j = t, None
    for nw in range(nwin):
        # Random look-back drawn from [0, 6) for the first window and [1, 7)
        # afterwards, times 60 (presumably minutes converted to seconds).
        low, high = (0, 6) if nw == 0 else (1, 7)
        diff = rng.uniform(low=low, high=high) * 60
        # First heart-rate sample at or after the look-back target, clamped to
        # the last available sample.
        j = min(hrate_ts.searchsorted(prev_t - diff), last)
        if j == prev_j:
            # Step back one sample so consecutive windows differ, but never
            # below index 0: a negative index would make iloc wrap around to
            # the END of the recording and yield a negative hdiff.
            j = max(j - 1, 0)
        hrate_t, hrate_v = hrate.iloc[j, :]
        r[f"hrate{nw}"] = hrate_v
        r[f"hdiff{nw}"] = (prev_t - hrate_t)
        prev_t = hrate_t
        prev_j = j
    r["sleep"] = row.sleep
    return r

def gen_data_table(seed, nwin=5, frames=None):
    """Generate one feature record per sleep-label row and return them as a frame.

    Parameters
    ----------
    seed : int
        Seed of the random generator shared by all rows of the table.
    nwin : int, default 5
        Number of heart-rate windows per row (forwarded to ``gen_row``).
    frames : tuple of pandas.DataFrame, optional
        ``(hrate, sleep, accel)`` to build the table from, e.g. one element of
        the list produced by ``read_data``. When omitted, the module-level
        ``hrate``, ``sleep`` and ``accel`` are used, which makes the result
        depend on whichever cell last bound those names.

    Returns
    -------
    pandas.DataFrame
        One row per label row, with the columns produced by ``gen_row``.
    """
    # Distinct local names: assigning to hrate/sleep/accel here would shadow
    # the module-level frames that the fallback has to read.
    hr, labels, acc = (hrate, sleep, accel) if frames is None else frames
    rng = np.random.default_rng(seed)
    return pd.DataFrame([gen_row(row, hr, acc, rng, nwin) for row in labels.itertuples()])

In [128]:
# Display the heart-rate frame (the middle rows are elided in the rendering).
hrate

Unnamed: 0,tssec,hrate
0,-556410.36066,57
1,-556408.36062,56
2,-556403.36062,56
3,-556399.36062,57
4,-556389.36062,59
...,...,...
4873,16959.47229,72
4874,16964.47229,74
4875,16970.47229,75
4876,16975.47229,76


In [129]:
# Locate the first heart-rate sample at or after t = -60 and show the ten rows
# before it together with the ten rows starting at it.
j = np.searchsorted(hrate["tssec"], -60)
hrate.iloc[j - 10 : j + 10]

Unnamed: 0,tssec,hrate
1553,-112.05212,86
1554,-110.05212,86
1555,-105.05212,87
1556,-99.05212,87
1557,-92.05212,87
1558,-85.05212,83
1559,-80.05212,83
1560,-75.05212,82
1561,-73.05212,82
1562,-69.05212,84


In [130]:
# Build the feature table with seed 4 from the module-level hrate/sleep/accel
# frames, i.e. from whichever subject those names are currently bound to.
gen_data_table(4)

Unnamed: 0,tssec,acc_x,acc_y,acc_z,hrate0,hdiff0,hrate1,hdiff1,hrate2,hdiff2,hrate3,hdiff3,hrate4,hdiff4,sleep
0,0,-0.234650,0.905975,0.362747,91.0,336.91223,91.0,5.00000,84.0,783.17831,86.0,85.00000,79.0,274.00000,False
1,30,-0.321213,0.782272,0.505707,87.0,135.05212,91.0,236.86011,84.0,783.17831,80.0,369.00003,81.0,248.90622,False
2,60,-0.352829,0.906235,0.207657,90.0,324.05212,91.0,77.86011,84.0,783.17831,78.0,341.00003,79.0,357.73065,False
3,90,-0.505539,0.884384,-0.009323,86.0,128.52163,91.0,303.39060,84.0,783.17831,79.0,120.00000,84.0,271.00000,False
4,120,-0.342529,0.900085,-0.146805,86.0,253.05215,91.0,208.86008,84.0,783.17831,86.0,108.00000,78.0,233.00003,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
562,16860,-0.444382,-0.528549,0.723450,108.0,284.52771,68.0,408.00291,80.0,411.00000,73.0,409.00003,77.0,400.99997,False
563,16890,-0.442917,-0.525604,0.723923,95.0,329.52771,120.0,264.00000,77.0,354.00294,77.0,90.99997,80.0,80.00003,False
564,16920,-0.440949,-0.526581,0.724426,75.0,42.52774,74.0,223.00000,113.0,333.99997,76.0,373.00291,75.0,82.00000,False
565,16950,-0.440949,-0.526581,0.724426,80.0,161.52771,76.0,176.00000,113.0,80.00000,109.0,110.00003,75.0,410.00288,False


In [131]:
import torch
from torch.utils.data import Dataset, DataLoader

class SleepDataset(Dataset):
    """Flat, index-addressable view over the label rows of several subjects.

    Item ``idx`` is the feature record that ``gen_row`` produces for the
    ``idx``-th label row when the subjects' label frames are concatenated in
    the order given by ``data``.

    Parameters
    ----------
    data : sequence of (hrate, sleep, accel) tuples
        One tuple of frames per subject, as returned by ``read_data``.
    seed : int
        Seed of the generator that draws the random heart-rate look-backs.
        The generator is shared by all items and advances on every access,
        so reading the same index twice generally gives different windows.
    nwin : int, default 5
        Number of heart-rate windows per item (forwarded to ``gen_row``).
    """

    def __init__(self, data, seed, nwin=5):
        self.data = data
        # Number of label rows per subject and their running total;
        # cumlen[k] is the global index of the first row of subject k + 1.
        self.lens = [len(d[1]) for d in data]
        self.cumlen = np.cumsum(self.lens)
        self.seed = seed
        self.nwin = nwin
        self.rng = np.random.default_rng(seed)

    def __len__(self):
        """Return the total number of label rows over all subjects."""
        return sum(self.lens)

    def __getitem__(self, idx):
        """Return the feature record for global row ``idx``.

        Negative indices count from the end.

        Raises
        ------
        IndexError
            If ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        if idx < 0:
            idx = idx + total
        if not 0 <= idx < total:
            raise IndexError(f"index {idx} is out of range for a dataset of {total} rows")
        # side="right": an index equal to cumlen[k] is the FIRST row of
        # subject k + 1, not one past the last row of subject k. The default
        # side="left" sent it to subject k and overran that subject's frame.
        sid = int(np.searchsorted(self.cumlen, idx, side="right"))
        offset = idx - self.cumlen[sid - 1] if sid > 0 else idx
        hrate, sleep, accel = self.data[sid]
        row = sleep.iloc[offset, :]
        return gen_row(row, hrate, accel, self.rng, self.nwin)

In [142]:
# Smoke-test the dataset: print its total length, then the first item and one
# item from further into the concatenated subjects.
ds = SleepDataset(data, 1, nwin=5)
print(len(ds))
for probe in (0, 20000):
    print(ds[probe])

27211
{'tssec': 0, 'acc_x': -0.2346497, 'acc_y': 0.9059753, 'acc_z': 0.3627472, 'hrate0': 86.0, 'hdiff0': 184.05211997, 'hrate1': 91.0, 'hdiff1': 157.86011004499997, 'hrate2': 84.0, 'hdiff2': 783.178309915, 'hrate3': 85.0, 'hdiff3': 401.0, 'hrate4': 74.0, 'hdiff4': 169.0, 'sleep': False}
{'tssec': 8640, 'acc_x': 0.3096619, 'acc_y': 0.1594543, 'acc_z': -0.9389496, 'hrate0': 71.0, 'hdiff0': 148.49247002999982, 'hrate1': 68.0, 'hdiff1': 353.0, 'hrate2': 65.0, 'hdiff2': 206.0, 'hrate3': 75.0, 'hdiff3': 255.0, 'hrate4': 81.0, 'hdiff4': 65.0, 'sleep': True}


In [60]:
# Rebind the module-level hrate/sleep/accel to the read_data() frames of the
# first subject (sorted by tssec, boolean sleep label). gen_data_table reads
# these module-level names, so its result depends on this cell having run.
hrate, sleep, accel = read_data(sids[0])

In [61]:
# Summarise the label frame: column dtypes and non-null counts.
sleep.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 567 entries, 0 to 566
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   tssec   567 non-null    int64
 1   sleep   567 non-null    bool 
dtypes: bool(1), int64(1)
memory usage: 9.4 KB


In [62]:
# Display the label frame (the middle rows are elided in the rendering).
sleep

Unnamed: 0,tssec,sleep
0,0,False
1,30,False
2,60,False
3,90,False
4,120,False
...,...,...
562,16860,False
563,16890,False
564,16920,False
565,16950,False


In [58]:
# Display the acceleration frame (the middle rows are elided in the rendering).
accel

Unnamed: 0,tssec,acc_x,acc_y,acc_z
0,-124489.161050,0.017487,-0.586700,-0.805771
1,-124489.116395,0.018982,-0.589676,-0.809158
2,-124489.115548,0.020966,-0.580887,-0.815048
3,-124489.114691,0.019485,-0.580872,-0.813583
4,-124489.097700,0.016998,-0.587204,-0.806259
...,...,...,...,...
981995,17642.967007,0.505249,0.425720,-0.748764
981996,17642.987576,0.508194,0.429138,-0.750717
981997,17643.006946,0.508179,0.427200,-0.747284
981998,17643.026865,0.505737,0.427673,-0.749741
