In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import pyarrow.parquet as pq

#downsampling
train_series = pd.read_parquet('train_series.parquet', engine = 'pyarrow')

series_id = train_series['series_id'].unique()

every_6th_ID = series_id[0::6]
every_6th_data = train_series[train_series['series_id'].isin(every_6th_ID)]

train_events = pd.read_csv('train_events.csv')

lst_IDs = every_6th_data['series_id'].unique()

filtered_events = train_events[train_events['series_id'].isin(lst_IDs)]

#finally...
merged_data = pd.merge(every_6th_data, filtered_events, on=['timestamp', 'series_id'], how='left')

#print(lst_IDs)

In [13]:
class CreateFeatureFrames:
    
    def calculate_accel_stats(accel_data, window_size=10):
        
        #accel_data = merged_data[merged_data['series_id'] == series_ID].copy()
    
        # mean, variance, and max for 'anglez' and 'enmo'
        accel_data['anglez_mean'] = accel_data['anglez'].rolling(window=window_size).mean()
        accel_data['anglez_var'] = accel_data['anglez'].rolling(window=window_size).var()
        accel_data['anglez_max'] = accel_data['anglez'].rolling(window=window_size).max()
    
        accel_data['enmo_mean'] = accel_data['enmo'].rolling(window=window_size).mean()
        accel_data['enmo_var'] = accel_data['enmo'].rolling(window=window_size).var()
        accel_data['enmo_max'] = accel_data['enmo'].rolling(window=window_size).max()
    
        # dataframe to store the new stats
        full_on_stats = pd.DataFrame({
            'series_id': accel_data['series_id'],
            'timestamp': accel_data['timestamp'],
            'anglez_mean': accel_data['anglez_mean'],
            'anglez_var': accel_data['anglez_var'],
            'anglez_max': accel_data['anglez_max'],
            'enmo_mean': accel_data['enmo_mean'],
            'enmo_var': accel_data['enmo_var'],
            'enmo_max': accel_data['enmo_max']
        })
    
        # drop NaN values
        full_on_stats.dropna(inplace=True)
    
        return full_on_stats

    def pull_labeled_stats(filtered_events, full_on_stats):
        
        #filtered_events1 = filtered_events[filtered_events['series_id'] == series_ID].copy()
        aggie_stats = pd.merge(full_on_stats, filtered_events)
        
        return aggie_stats


In [14]:
stats = CreateFeatureFrames.calculate_accel_stats(merged_data)
labeled_stats = CreateFeatureFrames.pull_labeled_stats(filtered_events, stats)

In [17]:
stats #All my stats across all observations

Unnamed: 0,series_id,timestamp,anglez_mean,anglez_var,anglez_max,enmo_mean,enmo_var,enmo_max
9,038441c925bb,2018-08-14T15:30:45-0400,2.69769,0.021058,3.084700,0.02168,6.844437e-08,0.0223
10,038441c925bb,2018-08-14T15:30:50-0400,2.71204,0.021173,3.084700,0.02180,2.177777e-07,0.0229
11,038441c925bb,2018-08-14T15:30:55-0400,2.70041,0.024470,3.084700,0.02181,2.121111e-07,0.0229
12,038441c925bb,2018-08-14T15:31:00-0400,2.67800,0.032650,3.084700,0.02183,2.067777e-07,0.0229
13,038441c925bb,2018-08-14T15:31:05-0400,2.65561,0.039713,3.084700,0.02184,1.959998e-07,0.0229
...,...,...,...,...,...,...,...,...
23116675,fe90110788d2,2017-09-08T00:14:35-0400,-27.90211,0.258496,-27.277500,0.02222,4.324622e-05,0.0409
23116676,fe90110788d2,2017-09-08T00:14:40-0400,-27.69077,0.121003,-27.032499,0.02046,1.162666e-06,0.0233
23116677,fe90110788d2,2017-09-08T00:14:45-0400,-27.56280,0.162447,-26.841200,0.02053,1.062333e-06,0.0233
23116678,fe90110788d2,2017-09-08T00:14:50-0400,-27.43640,0.202811,-26.723900,0.02056,1.009333e-06,0.0233


In [18]:
labeled_stats #All stats that correspond to an onset/wakeup event

Unnamed: 0,series_id,timestamp,anglez_mean,anglez_var,anglez_max,enmo_mean,enmo_var,enmo_max,night,event,step
0,038441c925bb,2018-08-14T22:26:00-0400,-78.805121,0.012469,-78.690598,0.00998,8.622227e-08,0.0104,1,onset,4992.0
1,038441c925bb,2018-08-15T06:41:00-0400,-62.386751,2.712332,-58.177101,0.02282,2.838844e-05,0.0358,1,wakeup,10932.0
2,038441c925bb,2018-08-15T19:37:00-0400,-5.842240,14.763264,-0.866400,0.02501,1.883277e-04,0.0588,2,onset,20244.0
3,038441c925bb,2018-08-16T05:41:00-0400,-45.934840,1.705133,-45.062500,0.01702,6.328891e-07,0.0187,2,wakeup,27492.0
4,038441c925bb,2018-08-16T23:03:00-0400,-1.851670,0.009765,-1.784700,0.00004,1.600000e-08,0.0004,3,onset,39996.0
...,...,...,...,...,...,...,...,...,...,...,...
1686,fe90110788d2,2017-09-05T09:26:00-0400,-57.161220,0.454760,-56.573101,0.00462,1.040622e-05,0.0089,32,wakeup,547152.0
1687,fe90110788d2,2017-09-05T22:30:00-0400,25.298310,532.083278,57.056400,0.09755,3.497012e-03,0.1995,33,onset,556560.0
1688,fe90110788d2,2017-09-06T04:07:00-0400,-43.050900,0.000000,-43.050900,0.00000,0.000000e+00,0.0000,33,wakeup,560604.0
1689,fe90110788d2,2017-09-06T23:35:00-0400,-2.288620,295.802544,14.965400,0.05037,1.255836e-03,0.1206,34,onset,574620.0


In [19]:
data = pd.merge(stats, labeled_stats, on=['series_id','timestamp','enmo_var','anglez_var','anglez_mean','anglez_max','enmo_mean','enmo_max'], how='left')

In [21]:
data #all together now!

Unnamed: 0,series_id,timestamp,anglez_mean,anglez_var,anglez_max,enmo_mean,enmo_var,enmo_max,night,event,step
0,038441c925bb,2018-08-14T15:30:45-0400,2.69769,0.021058,3.084700,0.02168,6.844437e-08,0.0223,,,
1,038441c925bb,2018-08-14T15:30:50-0400,2.71204,0.021173,3.084700,0.02180,2.177777e-07,0.0229,,,
2,038441c925bb,2018-08-14T15:30:55-0400,2.70041,0.024470,3.084700,0.02181,2.121111e-07,0.0229,,,
3,038441c925bb,2018-08-14T15:31:00-0400,2.67800,0.032650,3.084700,0.02183,2.067777e-07,0.0229,,,
4,038441c925bb,2018-08-14T15:31:05-0400,2.65561,0.039713,3.084700,0.02184,1.959998e-07,0.0229,,,
...,...,...,...,...,...,...,...,...,...,...,...
23116666,fe90110788d2,2017-09-08T00:14:35-0400,-27.90211,0.258496,-27.277500,0.02222,4.324622e-05,0.0409,,,
23116667,fe90110788d2,2017-09-08T00:14:40-0400,-27.69077,0.121003,-27.032499,0.02046,1.162666e-06,0.0233,,,
23116668,fe90110788d2,2017-09-08T00:14:45-0400,-27.56280,0.162447,-26.841200,0.02053,1.062333e-06,0.0233,,,
23116669,fe90110788d2,2017-09-08T00:14:50-0400,-27.43640,0.202811,-26.723900,0.02056,1.009333e-06,0.0233,,,


In [33]:
test_series = pd.read_parquet('test_series.parquet', engine = 'pyarrow')

In [35]:
test_stats = CreateFeatureFrames.calculate_accel_stats(test_series)
#labeled_stats = CreateFeatureFrames.pull_labeled_stats(filtered_events, stats)

In [36]:
test_stats

Unnamed: 0,series_id,timestamp,anglez_mean,anglez_var,anglez_max,enmo_mean,enmo_var,enmo_max
9,038441c925bb,2018-08-14T15:30:45-0400,2.69769,0.021058,3.0847,0.02168,6.844437e-08,0.0223
10,038441c925bb,2018-08-14T15:30:50-0400,2.71204,0.021173,3.0847,0.02180,2.177777e-07,0.0229
11,038441c925bb,2018-08-14T15:30:55-0400,2.70041,0.024470,3.0847,0.02181,2.121111e-07,0.0229
12,038441c925bb,2018-08-14T15:31:00-0400,2.67800,0.032650,3.0847,0.02183,2.067777e-07,0.0229
13,038441c925bb,2018-08-14T15:31:05-0400,2.65561,0.039713,3.0847,0.02184,1.959998e-07,0.0229
...,...,...,...,...,...,...,...,...
445,0402a003dae9,2018-12-18T12:57:05-0500,-27.80299,238.783705,-3.8209,0.05022,4.374773e-04,0.0867
446,0402a003dae9,2018-12-18T12:57:10-0500,-27.27190,226.067521,-3.8209,0.05152,3.950973e-04,0.0867
447,0402a003dae9,2018-12-18T12:57:15-0500,-25.89411,220.285875,-3.8209,0.05225,3.551339e-04,0.0867
448,0402a003dae9,2018-12-18T12:57:20-0500,-28.24926,240.009293,-3.8209,0.04865,3.523339e-04,0.0867
