# Setup

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Feature engineering

In [45]:
df = pd.read_csv('aggregated_data_250ms.csv')

In [18]:
overlap_percentage = 0.9

In [46]:
df

Unnamed: 0,End Time,Run,Participant,Path,count_Acceleration x (m/s^2)_head,count_Acceleration x (m/s^2)_leg,count_Acceleration y (m/s^2)_head,count_Acceleration y (m/s^2)_leg,count_Acceleration z (m/s^2)_head,count_Acceleration z (m/s^2)_leg,...,min_Distance (cm)_head,min_Distance (cm)_leg,min_Illuminance (lx)_head,std_Distance (cm)_leg,std_Illuminance (lx)_head,std_Height (m)_leg,std_Horizontal Accuracy (m)_leg,std_Latitude (°)_leg,std_Longitude (°)_leg,std_Vertical Accuracy (°)_leg
0,1970-01-01 00:01:14.236228590,3,Julian,circle,113.0,0.0,113.0,0.0,113.0,0.0,...,,,,,,,,,,
1,1970-01-01 00:01:14.486228590,3,Julian,circle,118.0,0.0,118.0,0.0,118.0,0.0,...,,,,,,,,,,
2,1970-01-01 00:01:14.736228590,3,Julian,circle,117.0,0.0,117.0,0.0,117.0,0.0,...,,,,,,,,,,
3,1970-01-01 00:01:14.986228590,3,Julian,circle,118.0,0.0,118.0,0.0,118.0,0.0,...,,,,,,,,,,
4,1970-01-01 00:01:15.236228590,3,Julian,circle,117.0,0.0,117.0,0.0,117.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14565,1970-01-01 00:04:19.202577700,3,Martin,straight,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
14566,1970-01-01 00:04:19.452577700,3,Martin,straight,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
14567,1970-01-01 00:04:19.702577700,3,Martin,straight,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
14568,1970-01-01 00:04:19.952577700,3,Martin,straight,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [47]:
print(df.groupby('Participant').size())

Participant
Dany       2597
Felix      2263
Julian     1894
Mark       1888
Martin     1884
Michele    1837
Paul       2207
dtype: int64


## Time domain

In [48]:
# performing feature engineering on the data

window_size = 80

# calculating the number of windows

num_windows = int(df.shape[0] / (window_size * (1 - overlap_percentage)))

print('Number of windows:', num_windows)

# print number of windows for each participant
print(df.groupby('Participant').size() / (window_size * (1 - overlap_percentage)))

Number of windows: 1821
Participant
Dany       324.625
Felix      282.875
Julian     236.750
Mark       236.000
Martin     235.500
Michele    229.625
Paul       275.875
dtype: float64


In [49]:
# compute duration in seconds of each window

# interval size in seconds of each row in the dataset
interval_size = 0.25 # seconds

duration = window_size * interval_size

print('Duration of each window:', duration, 'seconds')


Duration of each window: 20.0 seconds


In [104]:
# drop the columns that start with count or std

cols = df.columns

cols_to_drop = []

for col in cols:
    if col.startswith('count') or col.startswith('std'):
        cols_to_drop.append(col)

slim_df = df.drop(cols_to_drop, axis=1)


In [51]:
slim_df

Unnamed: 0,End Time,Run,Participant,Path,max_Acceleration x (m/s^2)_head,max_Acceleration y (m/s^2)_head,max_Acceleration z (m/s^2)_head,max_Direction (°)_head,max_Direction (°)_leg,max_Gyroscope x (rad/s)_head,...,max_Illuminance (lx)_head,mean_Distance (cm)_head,mean_Distance (cm)_leg,mean_Illuminance (lx)_head,median_Distance (cm)_head,median_Distance (cm)_leg,median_Illuminance (lx)_head,min_Distance (cm)_head,min_Distance (cm)_leg,min_Illuminance (lx)_head
0,1970-01-01 00:01:14.236228590,3,Julian,circle,-2.528615,0.854238,9.607183,86.514656,,-0.181122,...,,,,,,,,,,
1,1970-01-01 00:01:14.486228590,3,Julian,circle,-2.184049,0.865005,9.752547,,,0.242972,...,,,,,,,,,,
2,1970-01-01 00:01:14.736228590,3,Julian,circle,-2.126023,0.486939,9.944571,,,0.201280,...,,,,,,,,,,
3,1970-01-01 00:01:14.986228590,3,Julian,circle,-1.955534,0.155533,9.562318,,87.232205,0.096822,...,,,,,,,,,,
4,1970-01-01 00:01:15.236228590,3,Julian,circle,-2.541178,-0.178863,9.623933,86.514656,,0.067806,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14565,1970-01-01 00:04:19.202577700,3,Martin,straight,,,,,,,...,,,,,,,,,,
14566,1970-01-01 00:04:19.452577700,3,Martin,straight,,,,,,,...,,,,,,,,,,
14567,1970-01-01 00:04:19.702577700,3,Martin,straight,,,,,320.187022,,...,,,,,,,,,,
14568,1970-01-01 00:04:19.952577700,3,Martin,straight,,,,,,,...,,,,,,,,,,


In [36]:
# create a dataframe to store the percentage of missing values for each feature

missing_values = slim_df.isna().sum() / slim_df.shape[0]

missing_values_df = pd.DataFrame(missing_values, columns=['missing_values'])


# count the number of features that have more than 10% missing values

for  row in missing_values_df[missing_values_df['missing_values'] > 0.1].iterrows():
    print(row)




('max_Direction (°)_head', missing_values    0.768497
Name: max_Direction (°)_head, dtype: float64)
('max_Direction (°)_leg', missing_values    0.790048
Name: max_Direction (°)_leg, dtype: float64)
('max_Height (m)_head', missing_values    0.768291
Name: max_Height (m)_head, dtype: float64)
('max_Height (m)_leg', missing_values    0.77337
Name: max_Height (m)_leg, dtype: float64)
('max_Horizontal Accuracy (m)_head', missing_values    0.768222
Name: max_Horizontal Accuracy (m)_head, dtype: float64)
('max_Horizontal Accuracy (m)_leg', missing_values    0.77337
Name: max_Horizontal Accuracy (m)_leg, dtype: float64)
('max_Latitude (°)_head', missing_values    0.768222
Name: max_Latitude (°)_head, dtype: float64)
('max_Latitude (°)_leg', missing_values    0.77337
Name: max_Latitude (°)_leg, dtype: float64)
('max_Longitude (°)_head', missing_values    0.768222
Name: max_Longitude (°)_head, dtype: float64)
('max_Longitude (°)_leg', missing_values    0.77337
Name: max_Longitude (°)_leg, dtype:

In [105]:
# drop the features that have more than 80% missing values

slim_df = slim_df.drop(missing_values_df[missing_values_df['missing_values'] > 0.8].index, axis=1)


In [53]:
slim_df

Unnamed: 0,End Time,Run,Participant,Path,max_Acceleration x (m/s^2)_head,max_Acceleration y (m/s^2)_head,max_Acceleration z (m/s^2)_head,max_Direction (°)_head,max_Direction (°)_leg,max_Gyroscope x (rad/s)_head,...,min_X (hPa)_leg,min_X (m/s^2)_leg,min_X (rad/s)_leg,min_X (µT)_leg,min_Y (m/s^2)_leg,min_Y (rad/s)_leg,min_Y (µT)_leg,min_Z (m/s^2)_leg,min_Z (rad/s)_leg,min_Z (µT)_leg
0,1970-01-01 00:01:14.236228590,3,Julian,circle,-2.528615,0.854238,9.607183,86.514656,,-0.181122,...,,-1.417702,-0.008362,123.241341,-9.436826,-0.025891,195.738083,-0.176849,-0.008046,78.316597
1,1970-01-01 00:01:14.486228590,3,Julian,circle,-2.184049,0.865005,9.752547,,,0.242972,...,,-1.818568,0.007293,122.754692,-9.682765,-0.033591,195.317673,-0.285953,-0.058599,78.195145
2,1970-01-01 00:01:14.736228590,3,Julian,circle,-2.126023,0.486939,9.944571,,,0.201280,...,1017.633820,-1.306932,0.006235,122.611519,-9.702673,-0.051410,194.987640,-0.163583,-0.021580,77.717529
3,1970-01-01 00:01:14.986228590,3,Julian,circle,-1.955534,0.155533,9.562318,,87.232205,0.096822,...,,-1.247057,-0.059553,122.610069,-9.458531,-0.051886,194.959137,-0.181838,-0.000762,77.923599
4,1970-01-01 00:01:15.236228590,3,Julian,circle,-2.541178,-0.178863,9.623933,86.514656,,0.067806,...,,-1.451532,-0.054143,122.635063,-9.386381,-0.008329,195.125153,-0.135919,-0.013119,77.808777
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14565,1970-01-01 00:04:19.202577700,3,Martin,straight,,,,,,,...,,-2.522704,-0.247998,-9.559916,-11.075468,-0.654018,42.954319,-0.901329,-0.094059,7.501751
14566,1970-01-01 00:04:19.452577700,3,Martin,straight,,,,,,,...,1017.750397,-1.451681,-0.114819,-8.744414,-10.288106,-0.219959,42.755871,-0.271714,-0.076566,9.085091
14567,1970-01-01 00:04:19.702577700,3,Martin,straight,,,,,320.187022,,...,,-1.108894,-0.062142,-8.442186,-9.440119,-0.088548,42.923237,-0.031726,-0.051616,10.078011
14568,1970-01-01 00:04:19.952577700,3,Martin,straight,,,,,,,...,,,,,,,,,,


In [54]:
step_size = window_size * (1 - overlap_percentage)

step_size = int(np.ceil(step_size))

In [44]:
step_size

8

## Aggregating with mean

In [135]:
# create a new dataframe aggregating the data in windows of 80 rows with a step size of step_size grouping by Participant, Path and Run columns. using the last value of the End Time column to represent the end time of the window.

# Perform the rolling window mean calculation
rolling_means = (slim_df
                 .drop(['End Time'], axis=1)
                 .groupby(['Participant', 'Path', 'Run'])
                 .rolling(window=window_size, min_periods=1)
                 .mean()
                 .reset_index(drop=False))

# Select the last record of each window using step_size
# selected_indices = slim_df.groupby(['Participant', 'Path', 'Run']).apply(lambda x: x.index[::step_size]).reset_index(drop=True)

# Select the last record of each window using step_size
selected_indices = rolling_means.groupby(['Participant', 'Path', 'Run']).apply(
    lambda x: x.iloc[window_size-1::step_size]
).reset_index(drop=True)

# Optionally, extract the last 'End Time' for each window
# This assumes 'End Time' is a column in slim_df
end_times = slim_df.groupby(['Participant', 'Path', 'Run']).apply(
    lambda x: x['End Time'].iloc[window_size-1::step_size]
).reset_index(drop=False)

# Merge the selected indices with the original dataframe to get the final result

final_df = pd.merge(selected_indices, end_times, on=['Participant', 'Path', 'Run', 'level_3'])







In [136]:
# reorder the columns in the final dataframe so that end time is the first column

final_df = final_df[['End Time'] + final_df.columns[:-1].tolist()]

In [137]:
# drop the level_3 column

final_df = final_df.drop('level_3', axis=1)


In [138]:
final_df[final_df['Participant'] == 'Paul']

Unnamed: 0,End Time,Participant,Path,Run,max_Acceleration x (m/s^2)_head,max_Acceleration y (m/s^2)_head,max_Acceleration z (m/s^2)_head,max_Direction (°)_head,max_Direction (°)_leg,max_Gyroscope x (rad/s)_head,...,min_X (hPa)_leg,min_X (m/s^2)_leg,min_X (rad/s)_leg,min_X (µT)_leg,min_Y (m/s^2)_leg,min_Y (rad/s)_leg,min_Y (µT)_leg,min_Z (m/s^2)_leg,min_Z (rad/s)_leg,min_Z (µT)_leg
1097,1970-01-01 00:00:19.443174375,Paul,circle,1,1.765696,2.697317,10.270371,248.490251,184.570314,0.399717,...,1017.769964,-3.374005,-0.455431,1.773215,-4.036714,-0.514052,-18.099647,-1.567110,-0.437367,-28.363825
1098,1970-01-01 00:00:21.443174375,Paul,circle,1,1.616015,3.012968,10.223753,239.994258,173.803715,0.432771,...,1017.771694,-4.249971,-0.390526,5.430133,-4.717432,-0.464384,-13.690645,-1.499579,-0.446396,-25.062362
1099,1970-01-01 00:00:23.443174375,Paul,circle,1,0.816468,3.766310,9.877714,240.044957,175.113284,0.391889,...,1017.771107,-5.132163,-0.346049,7.910240,-5.494996,-0.447851,-8.082824,-1.451632,-0.476161,-20.385967
1100,1970-01-01 00:00:25.443174375,Paul,circle,1,0.691152,4.564122,9.585206,246.814536,189.580076,0.306570,...,1017.771787,-6.128236,-0.297085,9.884049,-6.106893,-0.425436,-1.135174,-1.348840,-0.477839,-16.795956
1101,1970-01-01 00:00:27.443174375,Paul,circle,1,0.814951,4.968913,9.636465,226.700345,189.441963,0.337560,...,1017.777822,-7.242221,-0.232402,12.587481,-6.900980,-0.414543,5.372318,-1.338451,-0.515266,-15.342298
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1301,1970-01-01 00:01:13.420156209,Paul,straight,3,0.280678,6.166162,11.565835,207.241666,227.957637,0.538041,...,1017.657691,-10.575632,-0.449159,14.399783,-10.115436,-0.702960,36.867949,-2.174195,-0.848261,-2.948314
1302,1970-01-01 00:01:15.420156209,Paul,straight,3,0.372420,5.868674,11.529158,197.567117,237.422908,0.533530,...,1017.654266,-10.018005,-0.498442,13.295103,-10.343066,-0.765908,37.164859,-2.172623,-0.872678,-1.347099
1303,1970-01-01 00:01:17.420156209,Paul,straight,3,0.483956,6.086563,11.067687,205.582197,216.550055,0.504743,...,1017.656034,-9.374143,-0.482093,13.406412,-9.896224,-0.810511,32.583036,-2.278870,-0.699989,-3.907939
1304,1970-01-01 00:01:19.420156209,Paul,straight,3,0.599402,6.135728,10.777991,231.293670,243.750783,0.499518,...,1017.650018,-8.501130,-0.533147,10.461789,-9.099295,-0.851079,27.611279,-2.265472,-0.628307,-5.576325


In [139]:
# save the final dataframe to a csv file

final_df.to_csv('aggregated_data_80rows_250ms.csv', index=False)

In [122]:
rolling_means

Unnamed: 0,Participant,Path,Run,level_3,max_Acceleration x (m/s^2)_head,max_Acceleration y (m/s^2)_head,max_Acceleration z (m/s^2)_head,max_Direction (°)_head,max_Direction (°)_leg,max_Gyroscope x (rad/s)_head,...,min_X (hPa)_leg,min_X (m/s^2)_leg,min_X (rad/s)_leg,min_X (µT)_leg,min_Y (m/s^2)_leg,min_Y (rad/s)_leg,min_Y (µT)_leg,min_Z (m/s^2)_leg,min_Z (rad/s)_leg,min_Z (µT)_leg
0,Dany,circle,1,7782,-1.136591,-0.402592,10.048658,,,-0.392328,...,,,,,,,,,,
1,Dany,circle,1,7783,-1.117747,0.172881,10.222437,,,-0.244346,...,,,,,,,,,,
2,Dany,circle,1,7784,-1.136990,-0.274377,10.135399,,,-0.168904,...,,,,,,,,,,
3,Dany,circle,1,7785,-1.186541,-0.472582,10.085448,263.703186,,-0.020120,...,,,,,,,,,,
4,Dany,circle,1,7786,-1.459024,-0.389073,10.132647,263.703186,,0.133260,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14565,Paul,straight,3,10079,1.071341,5.679051,10.691244,234.457210,265.593693,0.498413,...,1017.647206,-8.203327,-0.534708,10.545596,-8.823087,-0.869453,25.919201,-2.265114,-0.583395,-5.420765
14566,Paul,straight,3,10080,1.123744,5.684031,10.675503,234.457210,265.593693,0.520142,...,1017.647206,-8.166988,-0.535131,10.348376,-8.847656,-0.874433,25.835895,-2.279793,-0.567666,-5.373441
14567,Paul,straight,3,10081,1.140943,5.677607,10.671166,234.457210,265.593693,0.519894,...,1017.647206,-8.120352,-0.544852,10.406526,-8.804371,-0.883169,25.688060,-2.280835,-0.545514,-5.330164
14568,Paul,straight,3,10082,1.117381,5.659579,10.645728,247.404543,259.952410,0.520765,...,1017.649740,-8.108655,-0.546183,10.482126,-8.774086,-0.879784,25.455084,-2.290057,-0.548339,-5.270283


In [121]:
selected_indices

Unnamed: 0,Participant,Path,Run,level_3,max_Acceleration x (m/s^2)_head,max_Acceleration y (m/s^2)_head,max_Acceleration z (m/s^2)_head,max_Direction (°)_head,max_Direction (°)_leg,max_Gyroscope x (rad/s)_head,...,min_X (hPa)_leg,min_X (m/s^2)_leg,min_X (rad/s)_leg,min_X (µT)_leg,min_Y (m/s^2)_leg,min_Y (rad/s)_leg,min_Y (µT)_leg,min_Z (m/s^2)_leg,min_Z (rad/s)_leg,min_Z (µT)_leg
0,Dany,circle,1,7861,-0.174856,3.718259,9.913330,231.000047,,0.286181,...,,,,,,,,,,
1,Dany,circle,1,7869,-0.063574,4.080630,9.843227,230.877146,,0.260505,...,,,,,,,,,,
2,Dany,circle,1,7877,-0.002632,4.155705,9.872405,234.449919,,0.282208,...,,,,,,,,,,
3,Dany,circle,1,7885,0.011000,4.233187,9.881662,242.378522,,0.288809,...,,,,,,,,,,
4,Dany,circle,1,7893,-0.115065,4.186939,9.902068,221.094806,,0.310472,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1301,Paul,straight,3,10044,0.280678,6.166162,11.565835,207.241666,227.957637,0.538041,...,1017.657691,-10.575632,-0.449159,14.399783,-10.115436,-0.702960,36.867949,-2.174195,-0.848261,-2.948314
1302,Paul,straight,3,10052,0.372420,5.868674,11.529158,197.567117,237.422908,0.533530,...,1017.654266,-10.018005,-0.498442,13.295103,-10.343066,-0.765908,37.164859,-2.172623,-0.872678,-1.347099
1303,Paul,straight,3,10060,0.483956,6.086563,11.067687,205.582197,216.550055,0.504743,...,1017.656034,-9.374143,-0.482093,13.406412,-9.896224,-0.810511,32.583036,-2.278870,-0.699989,-3.907939
1304,Paul,straight,3,10068,0.599402,6.135728,10.777991,231.293670,243.750783,0.499518,...,1017.650018,-8.501130,-0.533147,10.461789,-9.099295,-0.851079,27.611279,-2.265472,-0.628307,-5.576325


In [108]:
selected_indices

Unnamed: 0,Participant,Path,Run,level_3,max_Acceleration x (m/s^2)_head,max_Acceleration y (m/s^2)_head,max_Acceleration z (m/s^2)_head,max_Direction (°)_head,max_Direction (°)_leg,max_Gyroscope x (rad/s)_head,...,min_X (hPa)_leg,min_X (m/s^2)_leg,min_X (rad/s)_leg,min_X (µT)_leg,min_Y (m/s^2)_leg,min_Y (rad/s)_leg,min_Y (µT)_leg,min_Z (m/s^2)_leg,min_Z (rad/s)_leg,min_Z (µT)_leg
0,Dany,circle,1,7861,-0.174856,3.718259,9.913330,231.000047,,0.286181,...,,,,,,,,,,
1,Dany,circle,1,7869,-0.063574,4.080630,9.843227,230.877146,,0.260505,...,,,,,,,,,,
2,Dany,circle,1,7877,-0.002632,4.155705,9.872405,234.449919,,0.282208,...,,,,,,,,,,
3,Dany,circle,1,7885,0.011000,4.233187,9.881662,242.378522,,0.288809,...,,,,,,,,,,
4,Dany,circle,1,7893,-0.115065,4.186939,9.902068,221.094806,,0.310472,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1301,Paul,straight,3,10044,0.280678,6.166162,11.565835,207.241666,227.957637,0.538041,...,1017.657691,-10.575632,-0.449159,14.399783,-10.115436,-0.702960,36.867949,-2.174195,-0.848261,-2.948314
1302,Paul,straight,3,10052,0.372420,5.868674,11.529158,197.567117,237.422908,0.533530,...,1017.654266,-10.018005,-0.498442,13.295103,-10.343066,-0.765908,37.164859,-2.172623,-0.872678,-1.347099
1303,Paul,straight,3,10060,0.483956,6.086563,11.067687,205.582197,216.550055,0.504743,...,1017.656034,-9.374143,-0.482093,13.406412,-9.896224,-0.810511,32.583036,-2.278870,-0.699989,-3.907939
1304,Paul,straight,3,10068,0.599402,6.135728,10.777991,231.293670,243.750783,0.499518,...,1017.650018,-8.501130,-0.533147,10.461789,-9.099295,-0.851079,27.611279,-2.265472,-0.628307,-5.576325


In [91]:
aggregated_df[aggregated_df['Participant'] == 'Paul']

Unnamed: 0,Participant,Path,Run,End Time,max_Acceleration x (m/s^2)_head,max_Acceleration y (m/s^2)_head,max_Acceleration z (m/s^2)_head,max_Direction (°)_head,max_Direction (°)_leg,max_Gyroscope x (rad/s)_head,...,min_X (hPa)_leg,min_X (m/s^2)_leg,min_X (rad/s)_leg,min_X (µT)_leg,min_Y (m/s^2)_leg,min_Y (rad/s)_leg,min_Y (µT)_leg,min_Z (m/s^2)_leg,min_Z (rad/s)_leg,min_Z (µT)_leg
1536,Paul,circle,1,1970-01-01 00:00:00.693174375,0.972483,6.098308,8.413763,260.135803,,-0.049582,...,1017.746353,-0.607920,-0.319922,0.625681,-0.321893,-0.133885,-37.534379,-0.483850,-0.100894,-28.284395
1537,Paul,circle,1,1970-01-01 00:00:02.693174375,5.252137,3.911993,11.445088,260.135803,,0.423663,...,1017.748311,-0.528477,-0.313696,1.222175,-0.483830,-0.265215,-36.051968,-0.524016,-0.185630,-30.924236
1538,Paul,circle,1,1970-01-01 00:00:04.693174375,5.785247,1.934093,11.624679,260.135803,,0.815135,...,1017.751221,-0.426060,-0.217944,2.271941,-0.399379,-0.240487,-36.286431,-0.561132,-0.162040,-30.500476
1539,Paul,circle,1,1970-01-01 00:00:06.693174375,4.328838,2.134797,11.131012,260.135803,,0.705284,...,1017.751977,-0.337178,-0.168571,2.464992,-0.323184,-0.184187,-36.870851,-0.478306,-0.116299,-29.672081
1540,Paul,circle,1,1970-01-01 00:00:08.693174375,3.482001,2.608672,10.875584,260.135803,,0.549211,...,1017.752194,-0.377916,-0.239930,1.512689,-0.333162,-0.229398,-37.190186,-0.526507,-0.108354,-29.540151
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1807,Paul,straight,3,1970-01-01 00:01:14.670156209,0.283161,5.926020,11.539656,202.302045,242.075564,0.515188,...,1017.659495,-10.083590,-0.426928,13.645502,-10.220170,-0.685592,37.191964,-2.062375,-0.801272,-1.986403
1808,Paul,straight,3,1970-01-01 00:01:16.670156209,0.431867,6.008280,11.244958,192.700833,227.595207,0.517012,...,1017.656034,-9.628700,-0.492325,13.979033,-10.213533,-0.820875,35.196319,-2.270115,-0.732656,-2.842232
1809,Paul,straight,3,1970-01-01 00:01:18.670156209,0.556466,6.055374,10.903465,218.444045,228.623724,0.499459,...,1017.650018,-8.727640,-0.527043,10.984718,-9.249225,-0.846011,28.593160,-2.222123,-0.649619,-5.373074
1810,Paul,straight,3,1970-01-01 00:01:20.670156209,0.636124,6.123659,10.616408,244.197991,256.456974,0.456841,...,1017.648015,-8.435512,-0.529817,10.414927,-9.061630,-0.854241,26.912382,-2.270390,-0.610301,-5.409832


# Testing

In [79]:
test_df = {'Participant': ['Paul','Paul','Paul','Mark','Mark','Mark','John','John','John'], 'Path': ['A','A','A','A','A','A','A','A','A'], 'Run': [1,1,1,1,1,1,1,1,1], 'Feature1': [1,2,3,4,5,6,7,8,9], 'Feature2': [10,20,30,40,50,60,70,80,90]}

test_df = pd.DataFrame(test_df)

In [81]:
test_df

Unnamed: 0,Participant,Path,Run,Feature1,Feature2
0,Paul,A,1,1,10
1,Paul,A,1,2,20
2,Paul,A,1,3,30
3,Mark,A,1,4,40
4,Mark,A,1,5,50
5,Mark,A,1,6,60
6,John,A,1,7,70
7,John,A,1,8,80
8,John,A,1,9,90


In [88]:
test_df.groupby(['Participant', 'Path', 'Run']).rolling(window=3, min_periods=1).agg('mean').apply(lambda x: x.iloc[::-1])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Feature1,Feature2
Participant,Path,Run,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Paul,A,1,2,2.0,20.0
Paul,A,1,1,1.5,15.0
Paul,A,1,0,1.0,10.0
Mark,A,1,5,5.0,50.0
Mark,A,1,4,4.5,45.0
Mark,A,1,3,4.0,40.0
John,A,1,8,8.0,80.0
John,A,1,7,7.5,75.0
John,A,1,6,7.0,70.0


In [None]:
agg_df = slim_df.groupby(['Participant', 'Path', 'Run']).rolling(window=window_size, min_periods=1).mean().reset_index().dropna()