# Preprocessing

1. Import pandas and numpy libraries.
2. Read the Esense sensor readings csv file into a pandas Dataframe and converting UNIX time to Local TimeZone
3. Upsample Esense data to 100Hz to match with that of Bangle.js Data

In [1]:
import pandas as pd
import numpy as np

df_sub1_esense = pd.read_csv('..\sensor_data\eSense_sbj1.csv', names=['time','Device','E1','E2','E3','E4','E5','E6','Label'], index_col=0)
df_sub1_esense.index = pd.to_datetime(df_sub1_esense.index, unit='ms') + pd.Timedelta('02:00:00')
df_sub1_esense = df_sub1_esense.resample('10ms', origin='start').first().interpolate()
#df_sub1_esense.sort_index()

df_sub2_esense = pd.read_csv('..\sensor_data\eSense_sbj2.csv', names=['time','Device','E1','E2','E3','E4','E5','E6','Label'], index_col=0)
df_sub2_esense.index = pd.to_datetime(df_sub2_esense.index, unit='ms') + pd.Timedelta('02:00:00')
df_sub2_esense = df_sub2_esense.resample('10ms', origin='start').first().interpolate()
#df_sub2_esense.sort_index()

1. Read Bangle.js ankle sensor readings csv file into pandas Dataframe for each individual activity
2. Correcting timestamps by keeping the frequency at 92 Hz
3. Upsample Bangle.js data to 100Hz to match with that of Esense Data
4. Merge all activities dataframes into a single dataframe for Ankle sensor

In [2]:
df_sub1_ankle_dribbling = pd.read_csv('..\sensor_data\dribbling_sbj1_ankle.csv', index_col=0)
df_sub1_ankle_dribbling.index = pd.date_range(start='2021-04-14 18:22:13.553', periods = df_sub1_ankle_dribbling.shape[0], freq="10.869565ms")
df_sub1_ankle_dribbling = df_sub1_ankle_dribbling.resample('10ms', origin='start').first().interpolate()

df_sub1_ankle_layup = pd.read_csv('..\sensor_data\layup_sbj1_ankle.csv', index_col=0)
df_sub1_ankle_layup.index = pd.date_range(start='2021-04-14 18:22:13.553', periods = df_sub1_ankle_layup.shape[0], freq="10.869565ms")
df_sub1_ankle_layup = df_sub1_ankle_layup.resample('10ms', origin='start').first().interpolate()

df_sub1_ankle_shooting = pd.read_csv('..\sensor_data\shooting_sbj1_ankle.csv', index_col=0)
df_sub1_ankle_shooting.index = pd.date_range(start='2021-04-14 18:12:54.023', periods = df_sub1_ankle_shooting.shape[0], freq="10.869565ms")
df_sub1_ankle_shooting = df_sub1_ankle_shooting.resample('10ms', origin='start').first().interpolate()

df_sub1_ankle_movements = pd.read_csv('..\sensor_data\movements_sbj1_ankle_1.csv', index_col=0)
df_sub1_ankle_movements.index = pd.date_range(start='2021-04-14 18:42:32.023', periods = df_sub1_ankle_movements.shape[0], freq="10.869565ms")
df_sub1_ankle_movements = df_sub1_ankle_movements.resample('10ms', origin='start').first().interpolate()

df_sub1_ankle = pd.concat([df_sub1_ankle_layup, df_sub1_ankle_shooting, df_sub1_ankle_movements])
df_sub1_ankle['acc_z'] = df_sub1_ankle['acc_z'].replace({';':''}, regex=True)
df_sub1_ankle = df_sub1_ankle.set_axis(['A1', 'A2', 'A3'], axis=1)
df_sub1_ankle['A3'] = df_sub1_ankle['A3'].astype(np.float64)
#df_sub1_ankle.sort_index()


df_sub2_ankle_dribbling = pd.read_csv('..\sensor_data\dribbling_sbj2_ankle.csv', index_col=0)
df_sub2_ankle_dribbling.index = pd.date_range(start='2021-04-14 19:26:42.914', periods = df_sub2_ankle_dribbling.shape[0], freq="10.869565ms")
df_sub2_ankle_dribbling = df_sub2_ankle_dribbling.resample('10ms', origin='start').first().interpolate()

df_sub2_ankle_layup = pd.read_csv('..\sensor_data\layup_sbj2_ankle.csv', index_col=0)
df_sub2_ankle_layup.index = pd.date_range(start='2021-04-14 19:16:25.794', periods = df_sub2_ankle_layup.shape[0], freq="10.869565ms")
df_sub2_ankle_layup = df_sub2_ankle_layup.resample('10ms', origin='start').first().interpolate()

df_sub2_ankle_shooting = pd.read_csv('..\sensor_data\shooting_sbj2_ankle_1.csv', index_col=0)
df_sub2_ankle_shooting.index = pd.date_range(start='2021-04-14 18:55:50.364', periods = df_sub2_ankle_shooting.shape[0], freq="10.869565ms")
df_sub2_ankle_shooting = df_sub2_ankle_shooting.resample('10ms', origin='start').first().interpolate()

df_sub2_ankle_movements = pd.read_csv('..\sensor_data\movements_sbj2_ankle_1.csv', index_col=0)
df_sub2_ankle_movements.index = pd.date_range(start='2021-04-14 19:36:19.244', periods = df_sub2_ankle_movements.shape[0], freq="10.869565ms")
df_sub2_ankle_movements = df_sub2_ankle_movements.resample('10ms', origin='start').first().interpolate()

df_sub2_ankle = pd.concat([df_sub2_ankle_dribbling, df_sub2_ankle_layup, df_sub2_ankle_shooting, df_sub2_ankle_movements])
df_sub2_ankle['acc_z'] = df_sub2_ankle['acc_z'].replace({';':''}, regex=True)
df_sub2_ankle = df_sub2_ankle.set_axis(['A1', 'A2', 'A3'], axis=1)
df_sub2_ankle['A3'] = df_sub2_ankle['A3'].astype(np.float64)
#df_sub2_ankle.sort_index()

1. Read Bangle.js wrist sensor readings csv file into pandas Dataframe for each individual activity (of Subject2)
2. Correcting timestamps by keeping the frequency at 92 Hz
3. Upsample Bangle.js data to 100Hz to match with that of Esense Data
4. Merge all activities dataframes into a single dataframe for Wrist sensor

In [3]:
df_sub1_wrist_dribbling = pd.read_csv('..\sensor_data\dribbling_sbj1_wrist.csv', index_col=0)
df_sub1_wrist_dribbling.index = pd.date_range(start='2021-04-14 18:32:17.683', periods = df_sub1_wrist_dribbling.shape[0], freq="10.869565ms")
df_sub1_wrist_dribbling = df_sub1_wrist_dribbling.resample('10ms', origin='start').first().interpolate()

df_sub1_wrist_layup = pd.read_csv('..\sensor_data\layup_sbj1_wrist.csv', index_col=0)
df_sub1_wrist_layup.index = pd.date_range(start='2021-04-14 18:22:52.523', periods = df_sub1_wrist_layup.shape[0], freq="10.869565ms")
df_sub1_wrist_layup = df_sub1_wrist_layup.resample('10ms', origin='start').first().interpolate()

df_sub1_wrist_shooting = pd.read_csv('..\sensor_data\shooting_sbj1_wrist.csv', index_col=0)
df_sub1_wrist_shooting.index = pd.date_range(start='2021-04-14 18:13:12.833', periods = df_sub1_wrist_shooting.shape[0], freq="10.869565ms")
df_sub1_wrist_shooting = df_sub1_wrist_shooting.resample('10ms', origin='start').first().interpolate()

df_sub1_wrist_movements = pd.read_csv('..\sensor_data\movements_sbj1_wrist.csv', index_col=0)
df_sub1_wrist_movements.index = pd.date_range(start='2021-04-14 18:42:05.243', periods = df_sub1_wrist_movements.shape[0], freq="10.869565ms")
df_sub1_wrist_movements = df_sub1_wrist_movements.resample('10ms', origin='start').first().interpolate()

df_sub1_wrist = pd.concat([df_sub1_wrist_layup, df_sub1_wrist_shooting, df_sub1_wrist_movements])
df_sub1_wrist['acc_z'] = df_sub1_wrist['acc_z'].replace({';':''}, regex=True)
df_sub1_wrist = df_sub1_wrist.set_axis(['W1', 'W2', 'W3'], axis=1)
df_sub1_wrist['W3'] = df_sub1_wrist['W3'].astype(np.float64)
#df_sub1_wrist.sort_index()


df_sub2_wrist_dribbling = pd.read_csv('..\sensor_data\dribbling_sbj2_wrist.csv', index_col=0)
df_sub2_wrist_dribbling.index = pd.date_range(start='2021-04-14 19:26:24.464', periods = df_sub2_wrist_dribbling.shape[0], freq="10.869565ms")
df_sub2_wrist_dribbling = df_sub2_wrist_dribbling.resample('10ms', origin='start').first().interpolate()

df_sub2_wrist_layup = pd.read_csv('..\sensor_data\layup_sbj2_wrist.csv', index_col=0)
df_sub2_wrist_layup.index = pd.date_range(start='2021-04-14 19:16:05.604', periods = df_sub2_wrist_layup.shape[0], freq="10.869565ms")
df_sub2_wrist_layup = df_sub2_wrist_layup.resample('10ms', origin='start').first().interpolate()

df_sub2_wrist_shooting = pd.read_csv('..\sensor_data\shooting_sbj2_wrist.csv', index_col=0)
df_sub2_wrist_shooting.index = pd.date_range(start='2021-04-14 18:56:21.474', periods = df_sub2_wrist_shooting.shape[0], freq="10.869565ms")
df_sub2_wrist_shooting = df_sub2_wrist_shooting.resample('10ms', origin='start').first().interpolate()

df_sub2_wrist_movements = pd.read_csv('..\sensor_data\movements_sbj2_wrist.csv', index_col=0)
df_sub2_wrist_movements.index = pd.date_range(start='2021-04-14 19:36:45.074', periods = df_sub2_wrist_movements.shape[0], freq="10.869565ms")
df_sub2_wrist_movements = df_sub2_wrist_movements.resample('10ms', origin='start').first().interpolate()

df_sub2_wrist = pd.concat([df_sub2_wrist_dribbling, df_sub2_wrist_layup, df_sub2_wrist_shooting, df_sub2_wrist_movements])
df_sub2_wrist['acc_z'] = df_sub2_wrist['acc_z'].replace({';':''}, regex=True)
df_sub2_wrist = df_sub2_wrist.set_axis(['W1', 'W2', 'W3'], axis=1)
df_sub2_wrist['W3'] = df_sub2_wrist['W3'].astype(np.float64)
#df_sub2_wrist.sort_index()

1. Merge Esensor, Ankle, Wrist sensor dataframes into a single dataframe Subject2
2. Relabel all the records as null class
3. Relabel the records into 5 activites as per the Timestamps from the Videos

In [4]:
df_sub1 = pd.concat([df_sub1_esense, df_sub1_ankle, df_sub1_wrist], axis=1, join="inner")
df_sub1.Label = 'null'

#df_sub1.loc[pd.to_datetime('2021-04-14 19:27:01.000'):pd.to_datetime('2021-04-14 19:29:55.000'), 'Label'] = 'dribbling'
df_sub1.loc[pd.to_datetime('2021-04-14 18:23:20.000'):pd.to_datetime('2021-04-14 18:25:25.000'), 'Label'] = 'layup'
df_sub1.loc[pd.to_datetime('2021-04-14 18:45:18.000'):pd.to_datetime('2021-04-14 18:47:11.000'), 'Label'] = 'running'
df_sub1.loc[pd.to_datetime('2021-04-14 18:13:30.000'):pd.to_datetime('2021-04-14 18:15:40.000'), 'Label'] = 'shooting'
df_sub1.loc[pd.to_datetime('2021-04-14 18:42:48.000'):pd.to_datetime('2021-04-14 18:45:17.000'), 'Label'] = 'walking'
#df_sub1


df_sub2 = pd.concat([df_sub2_esense, df_sub2_ankle, df_sub2_wrist], axis=1, join="inner")
df_sub2.Label = 'null'

df_sub2.loc[pd.to_datetime('2021-04-14 19:27:01.000'):pd.to_datetime('2021-04-14 19:29:55.000'), 'Label'] = 'dribbling'
df_sub2.loc[pd.to_datetime('2021-04-14 19:17:40.000'):pd.to_datetime('2021-04-14 19:20:10.000'), 'Label'] = 'layup'
df_sub2.loc[pd.to_datetime('2021-04-14 19:39:47.000'):pd.to_datetime('2021-04-14 19:41:50.000'), 'Label'] = 'running'
df_sub2.loc[pd.to_datetime('2021-04-14 19:00:18.000'):pd.to_datetime('2021-04-14 19:01:42.000'), 'Label'] = 'shooting'
df_sub2.loc[pd.to_datetime('2021-04-14 19:37:18.000'):pd.to_datetime('2021-04-14 19:39:46.000'), 'Label'] = 'walking'
#df_sub2

1. Drop unnecessary Columns
2. Drop all rows with any missing values

In [5]:
df_sub1 = df_sub1.drop(columns=['Device'])
df_sub1.dropna(inplace=True)
df_sub1.reset_index(drop=True, inplace=True)
#df_sub1


df_sub2 = df_sub2.drop(columns=['Device'])
df_sub2.dropna(inplace=True)
df_sub2.reset_index(drop=True, inplace=True)
#df_sub2

Normalization
1. Zero mean and Unit Variance standardization

In [6]:
column = ['E1', 'E2', 'E3', 'E4', 'E5', 'E6', 'A1', 'A2', 'A3', 'W1', 'W2', 'W3']
df_sub1[column] = (df_sub1[column] - df_sub1[column].mean()) / df_sub1[column].std()    
#df_sub1

df_sub2[column] = (df_sub2[column] - df_sub2[column].mean()) / df_sub2[column].std()    
#df_sub2

1. Add new Subject Label to the Dataframes and merge them together

In [7]:
df_sub1['Subject'] = 'Subject1'

df_sub2['Subject'] = 'Subject2'

df = pd.concat([df_sub1, df_sub2])
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,E1,E2,E3,E4,E5,E6,Label,A1,A2,A3,W1,W2,W3,Subject
0,0.618401,1.691442,-0.976735,-0.774083,0.433445,0.604095,,0.107023,0.388318,0.481222,0.669849,-0.174150,0.433733,Subject1
1,0.617975,1.697821,-0.954008,-0.743713,0.427695,0.626773,,0.114494,0.376683,0.497912,0.693698,-0.322711,0.356920,Subject1
2,0.617548,1.704199,-0.931281,-0.713343,0.421946,0.649452,,0.089287,0.319821,0.550984,0.806475,-0.580197,0.278296,Subject1
3,0.617121,1.710577,-0.908553,-0.682973,0.416196,0.672131,,0.055984,0.270973,0.681915,0.800686,-0.686583,0.175765,Subject1
4,0.616694,1.716956,-0.885826,-0.652603,0.410446,0.694809,,0.068428,0.298372,0.765344,0.738880,-0.566457,0.123586,Subject1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137198,-0.158875,1.151302,-2.776552,0.241354,0.291022,-0.113816,,0.295287,-0.234971,-0.366231,-0.674764,0.558879,0.569187,Subject2
137199,-0.147549,1.158013,-2.761551,0.260865,0.304502,-0.146412,,0.303762,-0.234716,-0.373863,-0.669681,0.539690,0.566564,Subject2
137200,-0.136222,1.164724,-2.746550,0.280376,0.317983,-0.179008,,0.307605,-0.237057,-0.376407,-0.669387,0.533298,0.561288,Subject2
137201,-0.124895,1.171435,-2.731549,0.299887,0.331463,-0.211605,,0.304518,-0.239909,-0.360121,-0.668786,0.531372,0.558264,Subject2


1. Verify and get Number of records for each activity

In [8]:
df.loc[df['Label'] == 'running']

Unnamed: 0,E1,E2,E3,E4,E5,E6,Label,A1,A2,A3,W1,W2,W3,Subject
49219,0.480937,0.072879,0.507433,-0.498172,-0.165455,-0.983526,running,-1.691847,-0.094243,1.208185,0.050730,-1.176338,-0.018435,Subject1
49220,0.476914,-0.077087,0.619230,-0.648300,-0.541059,-1.277799,running,-1.805156,0.158540,1.491809,0.043861,-1.110531,-0.012276,Subject1
49221,0.363506,-0.287977,0.753364,-0.731056,-0.603498,-1.468077,running,-1.881107,0.494028,1.623754,0.079272,-1.014678,0.030107,Subject1
49222,0.335259,-0.309267,0.772180,-0.731310,-0.574295,-1.473073,running,-1.736051,0.994163,1.442257,0.112537,-0.953987,0.048598,Subject1
49223,0.307012,-0.330557,0.790997,-0.731563,-0.545092,-1.478068,running,-1.248594,1.452173,1.073713,0.146142,-0.886897,0.036993,Subject1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135585,2.181556,0.435621,-0.515452,-0.036742,-0.444067,0.267669,running,0.658712,-0.603652,0.060416,-0.632295,0.463261,-0.290277,Subject2
135586,2.299563,0.497986,-0.604347,-0.070799,-0.370973,0.264087,running,0.670242,-0.599757,0.049718,-0.693016,0.506119,-0.301199,Subject2
135587,2.358566,0.529168,-0.648795,-0.087827,-0.334426,0.262296,running,0.680622,-0.597937,0.020189,-0.740263,0.461021,-0.295182,Subject2
135588,2.417569,0.560350,-0.693242,-0.104856,-0.297879,0.260505,running,0.692168,-0.598459,0.003402,-0.784521,0.378841,-0.282748,Subject2


In [9]:
df.loc[df['Subject'] == 'Subject2']

Unnamed: 0,E1,E2,E3,E4,E5,E6,Label,A1,A2,A3,W1,W2,W3,Subject
61313,-0.390557,0.235895,-0.152388,0.129028,0.379993,-0.160382,,0.095712,0.218920,0.078746,-0.227972,-0.513043,1.542037,Subject2
61314,-0.391954,0.236560,-0.156674,0.123981,0.378529,-0.155636,,0.098784,0.226199,0.075681,-0.222288,-0.444932,1.526225,Subject2
61315,-0.393352,0.237226,-0.160960,0.118934,0.377065,-0.150889,,0.100328,0.231392,0.081791,-0.236939,-0.417110,1.506633,Subject2
61316,-0.394749,0.237891,-0.165246,0.113887,0.375602,-0.146143,,0.101084,0.232169,0.087401,-0.256085,-0.403030,1.471984,Subject2
61317,-0.396147,0.238557,-0.169532,0.108840,0.374138,-0.141397,,0.095712,0.232169,0.114385,-0.277607,-0.393756,1.448257,Subject2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137198,-0.158875,1.151302,-2.776552,0.241354,0.291022,-0.113816,,0.295287,-0.234971,-0.366231,-0.674764,0.558879,0.569187,Subject2
137199,-0.147549,1.158013,-2.761551,0.260865,0.304502,-0.146412,,0.303762,-0.234716,-0.373863,-0.669681,0.539690,0.566564,Subject2
137200,-0.136222,1.164724,-2.746550,0.280376,0.317983,-0.179008,,0.307605,-0.237057,-0.376407,-0.669387,0.533298,0.561288,Subject2
137201,-0.124895,1.171435,-2.731549,0.299887,0.331463,-0.211605,,0.304518,-0.239909,-0.360121,-0.668786,0.531372,0.558264,Subject2


In [18]:
df

Unnamed: 0,E1,E2,E3,E4,E5,E6,Label,A1,A2,A3,W1,W2,W3,Subject
0,0.618401,1.691442,-0.976735,-0.774083,0.433445,0.604095,,0.107023,0.388318,0.481222,0.669849,-0.174150,0.433733,Subject1
1,0.617975,1.697821,-0.954008,-0.743713,0.427695,0.626773,,0.114494,0.376683,0.497912,0.693698,-0.322711,0.356920,Subject1
2,0.617548,1.704199,-0.931281,-0.713343,0.421946,0.649452,,0.089287,0.319821,0.550984,0.806475,-0.580197,0.278296,Subject1
3,0.617121,1.710577,-0.908553,-0.682973,0.416196,0.672131,,0.055984,0.270973,0.681915,0.800686,-0.686583,0.175765,Subject1
4,0.616694,1.716956,-0.885826,-0.652603,0.410446,0.694809,,0.068428,0.298372,0.765344,0.738880,-0.566457,0.123586,Subject1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137198,-0.158875,1.151302,-2.776552,0.241354,0.291022,-0.113816,,0.295287,-0.234971,-0.366231,-0.674764,0.558879,0.569187,Subject2
137199,-0.147549,1.158013,-2.761551,0.260865,0.304502,-0.146412,,0.303762,-0.234716,-0.373863,-0.669681,0.539690,0.566564,Subject2
137200,-0.136222,1.164724,-2.746550,0.280376,0.317983,-0.179008,,0.307605,-0.237057,-0.376407,-0.669387,0.533298,0.561288,Subject2
137201,-0.124895,1.171435,-2.731549,0.299887,0.331463,-0.211605,,0.304518,-0.239909,-0.360121,-0.668786,0.531372,0.558264,Subject2


In [16]:
df2 = pd.read_csv('..\sensor_data\Dataset.csv')

In [17]:
df2

Unnamed: 0,E1,E2,E3,E4,E5,E6,Label,A1,A2,A3,W1,W2,W3,Subject
0,0.618401,1.691442,-0.976735,-0.774083,0.433445,0.604095,,0.107023,0.388318,0.481222,0.669849,-0.174150,0.433733,Subject1
1,0.617975,1.697821,-0.954008,-0.743713,0.427695,0.626773,,0.114494,0.376683,0.497912,0.693698,-0.322711,0.356920,Subject1
2,0.617548,1.704199,-0.931281,-0.713343,0.421946,0.649452,,0.089287,0.319821,0.550984,0.806475,-0.580197,0.278296,Subject1
3,0.617121,1.710577,-0.908553,-0.682973,0.416196,0.672131,,0.055984,0.270973,0.681915,0.800686,-0.686583,0.175765,Subject1
4,0.616694,1.716956,-0.885826,-0.652603,0.410446,0.694809,,0.068428,0.298372,0.765344,0.738880,-0.566457,0.123586,Subject1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137198,-0.158875,1.151302,-2.776552,0.241354,0.291022,-0.113816,,0.295287,-0.234971,-0.366231,-0.674764,0.558879,0.569187,Subject2
137199,-0.147549,1.158013,-2.761551,0.260865,0.304502,-0.146412,,0.303762,-0.234716,-0.373863,-0.669681,0.539690,0.566564,Subject2
137200,-0.136222,1.164724,-2.746550,0.280376,0.317983,-0.179008,,0.307605,-0.237057,-0.376407,-0.669387,0.533298,0.561288,Subject2
137201,-0.124895,1.171435,-2.731549,0.299887,0.331463,-0.211605,,0.304518,-0.239909,-0.360121,-0.668786,0.531372,0.558264,Subject2
