In [1]:
import pandas as pd
import numpy as np
import os
import math
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score

In [2]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV


In [3]:
def preprocess(df, name):
    df_gyro = pd.DataFrame(columns=['time','GY(x)', 'GY(y)', 'GY(z)'])
    df_acc = pd.DataFrame(columns=['time','LA(x)', 'LA(y)', 'LA(z)'])
    
    df_gyro[['time','GY(x)', 'GY(y)', 'GY(z)']] = df[['TimeStamp','GY(x)', 'GY(y)', 'GY(z)']].copy()
#     df_gyro['name'] = df_gyro['name'].fillna(name)
    df_gyro = df_gyro.dropna()
    df_acc[['time','LA(x)', 'LA(y)', 'LA(z)']] = df[['TimeStamp','LA(x)', 'LA(y)', 'LA(z)']].copy()
#     df_acc['name'] = df_acc['name'].fillna(name)
    df_acc = df_acc.dropna()
    
    df_gyro['time'] = pd.to_datetime(df_gyro['time'], unit='ms')
    df_indexed_gyro = df_gyro.set_index('time')
    resampled_df_gyro = df_indexed_gyro.resample('5ms').mean().interpolate()
    resampled_df_gyro['name'] = name
    resampled_df_gyro.reset_index(inplace=True)
    
    df_acc['time'] = pd.to_datetime(df_acc['time'], unit='ms')
    df_indexed_acc = df_acc.set_index('time')
    resampled_df_acc = df_indexed_acc.resample('5ms').mean().interpolate()
#     resampled_df_acc['name'] = name
    resampled_df_acc.reset_index(inplace=True)
    
    # Inner join: Only the common values in both DataFrames are included in the merged DataFrame (inner_merged_df).
    
    inner_merged_df = pd.merge(resampled_df_acc, resampled_df_gyro, on='time', how='inner')
    
    return inner_merged_df

In [4]:
df_final = pd.DataFrame(columns=['name','time','LA(x)', 'LA(y)', 'LA(z)','GY(x)', 'GY(y)', 'GY(z)'])
directory = os.path.join(os.getcwd(), 'straightWalk')
x=1
firstTime = True
for filename in os.listdir(directory):
    if (filename.endswith(".csv")):
        filepath = os.path.join(directory, filename)
        df_original = pd.read_csv(filepath)
        
#         num_rows_before = df_original.shape[0]
    
#         twenty = num_rows_before * 0.20
#         twenty = math.ceil(twenty)
        
#         #test = test.iloc[fivePercent:]
#         df_original_remove20 = df_original.iloc[:-twenty]
        
         
       
        df_processed = preprocess(df_original, filename[:-4])

        df_final = pd.concat([df_final, df_processed])
        
df_final

Unnamed: 0,name,time,LA(x),LA(y),LA(z),GY(x),GY(y),GY(z)
0,alibadawy,1970-01-01 00:00:00.000,-0.268265,-0.005538,-0.071094,-0.016798,0.103233,0.033138
1,alibadawy,1970-01-01 00:00:00.005,-0.163861,-0.004340,0.122527,-0.017613,0.110563,0.034157
2,alibadawy,1970-01-01 00:00:00.010,-0.018824,-0.011455,0.059537,-0.018936,0.123697,0.031000
3,alibadawy,1970-01-01 00:00:00.015,0.043948,-0.010231,0.135187,-0.019445,0.136423,0.030288
4,alibadawy,1970-01-01 00:00:00.020,0.106720,-0.009007,0.210836,-0.013744,0.146603,0.027030
...,...,...,...,...,...,...,...,...
5275,zoz,1970-01-01 00:00:26.375,0.032646,0.028275,0.519662,0.135150,-0.022907,0.223494
5276,zoz,1970-01-01 00:00:26.380,-0.478584,-0.296353,0.069982,0.152254,-0.098041,0.268391
5277,zoz,1970-01-01 00:00:26.385,-0.897290,-0.563466,-0.287334,0.155308,-0.165234,0.318786
5278,zoz,1970-01-01 00:00:26.390,-1.035197,-0.655348,-0.474507,0.145534,-0.212269,0.362156


In [5]:
df_Xtime = df_final.drop(['time'], axis = 1).copy()
df_Xtime.shape

(155302, 7)

In [6]:
label = LabelEncoder()
df_Xtime['label'] = label.fit_transform(df_Xtime['name'])
df_Xtime

Unnamed: 0,name,LA(x),LA(y),LA(z),GY(x),GY(y),GY(z),label
0,alibadawy,-0.268265,-0.005538,-0.071094,-0.016798,0.103233,0.033138,0
1,alibadawy,-0.163861,-0.004340,0.122527,-0.017613,0.110563,0.034157,0
2,alibadawy,-0.018824,-0.011455,0.059537,-0.018936,0.123697,0.031000,0
3,alibadawy,0.043948,-0.010231,0.135187,-0.019445,0.136423,0.030288,0
4,alibadawy,0.106720,-0.009007,0.210836,-0.013744,0.146603,0.027030,0
...,...,...,...,...,...,...,...,...
5275,zoz,0.032646,0.028275,0.519662,0.135150,-0.022907,0.223494,30
5276,zoz,-0.478584,-0.296353,0.069982,0.152254,-0.098041,0.268391,30
5277,zoz,-0.897290,-0.563466,-0.287334,0.155308,-0.165234,0.318786,30
5278,zoz,-1.035197,-0.655348,-0.474507,0.145534,-0.212269,0.362156,30


In [7]:
X = df_Xtime[['LA(x)', 'LA(y)', 'LA(z)','GY(x)', 'GY(y)', 'GY(z)']].copy()
y = df_Xtime['label'].copy()

In [8]:
scaler = StandardScaler()
X_transformed = X.copy()
X_transformed = scaler.fit_transform(X_transformed)

scaled_X = pd.DataFrame(data = X_transformed, columns = ['LA(x)', 'LA(y)', 'LA(z)','GY(x)', 'GY(y)', 'GY(z)'])
scaled_X['label'] = y.values

scaled_X

Unnamed: 0,LA(x),LA(y),LA(z),GY(x),GY(y),GY(z),label
0,-0.157996,0.116219,-0.109081,-0.070382,0.176505,0.142357,0
1,-0.120536,0.116679,-0.048151,-0.071206,0.181998,0.143630,0
2,-0.068498,0.113950,-0.067973,-0.072545,0.191841,0.139684,0
3,-0.045976,0.114420,-0.044167,-0.073060,0.201378,0.138793,0
4,-0.023454,0.114889,-0.020362,-0.067292,0.209007,0.134721,0
...,...,...,...,...,...,...,...
155297,-0.050031,0.129187,0.076822,0.083323,0.081973,0.380324,30
155298,-0.233456,0.004690,-0.064686,0.100625,0.025666,0.436450,30
155299,-0.383684,-0.097750,-0.177129,0.103714,-0.024689,0.499450,30
155300,-0.433164,-0.132987,-0.236030,0.093828,-0.059938,0.553668,30


In [9]:
import scipy.stats as stats

Fs = 200
frame_size = Fs*9
hop_size = int(Fs*2)

In [10]:
df_framed = pd.DataFrame(columns=['name','mean_LA(x)', 'mean_LA(y)', 'mean_LA(z)', 'mean_GY(x)', 'mean_GY(y)', 'mean_GY(z)','sd_LA(x)', 'sd_LA(y)', 'sd_LA(z)', 'sd_GY(x)', 'sd_GY(y)', 'sd_GY(z)'])
def get_frames(df, frame_size, hop_size):
    
    last_value = df['label'].iloc[-1]
    for j in range (0,last_value+1): 
        filtered_df = df[df['label'] == j]
        filtered_df = filtered_df.drop('label', axis = 1)
        for i in range(0, len(filtered_df) - frame_size, hop_size):
            frame = filtered_df[i: i + frame_size]

            new_row = [j
                       ,frame['LA(x)'].mean(), frame['LA(y)'].mean(), frame['LA(z)'].mean(),
                       frame['GY(x)'].mean(), frame['GY(y)'].mean(), frame['GY(z)'].mean(),
                       frame['LA(x)'].std(), frame['LA(y)'].std(), frame['LA(z)'].std(),
                       frame['GY(x)'].std(), frame['GY(y)'].std(), frame['GY(z)'].std()]



            df_framed.loc[len(df_framed)] = new_row


    return df_framed

In [11]:
df_framed = get_frames(scaled_X,frame_size,hop_size)
df_framed

Unnamed: 0,name,mean_LA(x),mean_LA(y),mean_LA(z),mean_GY(x),mean_GY(y),mean_GY(z),sd_LA(x),sd_LA(y),sd_LA(z),sd_GY(x),sd_GY(y),sd_GY(z)
0,0.0,-0.123772,-0.022685,0.090589,-0.013605,-0.113176,-0.065143,1.054545,0.756838,1.043157,1.011858,0.972352,0.911135
1,0.0,-0.095532,-0.084408,0.110001,0.009763,-0.167659,-0.054797,1.154537,0.841931,1.205164,1.103680,1.054478,0.984836
2,0.0,-0.072047,-0.058672,0.215849,0.026760,-0.423716,-0.076474,1.124866,0.787374,1.209913,1.059965,1.149488,0.948850
3,0.0,-0.029702,-0.083121,0.178993,-0.028142,-0.335948,-0.113417,1.170933,0.862652,1.227368,1.098756,1.186656,0.991700
4,0.0,-0.033854,-0.062234,0.156797,-0.018488,-0.279757,-0.053662,1.119523,0.866007,1.199704,1.101729,1.128499,0.964023
...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,30.0,-0.073557,0.105486,-0.113689,-0.250535,-0.352924,-0.095886,0.764740,0.796635,0.693533,0.854566,0.884741,0.692349
260,30.0,-0.044901,0.118608,-0.093778,-0.262166,-0.380931,-0.093657,0.723956,0.780506,0.686542,0.852095,0.867233,0.649154
261,30.0,-0.070672,0.121726,-0.120054,-0.146014,-0.179149,-0.004259,0.729235,0.808793,0.673689,0.872510,0.771902,0.636169
262,30.0,-0.065175,0.100350,-0.108569,-0.236716,-0.348628,-0.073140,0.679243,0.731258,0.633527,0.775940,0.834792,0.618006


In [12]:
X = df_framed.drop('name', axis=1)  # Features (all columns except the target)
y = df_framed['name']  # Labels

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0, stratify = y)
X_train.shape, X_test.shape

((184, 12), (80, 12))

In [14]:
occurrences = y_test.value_counts()
print(occurrences.mean())

2.5806451612903225


In [15]:
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test)
#dval = xgb.DMatrix(X_validation, label=y_validation)
dtrain, dtest #, dval

(<xgboost.core.DMatrix at 0x190d10b7520>,
 <xgboost.core.DMatrix at 0x190d10b6f20>)

In [16]:
params = {
    'max_depth': 7,
    'learning_rate': 0.3,
    'objective': 'multi:softmax',
    'num_class': 50
}
epochs = 25
evals_result = {}  # Initialize an empty dictionary to store the evaluation results

In [17]:
model = xgb.train(params, dtrain, epochs,  verbose_eval=1)

In [18]:
model = xgb.train(params, dtrain, epochs, evals=[(dtrain, 'train')],
                  verbose_eval=1)

[0]	train-mlogloss:3.14359
[1]	train-mlogloss:2.45240
[2]	train-mlogloss:1.82864
[3]	train-mlogloss:1.39217
[4]	train-mlogloss:1.07540
[5]	train-mlogloss:0.85327
[6]	train-mlogloss:0.68155
[7]	train-mlogloss:0.55676
[8]	train-mlogloss:0.46322
[9]	train-mlogloss:0.39208
[10]	train-mlogloss:0.33351
[11]	train-mlogloss:0.28760
[12]	train-mlogloss:0.25081
[13]	train-mlogloss:0.22104
[14]	train-mlogloss:0.19996
[15]	train-mlogloss:0.18417
[16]	train-mlogloss:0.17264
[17]	train-mlogloss:0.16286
[18]	train-mlogloss:0.15455
[19]	train-mlogloss:0.14765
[20]	train-mlogloss:0.14183
[21]	train-mlogloss:0.13688
[22]	train-mlogloss:0.13289
[23]	train-mlogloss:0.12936
[24]	train-mlogloss:0.12663


In [21]:
predictions = model.predict(dtest)
accuracy_score(y_test, predictions)

0.8375