In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
import seaborn as sns # for plotting.
import os
import math

In [2]:
# Location of the merged CSV that holds the raw recordings.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this exact drive layout exists — consider a configurable
# data directory.
filepath = "D:/online dataset csv files preprocessing/merged_u1-50_w001_resampled.csv"
# Read the whole file into memory as a DataFrame.
df_original = pd.read_csv(filepath)

In [3]:
# Preview the loaded recordings (pandas abbreviates long frames in the display).
df_original

Unnamed: 0,timestamp,LAx,LAy,LAz,GYx,GYy,GYz,name
0,924313410000000,1.242813,-0.160181,-0.900440,0.194931,-0.004242,0.163818,1
1,924313420000000,1.117246,-0.187921,-1.529893,0.236458,0.076378,0.133293,1
2,924313430000000,0.912846,-0.192566,-1.570929,0.226074,0.012253,0.108246,1
3,924313440000000,0.874858,-0.227208,-1.453511,0.139336,-0.050667,0.085030,1
4,924313450000000,0.834022,-0.282976,-1.292671,-0.013374,-0.109314,0.078308,1
...,...,...,...,...,...,...,...,...
2055043,1368327120000000,-9.502644,-7.178468,4.246279,-2.834417,-0.120428,-0.987857,50
2055044,1368327130000000,-9.443804,-7.698220,3.059675,-2.638940,0.561997,-0.947715,50
2055045,1368327140000000,-8.345459,-6.805815,1.853457,-2.483606,1.225222,-0.954696,50
2055046,1368327150000000,-7.580540,-5.629017,1.559257,-2.429500,1.769765,-1.003565,50


In [4]:
import scipy.stats as stats

# Framing parameters, all expressed in samples.
# Fs is presumably the sampling rate in Hz of the resampled data — TODO confirm.
Fs = 100
# Each frame spans six times Fs samples (600 with the value above).
frame_size = 6 * Fs
# The hop equals the frame length, so consecutive frames do not overlap.
hop_size = frame_size

In [5]:
# Sensor columns that are summarised for every frame.
SENSOR_COLUMNS = ['LAx', 'LAy', 'LAz', 'GYx', 'GYy', 'GYz']

# Column layout of the per-frame feature table: the label first, then the
# mean of every sensor column, then the standard deviation of every sensor column.
FRAME_FEATURE_COLUMNS = ['name','mean_LA(x)', 'mean_LA(y)', 'mean_LA(z)', 'mean_GY(x)', 'mean_GY(y)', 'mean_GY(z)','sd_LA(x)', 'sd_LA(y)', 'sd_LA(z)', 'sd_GY(x)', 'sd_GY(y)', 'sd_GY(z)']

# Empty placeholder kept so the name exists before the framing cell runs.
# get_frames() builds and returns a fresh table and does not write into this.
df_framed = pd.DataFrame(columns=FRAME_FEATURE_COLUMNS)


def get_frames(df, frame_size, hop_size):
    """Cut each label's recording into fixed-length frames and summarise them.

    Rows of ``df`` are grouped by the ``name`` column. Within each group, in
    row order, a window of ``frame_size`` rows is moved forward ``hop_size``
    rows at a time. For every complete window the mean and the sample standard
    deviation (pandas default, ``ddof=1``) of each column in ``SENSOR_COLUMNS``
    are recorded. Trailing rows that do not fill a complete window are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``name``, ``LAx``, ``LAy``, ``LAz``, ``GYx``,
        ``GYy`` and ``GYz``. Any other columns are ignored.
    frame_size : int
        Number of rows per frame; must be positive.
    hop_size : int
        Number of rows between the starts of consecutive frames; must be positive.

    Returns
    -------
    pandas.DataFrame
        One row per frame with the columns in ``FRAME_FEATURE_COLUMNS``, ordered
        by ascending label and then by frame position. A new frame is built on
        every call, so repeated calls do not accumulate rows. The ``name``
        column keeps the label values as they appear in ``df``.

    Raises
    ------
    ValueError
        If ``frame_size`` or ``hop_size`` is not positive.
    KeyError
        If a required column is missing from ``df``.
    """
    if frame_size <= 0 or hop_size <= 0:
        raise ValueError("frame_size and hop_size must be positive")

    # Collect plain rows and build the DataFrame once at the end; growing a
    # DataFrame one row at a time copies it repeatedly.
    records = []

    # Group on the labels actually present instead of assuming they are the
    # integers 1..N with N stored in the last row.
    for label, label_rows in df.groupby('name', sort=True):
        signals = label_rows[SENSOR_COLUMNS]

        # "+ 1" keeps the final window when the recording ends exactly on a
        # frame boundary (the last valid start is len - frame_size).
        for start in range(0, len(signals) - frame_size + 1, hop_size):
            frame = signals.iloc[start:start + frame_size]

            means = [frame[column].mean() for column in SENSOR_COLUMNS]
            deviations = [frame[column].std() for column in SENSOR_COLUMNS]
            records.append([label] + means + deviations)

    return pd.DataFrame(records, columns=FRAME_FEATURE_COLUMNS)

In [6]:
# Build the per-frame feature table from the raw recordings, then preview it.
df_framed = get_frames(df_original,frame_size,hop_size)
df_framed

Unnamed: 0,name,mean_LA(x),mean_LA(y),mean_LA(z),mean_GY(x),mean_GY(y),mean_GY(z),sd_LA(x),sd_LA(y),sd_LA(z),sd_GY(x),sd_GY(y),sd_GY(z)
0,1.0,-0.393514,-0.231500,0.259489,-0.136301,0.808596,0.365505,1.607389,2.060070,1.568203,0.718605,1.529494,0.896005
1,1.0,0.977162,0.184893,0.420916,0.122411,0.074673,-0.052158,3.646011,3.797331,5.745886,1.701895,2.413996,0.913987
2,1.0,0.868165,-0.320024,-0.093038,0.056006,0.094478,-0.247336,5.704045,3.265884,7.824760,2.248331,1.981881,0.899200
3,1.0,0.651436,-0.042524,0.129633,-0.075404,0.128380,-0.227428,5.328452,3.304334,7.017526,2.167866,1.981418,0.940454
4,1.0,1.084959,-0.319923,-0.195518,0.028891,0.026057,-0.323677,6.076534,3.319325,7.810847,2.432790,2.110462,1.009521
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3386,50.0,-1.020807,-1.171016,1.729468,-0.003498,0.020327,-0.145425,3.422165,3.917514,4.235285,1.613613,1.298959,0.921226
3387,50.0,-1.184758,-1.406637,1.470867,0.128532,0.110669,-0.030852,3.626671,3.994308,4.448399,1.530076,1.284954,0.988766
3388,50.0,-0.831645,-0.959262,1.536584,-0.042937,-0.211504,-0.157940,3.738687,4.264228,4.597193,1.632034,1.387187,1.007491
3389,50.0,-0.732001,-0.887330,1.679462,0.130207,0.025768,-0.080390,3.218597,3.704232,4.181007,1.579482,1.185527,0.788691


In [7]:
# Count how many frames each label has, then print the average count per label.
occurrences = df_framed['name'].value_counts()
print(occurrences.mean())

67.82


In [8]:
# Separate the frame table into the label vector and the feature matrix.
y = df_framed['name']                  # labels: the `name` column
X = df_framed.drop(columns=['name'])   # features: every remaining column

In [9]:
# Hold out 30% of the frames for testing. Stratifying on y keeps the label
# proportions comparable in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=0,
)

In [10]:
# Inspect the held-out feature rows.
X_test

Unnamed: 0,mean_LA(x),mean_LA(y),mean_LA(z),mean_GY(x),mean_GY(y),mean_GY(z),sd_LA(x),sd_LA(y),sd_LA(z),sd_GY(x),sd_GY(y),sd_GY(z)
2791,0.032808,0.084429,0.492218,0.014154,0.109416,-0.088328,2.282674,3.954795,2.760941,1.621210,1.371114,0.392743
1344,0.436311,0.408686,0.150534,-0.027129,-0.010014,-0.045379,4.374253,3.803833,2.732415,1.188413,0.959498,1.233315
1685,0.072158,0.218204,-0.040309,0.020882,0.041122,-0.076128,2.618717,3.863606,4.688277,2.077339,1.864611,0.459489
2365,-0.069151,0.306956,-0.468544,0.024394,-0.116034,0.063477,2.792696,4.727350,3.048325,1.407140,0.983063,0.970491
2934,-0.463939,1.018977,0.576783,-0.012898,-0.053009,-0.162819,5.116019,5.393772,6.898083,2.513474,2.317359,0.745154
...,...,...,...,...,...,...,...,...,...,...,...,...
1795,-0.733358,0.324924,0.335402,0.017717,0.014639,-0.039271,2.459946,4.011541,3.402639,1.733694,1.789364,0.713528
2315,0.054131,0.239604,-0.585784,-0.088441,0.069896,0.092954,2.455233,3.245223,2.944788,1.268048,1.443167,0.507223
1023,0.082773,-0.496494,0.778392,0.039468,-0.013466,-0.056825,1.995192,2.658237,1.673530,0.660690,0.680777,0.430462
885,2.141197,-3.293617,-2.856944,2.646694,-1.440498,1.308344,0.064612,0.008232,0.053304,0.004747,0.007586,0.004809


In [11]:
# Count how many test frames each label has, then print the average count per label.
# Note: this rebinds `occurrences`, replacing the earlier whole-table counts.
occurrences = y_test.value_counts()
print(occurrences.mean())

20.36


In [12]:
# Show the (rows, columns) shape of the training and test feature matrices.
X_train.shape, X_test.shape

((2373, 12), (1018, 12))

In [13]:
# Show the shape of the training and test label vectors.
y_train.shape, y_test.shape

((2373,), (1018,))

In [17]:
from sklearn import metrics

# Support vector classifier with a linear kernel; C is the regularisation
# parameter. Other kernels accepted by SVC: 'poly', 'rbf' (the default),
# 'sigmoid', 'precomputed'.
model = svm.SVC(kernel='linear', C=4)

# Fit on the training split.
model.fit(X_train, y_train)

# Predict the held-out split and report the fraction of correct predictions.
y_pred = model.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.899803536345776


In [15]:
# df_framed.to_csv('MEAN_SD_FRAMED_RESAMPLED_merged_u1-50_w001_resampled.csv', index = False)