# Importing libraires

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 1 - Data processing

In [7]:
#Concatenating the data
file_paths = [f'ampc/w{i}.csv' for i in range(1, 5)]

data = pd.concat([pd.read_csv(f) for f in file_paths], ignore_index=True)

data.to_csv('./combined_data.csv', index = False)

data.head()

Unnamed: 0,acc_mean_x_right,acc_mean_y_right,acc_mean_z_right,acc_mean_xyz_right,acc_mean_xy_right,acc_mean_yz_right,acc_mean_zx_right,acc_mean_pitch_right,acc_mean_roll_right,acc_std_x_right,...,gyro_max_yz_left,gyro_max_zx_left,gyro_peak_x_left,gyro_peak_y_left,gyro_peak_z_left,gyro_peak_xyz_left,gyro_peak_xy_left,gyro_peak_yz_left,gyro_peak_zx_left,class
0,-0.1733,0.14864,0.98128,1.1065,0.44735,1.0611,1.0332,9.9751,52.281,0.25398,...,137.85,79.286,4,4,3,2,2,2,4,2
1,-0.40618,0.24715,0.79471,1.0178,0.52388,0.86595,0.96693,-30.421,48.213,0.26456,...,269.08,103.56,3,1,2,2,2,2,1,2
2,-0.4967,0.37167,0.70283,1.0402,0.68213,0.80958,0.9651,-23.068,52.897,0.35638,...,158.42,114.7,2,3,2,1,1,2,2,2
3,-0.2878,0.15882,0.91688,1.0974,0.50834,1.0276,0.99884,3.2451,31.009,0.29577,...,283.65,120.46,3,2,2,3,4,3,2,2
4,-0.56189,0.36946,0.68668,1.3085,0.91759,1.0218,1.1201,-24.118,47.579,0.5681,...,199.69,93.039,4,2,2,3,3,3,2,2


In [8]:
shuffled_data = data.sample(n=len(data)).reset_index(drop=True)

shuffled_data.to_csv('./all_data.csv', index = False)

shuffled_data.head()

Unnamed: 0,acc_mean_x_right,acc_mean_y_right,acc_mean_z_right,acc_mean_xyz_right,acc_mean_xy_right,acc_mean_yz_right,acc_mean_zx_right,acc_mean_pitch_right,acc_mean_roll_right,acc_std_x_right,...,gyro_max_yz_left,gyro_max_zx_left,gyro_peak_x_left,gyro_peak_y_left,gyro_peak_z_left,gyro_peak_xyz_left,gyro_peak_xy_left,gyro_peak_yz_left,gyro_peak_zx_left,class
0,0.64099,0.45412,0.7196,1.1777,0.90859,0.92558,1.0034,64.441,45.162,0.3028,...,230.41,135.68,2,2,4,3,2,3,3,2
1,-0.45311,0.38586,0.74528,1.1791,0.86289,0.9277,1.0244,-15.722,50.772,0.51034,...,324.06,119.33,3,3,4,2,3,2,2,2
2,-0.90543,0.4462,-0.23823,1.0684,1.0136,0.5623,0.9646,-63.02,30.88,0.093662,...,59.732,25.927,4,2,1,1,3,1,1,2
3,-0.080558,-0.82512,0.62456,1.2407,0.99363,1.1162,0.84994,6.9487,-51.026,0.52819,...,200.35,196.51,5,5,4,6,6,5,5,2
4,-0.64983,-0.85104,-0.052758,1.1677,1.117,0.92099,0.74269,-36.07,-46.924,0.32935,...,196.33,197.02,3,2,2,3,5,2,2,2


# 2 - Model Training

### Split features and target variable

In [9]:
from sklearn.model_selection import train_test_split

# Split features and target vairable
X = shuffled_data.drop('class', axis=1)
y = shuffled_data['class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) 


### Initialize and train model without parameter tuning

In [10]:
from sklearn import svm
from sklearn.metrics import accuracy_score

clf = svm.SVC() # Linear Kernel
clf.fit(X_train, y_train) # Training the model
y_pred = clf.predict(X_test) # Predicting the model
accuracy_score = accuracy_score(y_test, y_pred) # Calculating the accuracy of the model

f"Accuracy of the model: {accuracy_score}"

'Accuracy of the model: 0.8950988822012038'

### 10 fold cross validation 

In [11]:
from sklearn.model_selection import cross_val_score
from sklearn import svm

clf = svm.SVC()
scores = cross_val_score(clf, X, y, cv = 10) # 10-fold cross validation

scores

array([0.88907997, 0.8822012 , 0.90455718, 0.88736028, 0.88993981,
       0.89595873, 0.89853826, 0.88048151, 0.88736028, 0.90017212])

# 3 - Hyperparameter Tuning

In [12]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
 # Radial Basis Function kernel
param_grid = {
    'C' : [0.1, 1, 10, 100, 1000],
    'gamma' : [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel' : ['rbf']
}

grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)

grid.fit(X_train, y_train)

grid.best_params_

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.834 total time=   9.5s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.834 total time=   9.5s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.834 total time=   9.6s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.834 total time=   9.5s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.834 total time=   9.7s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.834 total time=   9.4s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.834 total time=   9.6s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.834 total time=   9.6s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.834 total time=  11.3s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.834 total time=  10.4s
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.834 total time=  19.1s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf

{'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}

### Re-run prediction and check new model accuracy

In [23]:
grid_prediction = grid.predict(X_test)

# Evaluating the model that uses the best parameters
new_accuracy_score = accuracy_score(y_test, grid_prediction) # Calculating the accuracy of the model

f"Accuracy of the model using the best parameter: {new_accuracy_score}"


# 10-fold cross validation
scores = cross_val_score(grid, X, y, cv = 10) 


TypeError: 'numpy.float64' object is not callable

# 4 - Feature Selection

In [None]:
from sklearn.feature_selection import VarianceThreshold