# Importing libraires

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 1 - Data processing

### Concatnating the datasets

In [3]:
#Concatenating the data
file_paths = [f'ampc/w{i}.csv' for i in range(1, 5)]

data = pd.concat([pd.read_csv(f) for f in file_paths], ignore_index=True)

data.to_csv('./combined_data.csv', index = False)

data.head()

Unnamed: 0,acc_mean_x_right,acc_mean_y_right,acc_mean_z_right,acc_mean_xyz_right,acc_mean_xy_right,acc_mean_yz_right,acc_mean_zx_right,acc_mean_pitch_right,acc_mean_roll_right,acc_std_x_right,...,gyro_max_yz_left,gyro_max_zx_left,gyro_peak_x_left,gyro_peak_y_left,gyro_peak_z_left,gyro_peak_xyz_left,gyro_peak_xy_left,gyro_peak_yz_left,gyro_peak_zx_left,class
0,-0.1733,0.14864,0.98128,1.1065,0.44735,1.0611,1.0332,9.9751,52.281,0.25398,...,137.85,79.286,4,4,3,2,2,2,4,2
1,-0.40618,0.24715,0.79471,1.0178,0.52388,0.86595,0.96693,-30.421,48.213,0.26456,...,269.08,103.56,3,1,2,2,2,2,1,2
2,-0.4967,0.37167,0.70283,1.0402,0.68213,0.80958,0.9651,-23.068,52.897,0.35638,...,158.42,114.7,2,3,2,1,1,2,2,2
3,-0.2878,0.15882,0.91688,1.0974,0.50834,1.0276,0.99884,3.2451,31.009,0.29577,...,283.65,120.46,3,2,2,3,4,3,2,2
4,-0.56189,0.36946,0.68668,1.3085,0.91759,1.0218,1.1201,-24.118,47.579,0.5681,...,199.69,93.039,4,2,2,3,3,3,2,2


### Shuffle data

In [4]:
shuffled_data = data.sample(n=len(data)).reset_index(drop=True)

shuffled_data.to_csv('./all_data.csv', index = False)

shuffled_data.head()

Unnamed: 0,acc_mean_x_right,acc_mean_y_right,acc_mean_z_right,acc_mean_xyz_right,acc_mean_xy_right,acc_mean_yz_right,acc_mean_zx_right,acc_mean_pitch_right,acc_mean_roll_right,acc_std_x_right,...,gyro_max_yz_left,gyro_max_zx_left,gyro_peak_x_left,gyro_peak_y_left,gyro_peak_z_left,gyro_peak_xyz_left,gyro_peak_xy_left,gyro_peak_yz_left,gyro_peak_zx_left,class
0,-0.37386,-0.53822,0.69911,1.1384,0.75576,1.0091,0.89655,-4.3005,-19.866,0.3152,...,318.17,244.34,4,4,5,6,5,5,7,2
1,-0.63051,-0.76417,-0.00069,0.99614,0.99096,0.77094,0.63835,-39.16,-50.225,0.025883,...,40.393,12.228,4,7,3,5,5,5,5,1
2,-0.85584,-0.25832,-0.43962,1.0023,0.89682,0.51594,0.96573,-72.284,-5.5858,0.093458,...,12.134,5.7711,7,8,4,6,6,6,6,2
3,-0.84629,-0.48096,0.15484,0.98711,0.97428,0.50697,0.86094,-60.025,-28.513,0.030618,...,98.489,73.968,5,5,4,6,6,6,7,2
4,-0.3837,0.27799,0.87787,1.0091,0.4803,0.92572,0.96607,-30.082,56.325,0.11834,...,51.543,20.747,6,4,4,5,5,5,5,2


# 2 - Model Training 

### Split features and target variable

In [5]:
from sklearn.model_selection import train_test_split

# Split features and target vairable
X = shuffled_data.drop('class', axis=1)
y = shuffled_data['class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) 


### Train-test split model training 

In [6]:
from sklearn import svm
from sklearn.metrics import accuracy_score

clf = svm.SVC() # Linear Kernel
clf.fit(X_train, y_train) # Training the model
y_pred = clf.predict(X_test) # Predicting the model
accuracy_score = accuracy_score(y_test, y_pred) # Calculating the accuracy of the model

f"Accuracy of the model: {accuracy_score*100:2f}%"

'Accuracy of the model: 89.022643%'

### 10-fold cross validation mean accuracy 

In [8]:
from sklearn import svm
from sklearn.model_selection import cross_val_score

clf = svm.SVC()
scores = cross_val_score(clf, X, y, cv = 10) # 10-fold cross validation

f"{scores.mean()*100:2f}%"

'89.242396%'

# 3 - Hyperparameter Tuning

### Use GridSeachCV to find the best set of values for the SVC model

In [9]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

 # Radial Basis Function kernel
param_grid = {
    'C' : [0.1, 1, 10, 100, 1000],
    'gamma' : [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel' : ['rbf']
}

grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)

grid.fit(X_train, y_train)

grid.best_params_

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.835 total time=   9.7s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.835 total time=   9.6s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.835 total time=   9.8s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.834 total time=  10.0s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.834 total time=   9.5s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.835 total time=   9.7s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.835 total time=   9.7s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.835 total time=   9.8s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.834 total time=   9.5s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.834 total time=   9.6s
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.835 total time=   9.7s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf

{'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}

### Train-test split training with hyperparameter tuning

In [12]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

# best_C = 10
# best_gamma = 0.0001
# best_kernel = rbf

best_C = grid.best_params_['C']
best_gamma = grid.best_params_['gamma']
best_kernel = grid.best_params_['kernel']

svc_hyp = SVC(C = best_C, gamma = best_gamma, kernel = best_kernel)

svc_hyp.fit(X_train, y_train)

y_pred_hyp = svc_hyp.predict(X_test)

accuracy_score_new = accuracy_score(y_test, y_pred_hyp)

print(f"Accuracy of the model after hyperparameter tuning: {accuracy_score_new*100:2f}%")

Accuracy of the model after hyperparameter tuning: 84.293494%


### 10-fold cross validation mean accuracy

In [11]:
# Perform 10-fold cross-validation
cv_scores = cross_val_score(svc_hyp, X, y, cv=10)

# Calculate and print the mean accuracy across all 10 folds
cv_accuracy = cv_scores.mean()

f"10-Fold Cross-Validation Accuracy after hyperparameter tuning: {cv_accuracy * 100:.2f}%"


'10-Fold Cross-Validation Accuracy after hyperparameter tuning: 84.29%'

# 4 - Feature Selection

In [15]:
# Import libraries
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

### Select features and split data based on selected features

In [17]:
# Select top 100 features
selector = SelectKBest(f_classif, k=100)

# Create a pipeline with a feature selector and classifier
X_selected = selector.fit_transform(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.3, random_state=42)

### Train-test split training with feature selection and hyperparameter tuning

In [18]:
svc_hyp_selected = SVC(C = best_C, gamma = best_gamma, kernel = best_kernel)

# Fit/train the model
svc_hyp_selected.fit(X_train, y_train)

# Predict the model
y_pred_hyp_selected = svc_hyp_selected.predict(X_test)

accuracy_score_selected = accuracy_score(y_test, y_pred_hyp_selected)

print(f"Accuracy of the model after feature selection: {accuracy_score_selected*100:.2f}%")

Accuracy of the model after feature selection: 85.38%


# 10-fold cross validation mean accuracy score

In [19]:
cv_scores_selected = cross_val_score(svc_hyp_selected, X_selected, y, cv=10)

cv_accuracy_selected = cv_scores_selected.mean()

f"10-Fold Cross-Validation Accuracy after feature selection: {cv_accuracy_selected * 100:.2f}%"

'10-Fold Cross-Validation Accuracy after feature selection: 85.61%'

# 5 - Dimensionality reduction

In [20]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

### Perform PCA to reduce dimensionality

In [21]:
# initialize PCA to reduce the dimensionality to 10 components
pca = PCA(n_components=10)

# fit PCA on the training data
X_pca = pca.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.3, random_state=42)


### Train-test split model training with reduced dimensionality

In [24]:
svc_hyp_pca = SVC(C = best_C, gamma = best_gamma, kernel = best_kernel)

# Fit/train the model
svc_hyp_pca.fit(X_train, y_train)

# Predict the model
y_pred_hyp_pca = svc_hyp_pca.predict(X_test)

accuracy_score_pca = accuracy_score(y_test, y_pred_hyp_pca)

print(f"Accuracy of the model after PCA: {accuracy_score_pca*100:.2f}%")


Accuracy of the model after PCA: 84.04%


### 10-fold cross validation mean accuracy

In [23]:
# Perform 10-fold cross-validation
cv_scores_pca = cross_val_score(svc_hyp_pca, X_pca, y, cv=10)

cv_accuracy_pca = cv_scores_pca.mean()

f"10-Fold Cross-Validation Accuracy after PCA: {cv_accuracy_pca * 100:.2f}%"

'10-Fold Cross-Validation Accuracy after PCA: 84.37%'

# 6 - Testing with other classifiers

### Import libraries

In [26]:
import pandas as pd

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

### Load dataset and split data 

In [27]:
df = pd.read_csv('all_data.csv')

X = df.drop('class', axis=1)
y = df['class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## SGD - Stochastic Gradient Descent 

### Train and test accuracy of SGD model

In [28]:
sgd_model = SGDClassifier(random_state=42)

sgd_model.fit(X_train, y_train)

y_pred = sgd_model.predict(X_test)

accuracy_score_sgd = accuracy_score(y_test, y_pred)

f"Accuracy of the model using Stochastic Gradient Descent: {accuracy_score_sgd*100:.2f}%"

'Accuracy of the model using Stochastic Gradient Descent: 86.70%'

### 10-fold cross validation mean accuracy score

In [29]:
cv_scores_sgd = cross_val_score(sgd_model, X, y, cv=10)

cv_accuracy_sgd = cv_scores_sgd.mean()

f"10-Fold Cross-Validation Accuracy using Stochastic Gradient Descent: {cv_accuracy_sgd * 100:.2f}%"

'10-Fold Cross-Validation Accuracy using Stochastic Gradient Descent: 85.89%'

## Random Forest Classifier

### Train-test split training

In [31]:
rf_model = RandomForestClassifier(random_state = 42)

rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

accuracy_score_rf = accuracy_score(y_test, y_pred_rf)

f"Accuracy of the model using Random Forest: {accuracy_score_rf*100:.2f}%"

'Accuracy of the model using Random Forest: 92.40%'

### 10-fold cross validation mean accuracy

In [32]:
cv_scores_rf = cross_val_score(rf_model, X, y, cv=10)

cv_accuracy_rf = cv_scores_rf.mean()

f"10-Fold Cross-Validation Accuracy using Random Forest: {cv_accuracy_rf * 100:.2f}%"

'10-Fold Cross-Validation Accuracy using Random Forest: 92.51%'

## MLP Classifier

### Train-test split training

In [33]:
mlp_model = MLPClassifier(random_state = 42)

mlp_model.fit(X_train, y_train)

y_pred_mlp = mlp_model.predict(X_test)

accuracy_score_mlp = accuracy_score(y_test, y_pred_mlp)

f"Accuracy of the model using Multi-Layer Perceptron: {accuracy_score_mlp*100:.2f}%"

'Accuracy of the model using Multi-Layer Perceptron: 89.22%'

### 10-fold cross-validation mean accuracy

In [35]:
cv_scores_mlp = cross_val_score(mlp_model, X, y, cv=10)

cv_accuracy = cv_scores_mlp.mean()

f"10-Fold Cross-Validation Accuracy using Multi-Layer Perceptron: {cv_accuracy * 100:.2f}%"

'10-Fold Cross-Validation Accuracy using Multi-Layer Perceptron: 84.81%'