# IMU Surface Classification - Model - Removed Redundant Features

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score

In [3]:
file_path = "../data/Merged_Data_Selected_Features_numeric.csv"
df = pd.read_csv(file_path)

In [4]:
# Define features and target
target_columns = ["CONDITION_BnkL", "CONDITION_BnkR", "CONDITION_CS", 
                  "CONDITION_FE", "CONDITION_GR", "CONDITION_SlpD", "CONDITION_SlpU"]
features = df.drop(columns=target_columns)  
target = df[target_columns]

In [5]:
# Display first few rows of features and target
print("Features")
display(features.head())  

print("Target")
display(target.head())  

Features


Unnamed: 0,shankR_Acc_X_SFreq,shankR_Acc_X_SRegVT,shankR_Acc_X_SRegML,shankR_Acc_X_SRegAP,shankR_Acc_X_SRegRV,shankR_Acc_X_StrRegVT,shankR_Acc_X_StrRegML,shankR_Acc_X_StrRegAP,shankR_Acc_X_SymVT,shankR_Acc_X_SymML,...,trunk_Acc_X_corrFRO,trunk_Acc_X_corrHOR,trunk_Acc_X_rmsSAG,trunk_Acc_X_rmsFRO,trunk_Acc_X_rmsHOR,trunk_Acc_X_rmsMLRV,trunk_Acc_X_SPARCVT,trunk_Acc_X_SPARCML,trunk_Acc_X_SPARCAP,trunk_Acc_X_SPARCRV
0,122.44898,0.285569,0.495707,-0.160078,0.21397,0.945711,0.985408,1.029803,107.228541,66.125975,...,0.043111,0.341648,10.599529,10.415038,2.403405,0.091554,-3.071642,-10.458117,-6.866548,-3.057941
1,120.0,0.239314,0.432572,-0.099891,0.203387,0.976182,0.983757,0.982618,121.245552,77.832868,...,0.08668,0.196986,10.483364,10.28797,2.388058,0.086168,-2.96753,-11.177151,-6.492172,-3.076911
2,125.0,0.27634,0.488866,-0.143244,0.232202,0.97381,0.953578,0.970531,111.581767,64.434003,...,0.06076,0.231924,10.562399,10.361816,2.454786,0.090167,-3.142571,-9.916257,-6.325247,-3.113634
3,125.0,0.281268,0.475575,-0.145298,0.244913,0.95791,0.938596,0.93329,109.20812,65.482971,...,0.030185,0.24093,10.545568,10.322694,2.526696,0.087933,-3.178559,-11.679509,-6.218074,-3.150904
4,122.44898,0.27244,0.515308,-0.127347,0.246637,0.97296,0.96648,1.003304,112.497208,60.895558,...,-0.00932,0.23793,10.532983,10.306601,2.511757,0.084381,-3.157578,-11.490847,-5.925662,-3.134865


Target


Unnamed: 0,CONDITION_BnkL,CONDITION_BnkR,CONDITION_CS,CONDITION_FE,CONDITION_GR,CONDITION_SlpD,CONDITION_SlpU
0,1,0,0,0,0,0,0
1,1,0,0,0,0,0,0
2,1,0,0,0,0,0,0
3,1,0,0,0,0,0,0
4,1,0,0,0,0,0,0


In [6]:
# Train-Test Split (80% train, 20% test)
features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [7]:
# Normalize Features
scaler = StandardScaler()
features_train_scaled = scaler.fit_transform(features_train)
features_test_scaled = scaler.transform(features_test)

### 1. Random Forest

In [8]:
# Training
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(features_train_scaled, target_train)

In [9]:
# Prediction
target_predict = model.predict(features_test_scaled)

In [10]:
# Convert Probabilities to Binary
if target_predict.shape[1] == len(target_columns):  
    target_predict = (target_predict == target_predict.max(axis=1, keepdims=True)).astype(int)  

In [11]:
# Evaluation of Model

# Accuracy
accuracy = accuracy_score(target_test, target_predict)

# Precision
precision = precision_score(target_test, target_predict, average="macro")

# Recall
recall = recall_score(target_test, target_predict, average="macro")

# F1-Score 
f1 = f1_score(target_test, target_predict, average="macro")


# Print Metrics
print(f" Accuracy: {accuracy:.4f}")
print(f" Precision: {precision:.4f}")
print(f" Recall: {recall:.4f}")
print(f" F1 Score: {f1:.4f}")

 Accuracy: 0.7895
 Precision: 0.4349
 Recall: 0.9912
 F1 Score: 0.6015


### 2. KNN

In [12]:
# Training
knn = KNeighborsClassifier(n_neighbors=5)  # You can tune 'n_neighbors' for better performance
knn.fit(features_train_scaled, target_train)

In [13]:
# Prediction
target_predict = knn.predict(features_test_scaled)

In [14]:
# Evaluation

# Accuracy
accuracy = accuracy_score(target_test, target_predict)

# Precision
precision = precision_score(target_test, target_predict, average="macro")

# Recall
recall = recall_score(target_test, target_predict, average="macro")

# F1-Score 
f1 = f1_score(target_test, target_predict, average="macro")


# Print Metrics
print(f" Accuracy: {accuracy:.4f}")
print(f" Precision: {precision:.4f}")
print(f" Recall: {recall:.4f}")
print(f" F1 Score: {f1:.4f}")

 Accuracy: 0.9394
 Precision: 0.9606
 Recall: 0.9224
 F1 Score: 0.9404


### 3. Random Forest Hyperparameter Tuning

In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Define hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 200],  # Number of trees
    'max_depth': [None, 10, 20],     # Tree depth 
    'min_samples_split': [2, 5, 10],  # Min samples to split a node
    'min_samples_leaf': [1, 2, 4],   # Min samples per leaf
    'bootstrap': [True, False]       # Whether to use bootstrapping
}

rf = RandomForestClassifier(random_state=42)

# Grid Search with 3-fold cross-validation
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, 
                           cv=3, scoring='accuracy', n_jobs=-1, verbose=2)
grid_search.fit(features_train, target_train)

# Best parameters and model
print("Best Hyperparameters:", grid_search.best_params_)
best_rf = grid_search.best_estimator_

# Evaluate the tuned model
target_predict = best_rf.predict(features_test)

accuracy = accuracy_score(target_test, target_predict)
precision = precision_score(target_test, target_predict, average='macro')
recall = recall_score(target_test, target_predict, average='macro')
f1 = f1_score(target_test, target_predict, average='macro')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")


Fitting 3 folds for each of 162 candidates, totalling 486 fits
Best Hyperparameters: {'bootstrap': False, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Accuracy: 0.8718
Precision: 0.9903
Recall: 0.8363
F1-score: 0.9035
