# Process Data, Train, Test, and Evaluate Model

## Setup

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,  classification_report
import pickle
import os

In [6]:
# Notebook-wide settings shared by the cells below.
seed = 83  # random seed: last 2 digits of student number
training_size = 0.8  # fraction of the samples kept for training
testing_size = 1.0 - training_size  # the remainder is held out for testing

## Data processing

In [7]:
# Load the raw sensor recordings (one CSV per accelerometer / gyroscope axis)
# together with the class labels.
# NOTE(review): read_csv treats the first line of every file as a header row --
# confirm the raw files really have one, otherwise the first sample is dropped.
DATA_PATH = "raw_data"
acceleration_x = pd.read_csv(os.path.join(DATA_PATH, "acceleration_x.csv"))
acceleration_y = pd.read_csv(os.path.join(DATA_PATH, "acceleration_y.csv"))
acceleration_z = pd.read_csv(os.path.join(DATA_PATH, "acceleration_z.csv"))
gyroscope_x = pd.read_csv(os.path.join(DATA_PATH, "gyroscope_x.csv"))
gyroscope_y = pd.read_csv(os.path.join(DATA_PATH, "gyroscope_y.csv"))
gyroscope_z = pd.read_csv(os.path.join(DATA_PATH, "gyroscope_z.csv"))
labels = pd.read_csv(os.path.join(DATA_PATH, "labels.csv"))

# All files must describe the same samples. The frame built below aligns its
# columns on the row index, so a shorter file would silently introduce NaN rows
# instead of failing here with a clear message.
row_counts = {
    "acceleration_x": len(acceleration_x),
    "acceleration_y": len(acceleration_y),
    "acceleration_z": len(acceleration_z),
    "gyroscope_x": len(gyroscope_x),
    "gyroscope_y": len(gyroscope_y),
    "gyroscope_z": len(gyroscope_z),
    "labels": len(labels),
}
if len(set(row_counts.values())) != 1:
    raise ValueError(f"Raw data files have mismatched row counts: {row_counts}")

# Reduce every sample (row) to the mean over its columns, giving one feature
# per sensor axis, and attach the label of that sample.
df = pd.DataFrame({
    "ax": acceleration_x.mean(axis=1),
    "ay": acceleration_y.mean(axis=1),
    "az": acceleration_z.mean(axis=1),
    "gx": gyroscope_x.mean(axis=1),
    "gy": gyroscope_y.mean(axis=1),
    "gz": gyroscope_z.mean(axis=1),
    "label": labels.iloc[:, 0]  # first column of labels
})

# Printing a long frame shows an abbreviated view: the first and last rows
# followed by the overall shape.
print(df)

           ax        ay        az        gx        gy        gz  label
0    0.993808 -0.097925  0.118973  0.040237  0.035752  0.004211      0
1    0.988688 -0.109838  0.136743  0.816688  0.007225  0.168807      0
2    1.001085 -0.055752  0.089284 -0.110351  0.044556 -0.029301      0
3    0.989726 -0.084137  0.099642 -0.198425  0.027685 -0.084684      0
4    0.995308 -0.050707  0.118655 -0.134315  0.041383 -0.045558      0
..        ...       ...       ...       ...       ...       ...    ...
251  1.022338  0.005755  0.037175  2.002824  0.141620  0.257420      3
252  1.003883  0.002298  0.087594  1.765348  0.106364  0.286881      3
253  1.006630  0.008186  0.062279  1.715213  0.078407  0.191578      3
254  1.006600  0.058155  0.031343  1.790672  0.119087  0.160586      3
255  1.004353  0.046171  0.025106  1.858132  0.119590  0.116498      3

[256 rows x 7 columns]


In [8]:
# Separate the feature columns from the target column.
X = df.drop(columns=["label"])
y = df["label"]

# Stratified hold-out split so every class keeps its proportion in both parts;
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=testing_size, random_state=seed, stratify=y
)

# Persist each split next to the notebook (row index is not written).
# X_train / X_test are already DataFrames, so no re-wrapping is needed.
X_train.to_csv("X_train.csv", index=False)
X_test.to_csv("X_test.csv", index=False)

# to_frame(name=...) always stores the target values under the "label" header.
# DataFrame(series, columns=["label"]) only does so while the Series itself is
# named "label"; with any other name it would produce a column of missing values.
y_train.to_frame(name="label").to_csv("y_train.csv", index=False)
y_test.to_frame(name="label").to_csv("y_test.csv", index=False)

In [9]:
# Standardise the features. The scaler is fitted on the training split only and
# then applied unchanged to the test split, so no test statistics leak into
# the transform.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler. The classifier is trained on scaled features, so
# any new raw sample has to go through this exact transform before it can be
# passed to the saved model.
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

# Sanity check: (rows, features) of each split.
print(X_train.shape)
print(X_test.shape)

(204, 6)
(52, 6)


## Train Model

In [13]:
# Fit a linear-kernel support vector classifier on the standardised training
# features (fit() returns the estimator itself, so it can be chained).
svm_classifier = SVC(C=1, kernel="linear", random_state=seed).fit(
    X_train_scaled, y_train
)

# Serialise the trained classifier to disk.
with open("svm_model.pkl", mode="wb") as model_file:
    pickle.dump(svm_classifier, model_file)

## Test Model

In [14]:
def display_results(ground_truth, prediction, title):
    """Print the evaluation summary for one set of predictions.

    The summary consists of a title banner, the overall accuracy (two decimal
    places), the per-class classification report and the confusion matrix.

    Args:
        ground_truth: True class labels.
        prediction: Predicted class labels, compared against ``ground_truth``.
        title: Text shown in the banner above the metrics.
    """
    # Compute every metric up front, then emit the report in one go.
    accuracy = accuracy_score(ground_truth, prediction)
    report = classification_report(ground_truth, prediction)
    matrix = confusion_matrix(ground_truth, prediction)

    print(f"========== {title} ==========")
    print(f"Accuracy: {accuracy:.2f}\n")
    print(report)
    print("Confusion Matrix:")
    print(matrix, "\n")

# Predict on both splits with the trained classifier.
y_pred_train = svm_classifier.predict(X_train_scaled)
y_pred_test = svm_classifier.predict(X_test_scaled)

# Report training metrics first, then the held-out test metrics.
display_results(ground_truth=y_train, prediction=y_pred_train, title="Training")
display_results(ground_truth=y_test, prediction=y_pred_test, title="Testing")

Accuracy: 1.00

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        49
           1       1.00      1.00      1.00        50
           2       1.00      1.00      1.00        54
           3       1.00      1.00      1.00        51

    accuracy                           1.00       204
   macro avg       1.00      1.00      1.00       204
weighted avg       1.00      1.00      1.00       204

Confusion Matrix:
[[49  0  0  0]
 [ 0 50  0  0]
 [ 0  0 54  0]
 [ 0  0  0 51]] 

Accuracy: 1.00

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        14
           3       1.00      1.00      1.00        13

    accuracy                           1.00        52
   macro avg       1.00      1.00      1.00        52
weighted avg       1.00      1.00      1.00        52

Confusion Matrix: