In [None]:
import pandas as pd
df = pd.read_csv('combined_training_data.csv')
df.head()

Unnamed: 0,meanX,sdX,rangeX,meanY,sdY,rangeY,meanZ,sdZ,rangeZ,meanGx,sdGx,rangeGx,meanGy,sdGy,rangeGy,meanGz,sdGz,rangeGz,label,studentId
0,0.8922,0.2107,0.8303,0.0381,0.029,0.1162,-0.0347,0.1504,0.6028,-17.6717,23.217,62.1338,-204.5369,232.9069,626.4648,4.6834,5.3712,15.0757,Right,11611553
1,0.9239,0.1547,0.6499,0.0589,0.0322,0.1082,0.0484,0.1363,0.6276,-9.0251,16.646,49.0112,-178.4058,217.445,614.9902,1.1719,2.6221,8.5449,Right,11611553
2,0.9301,0.1498,0.6594,0.047,0.0317,0.1195,-0.0177,0.1142,0.4865,-10.328,13.4871,38.8184,-163.7486,217.7073,632.019,2.1232,2.5445,7.2632,Right,11611553
3,0.919,0.197,0.8376,0.0662,0.0368,0.1199,0.0313,0.1533,0.6186,-18.339,22.2053,68.2373,-192.4316,253.4296,700.5005,1.1149,1.9271,6.4087,Right,11611553
4,0.9271,0.1419,0.6176,0.0133,0.0191,0.0684,-0.0693,0.0973,0.4619,-9.7087,18.8354,55.2368,-176.8758,231.486,690.2466,1.6683,4.5471,15.625,Right,11611553


In [11]:
df.shape

(108, 20)

In [None]:
# Extract features and labels
feature_columns = ['meanAx', 'sdAx', 'rangeAx', 'meanAy', 'sdAy', 'rangeAy', 
                   'meanAz', 'sdAz', 'rangeAz', 'meanGx', 'sdGx', 'rangeGx', 
                   'meanGy', 'sdGy', 'rangeGy', 'meanGz', 'sdGz', 'rangeGz']

X = df[feature_columns]
y = df['label']

print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")
print(f"Unique labels: {y.unique()}")

Features shape: (108, 18)
Labels shape: (108,)
Unique labels: ['Right' 'left' 'up' 'down' 'push']


In [39]:
# Check class distribution BEFORE encoding
print("\n=== CLASS DISTRIBUTION ===")
label_counts = df['label'].value_counts()
print(label_counts)
print(f"\nTotal samples: {len(df)}")


=== CLASS DISTRIBUTION ===
label
up       23
down     22
push     22
Right    21
left     20
Name: count, dtype: int64

Total samples: 108


In [None]:
def encode_label(lbl):
    label_map = {'right': 0, 'left': 1,  'up': 2, 'down': 3, 'push':4}
    return label_map.get(lbl, -1)
df['label'] = df['label'].apply(encode_label)

# IMPORTANT: Re-extract y AFTER encoding to numeric values
y = df['label']
print(f"Encoded labels: {y.unique()}")
print(f"Label type: {type(y.iloc[0])}")

Encoded labels: [0 1 2 3 4]
Label type: <class 'numpy.int64'>


In [40]:
from sklearn.preprocessing import StandardScaler
# Normalize the data
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

In [33]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2)

In [34]:
from sklearn.svm import SVC
# Create an SVM classifier with a polynomial kernel
svm_model = SVC(kernel='rbf')
# Train the SVM model
svm_model.fit(X_train, y_train)

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [35]:
from sklearn.metrics import classification_report, accuracy_score
y_pred = svm_model.predict(X_test)
# Evaluate the model performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00         6
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         2
           4       1.00      1.00      1.00         4

    accuracy                           1.00        22
   macro avg       1.00      1.00      1.00        22
weighted avg       1.00      1.00      1.00        22



In [36]:
import pickle
with open('svm_model.pkl', 'wb') as file:
    pickle.dump(svm_model, file)
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [37]:
# Verify model predictions are numeric
print("\n=== VALIDATION ===")
test_pred = svm_model.predict(X_test[:5])
print(f"Sample predictions: {test_pred}")
print(f"Prediction types: {[type(p).__name__ for p in test_pred]}")
print(f"Expected: numeric 0-4, Got: {test_pred[0]} (type: {type(test_pred[0]).__name__})")

if isinstance(test_pred[0], str):
    print("ERROR: Model is predicting strings! Need to retrain with numeric labels.")
else:
    print("OK: Model predicts numeric values")


=== VALIDATION ===
Sample predictions: [3 2 0 2 4]
Prediction types: ['int64', 'int64', 'int64', 'int64', 'int64']
Expected: numeric 0-4, Got: 3 (type: int64)
OK: Model predicts numeric values
