In [115]:
import pandas as pd
# Load combined consistent data from both students (same Arduino orientation)
df = pd.read_csv('master_training_data_with_gyro.csv')
df.head()

Unnamed: 0,meanAx,sdAx,rangeAx,meanAy,sdAy,rangeAy,meanAz,sdAz,rangeAz,meanGx,sdGx,rangeGx,meanGy,sdGy,rangeGy,meanGz,sdGz,rangeGz,label,studentId
0,0.8922,0.2107,0.8303,0.0381,0.029,0.1162,-0.0347,0.1504,0.6028,-17.6717,23.217,62.1338,-204.5369,232.9069,626.4648,4.6834,5.3712,15.0757,Right,11611553
1,0.9239,0.1547,0.6499,0.0589,0.0322,0.1082,0.0484,0.1363,0.6276,-9.0251,16.646,49.0112,-178.4058,217.445,614.9902,1.1719,2.6221,8.5449,Right,11611553
2,0.9301,0.1498,0.6594,0.047,0.0317,0.1195,-0.0177,0.1142,0.4865,-10.328,13.4871,38.8184,-163.7486,217.7073,632.019,2.1232,2.5445,7.2632,Right,11611553
3,0.919,0.197,0.8376,0.0662,0.0368,0.1199,0.0313,0.1533,0.6186,-18.339,22.2053,68.2373,-192.4316,253.4296,700.5005,1.1149,1.9271,6.4087,Right,11611553
4,0.9271,0.1419,0.6176,0.0133,0.0191,0.0684,-0.0693,0.0973,0.4619,-9.7087,18.8354,55.2368,-176.8758,231.486,690.2466,1.6683,4.5471,15.625,Right,11611553


In [127]:
# Find labels that are not in the expected set
expected_labels = {'right', 'left', 'up', 'down', 'push'}
unexpected_df = df[~df['label'].isin(expected_labels)]

if not unexpected_df.empty:
    print("!!! UNEXPECTED LABELS FOUND !!!")
    print(unexpected_df)
else:
    print("OK: All labels are valid.")

!!! UNEXPECTED LABELS FOUND !!!
     meanAx    sdAx  rangeAx  meanAy    sdAy  rangeAy  meanAz    sdAz  \
0    0.8922  0.2107   0.8303  0.0381  0.0290   0.1162 -0.0347  0.1504   
1    0.9239  0.1547   0.6499  0.0589  0.0322   0.1082  0.0484  0.1363   
2    0.9301  0.1498   0.6594  0.0470  0.0317   0.1195 -0.0177  0.1142   
3    0.9190  0.1970   0.8376  0.0662  0.0368   0.1199  0.0313  0.1533   
4    0.9271  0.1419   0.6176  0.0133  0.0191   0.0684 -0.0693  0.0973   
..      ...     ...      ...     ...     ...      ...     ...     ...   
364  1.0620  0.3456   1.2217 -0.1482  0.1079   0.3849 -0.1851  0.1561   
365  1.0218  0.3551   1.3723 -0.0022  0.1131   0.3573 -0.2907  0.2169   
366  1.0147  0.2916   1.0347 -0.1770  0.0394   0.1234 -0.3139  0.1987   
367  1.0480  0.2403   0.8508 -0.1025  0.0907   0.2656 -0.3442  0.1884   
368  1.0115  0.3828   1.1308 -0.2236  0.1358   0.4826 -0.3289  0.1929   

     rangeAz   meanGx      sdGx   rangeGx    meanGy      sdGy   rangeGy  \
0     0.6028 -17

In [116]:
df.shape

(369, 20)

In [117]:
# Extract features and labels
feature_columns = ['meanAx', 'sdAx', 'rangeAx', 'meanAy', 'sdAy', 'rangeAy', 
                   'meanAz', 'sdAz', 'rangeAz', 'meanGx', 'sdGx', 'rangeGx', 
                   'meanGy', 'sdGy', 'rangeGy', 'meanGz', 'sdGz', 'rangeGz']

X = df[feature_columns]
y = df['label']

print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")
print(f"Unique labels: {y.unique()}")

Features shape: (369, 18)
Labels shape: (369,)
Unique labels: ['Right' 'left' 'up' 'down' 'push' 'right']


In [118]:
# Check class distribution BEFORE encoding
print("\n=== CLASS DISTRIBUTION ===")
label_counts = df['label'].value_counts()
print(label_counts)
print(f"\nTotal samples: {len(df)}")


=== CLASS DISTRIBUTION ===
label
up       85
left     76
down     75
push     60
right    52
Right    21
Name: count, dtype: int64

Total samples: 369


In [None]:
def encode_label(lbl):
    # Convert label to lowercase to handle inconsistencies like 'Right' vs 'right'
    lbl_lower = str(lbl).lower()
    label_map = {'right': 0, 'left': 1,  'up': 2, 'down': 3, 'push': 4}
    return label_map.get(lbl_lower, -1)

df['label'] = df['label'].apply(encode_label)

# IMPORTANT: Re-extract y AFTER encoding to numeric values
y = df['label']

# Verify that there are no -1 labels
if -1 in y.unique():
    print("ERROR: Found -1 in labels after encoding. Check for unexpected label strings.")
    print(df[df['label'] == -1])
else:
    print(f"OK: Encoded labels are: {y.unique()}")
    
print(f"Label type: {type(y.iloc[0])}")

Encoded labels: [-1  1  2  3  4  0]
Label type: <class 'numpy.int64'>


In [120]:
from sklearn.preprocessing import StandardScaler
# Normalize the data
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

In [121]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2)

In [122]:
from sklearn.svm import SVC
# Create an SVM classifier with balanced class weights and moderate regularization
# Custom class weights: give less weight to "down" since it's over-confident
class_weights = {0: 1.0, 1: 1.0, 2: 1.0, 3: 0.7, 4: 1.0}  # Down gets 0.7x weight
svm_model = SVC(kernel='rbf', class_weight=class_weights, C=0.8, gamma='scale', 
                probability=True, decision_function_shape='ovo')
# Train the SVM model
svm_model.fit(X_train, y_train)

0,1,2
,C,0.8
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,"{0: 1.0, 1: 1.0, 2: 1.0, 3: 0.7, ...}"


In [123]:
from sklearn.metrics import classification_report, accuracy_score
y_pred = svm_model.predict(X_test)
# Evaluate the model performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.8378378378378378
Classification Report:
               precision    recall  f1-score   support

          -1       0.33      1.00      0.50         1
           0       0.71      1.00      0.83        10
           1       0.86      0.86      0.86        21
           2       0.93      0.82      0.88        17
           3       1.00      0.54      0.70        13
           4       0.86      1.00      0.92        12

    accuracy                           0.84        74
   macro avg       0.78      0.87      0.78        74
weighted avg       0.87      0.84      0.84        74



In [124]:
import pickle
with open('svm_model.pkl', 'wb') as file:
    pickle.dump(svm_model, file)
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [125]:
# Verify model predictions are numeric
print("\n=== VALIDATION ===")
test_pred = svm_model.predict(X_test[:5])
print(f"Sample predictions: {test_pred}")
print(f"Prediction types: {[type(p).__name__ for p in test_pred]}")
print(f"Expected: numeric 0-4, Got: {test_pred[0]} (type: {type(test_pred[0]).__name__})")

if isinstance(test_pred[0], str):
    print("ERROR: Model is predicting strings! Need to retrain with numeric labels.")
else:
    print("OK: Model predicts numeric values")


=== VALIDATION ===
Sample predictions: [2 4 1 1 1]
Prediction types: ['int64', 'int64', 'int64', 'int64', 'int64']
Expected: numeric 0-4, Got: 2 (type: int64)
OK: Model predicts numeric values


In [126]:
# Analyze decision confidence for each class
print("\n=== DECISION FUNCTION ANALYSIS ===")
decision_scores = svm_model.decision_function(X_test)
probabilities = svm_model.predict_proba(X_test)

label_names = {0: 'right', 1: 'left', 2: 'up', 3: 'down', 4: 'push'}

for i in range(min(5, len(X_test))):
    actual = label_names[y_test.iloc[i]]
    predicted = label_names[svm_model.predict(X_test[i:i+1])[0]]
    confidence = max(probabilities[i]) * 100
    print(f"\nSample {i}: Actual={actual}, Predicted={predicted}, Confidence={confidence:.1f}%")
    print(f"  Probabilities: {dict(zip([label_names[j] for j in range(5)], probabilities[i]))}")


=== DECISION FUNCTION ANALYSIS ===

Sample 0: Actual=left, Predicted=up, Confidence=44.5%
  Probabilities: {'right': np.float64(0.02880252573172149), 'left': np.float64(0.007978501663982923), 'up': np.float64(0.3552534117430015), 'down': np.float64(0.4445120668097405), 'push': np.float64(0.04141640904592843)}

Sample 1: Actual=push, Predicted=push, Confidence=50.6%
  Probabilities: {'right': np.float64(0.009313348930160557), 'left': np.float64(0.11345709650113904), 'up': np.float64(0.07535099076597478), 'down': np.float64(0.18333665823431097), 'push': np.float64(0.11260386370894221)}

Sample 2: Actual=left, Predicted=left, Confidence=47.0%
  Probabilities: {'right': np.float64(0.008409801476375796), 'left': np.float64(0.010852296738876646), 'up': np.float64(0.46956860194560074), 'down': np.float64(0.02111812949593056), 'push': np.float64(0.07446793620795995)}

Sample 3: Actual=down, Predicted=left, Confidence=48.4%
  Probabilities: {'right': np.float64(0.006273537167594801), 'left': n