In [None]:
import pandas as pd
# Load combined consistent data from both students (same Arduino orientation)
df = pd.read_csv('training_data.csv')
# Test: Remove 'push' gestures to see if it improves classification for other gestures
df = df[df['label'] != 'push']
df.head()

Unnamed: 0,meanAx,sdAx,rangeAx,meanAy,sdAy,rangeAy,meanAz,sdAz,rangeAz,meanGx,sdGx,rangeGx,meanGy,sdGy,rangeGy,meanGz,sdGz,rangeGz,label,studentId
0,0.2663,0.0685,0.2906,-0.4478,0.1059,0.35,-0.8874,0.0912,-0.3447,23.9705,87.9754,-304.8706,-21.0531,32.5295,-140.4419,-0.9725,42.1156,-139.0381,right,11611553
1,0.9055,0.2807,1.0003,0.0866,0.0597,0.2261,-0.1778,0.1802,-0.6813,35.498,40.3968,-128.6621,-256.9377,172.8009,-573.0591,57.2144,45.3477,-144.2871,right,11611553
2,0.9653,0.1883,0.7652,0.1955,0.0801,0.2907,-0.1621,0.1756,-0.7032,46.1019,41.3384,-130.0659,-213.0168,195.4574,-607.666,54.4922,46.7434,-143.9209,right,11611553
3,0.9683,0.2109,0.8591,0.1727,0.0661,0.2334,-0.1905,0.1608,-0.6064,47.6685,45.8164,-141.4185,-215.7593,235.366,-674.1943,59.5256,59.2903,-173.2788,right,11611553
4,0.9586,0.2532,1.0283,0.2378,0.0682,0.2767,-0.1117,0.14,-0.5524,39.7339,57.958,-178.9551,-232.076,288.3963,-772.3999,75.7039,90.9537,-241.333,right,11611553


In [57]:
# Extract features and labels
feature_columns = ['meanAx', 'sdAx', 'rangeAx', 'meanAy', 'sdAy', 'rangeAy', 
                   'meanAz', 'sdAz', 'rangeAz', 'meanGx', 'sdGx', 'rangeGx', 
                   'meanGy', 'sdGy', 'rangeGy', 'meanGz', 'sdGz', 'rangeGz']

X = df[feature_columns]
y = df['label']

print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")
print(f"Unique labels: {y.unique()}")

Features shape: (169, 18)
Labels shape: (169,)
Unique labels: ['right' 'left' 'up' 'down']


In [58]:
# Check class distribution BEFORE encoding
print("\n=== CLASS DISTRIBUTION ===")
label_counts = df['label'].value_counts()
print(label_counts)
print(f"\nTotal samples: {len(df)}")


=== CLASS DISTRIBUTION ===
label
left     45
down     44
right    41
up       39
Name: count, dtype: int64

Total samples: 169


In [59]:
def encode_label(lbl):
    # Convert label to lowercase to handle inconsistencies like 'Right' vs 'right'
    lbl_lower = str(lbl).lower()
    label_map = {'right': 0, 'left': 1,  'up': 2, 'down': 3, 'push': 4}
    return label_map.get(lbl_lower, -1)

df['label'] = df['label'].apply(encode_label)

# IMPORTANT: Re-extract y AFTER encoding to numeric values
y = df['label']

# Verify that there are no -1 labels
if -1 in y.unique():
    print("ERROR: Found -1 in labels after encoding. Check for unexpected label strings.")
    print(df[df['label'] == -1])
else:
    print(f"OK: Encoded labels are: {y.unique()}")
    
print(f"Label type: {type(y.iloc[0])}")

OK: Encoded labels are: [0 1 2 3]
Label type: <class 'numpy.int64'>


In [60]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Normalization will be done after train-test split to avoid data leakage

In [61]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [62]:
from sklearn.svm import SVC
# Create an SVM classifier with balanced weights to automatically handle class imbalance
svm_model = SVC(kernel='rbf', 
                class_weight='balanced', 
                C=1.0, 
                gamma=0.1, 
                probability=True, 
                decision_function_shape='ovo')
# Train the SVM model
svm_model.fit(X_train, y_train)

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,0.1
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,'balanced'


In [63]:
from sklearn.metrics import classification_report, accuracy_score
y_pred = svm_model.predict(X_test)
# Evaluate the model performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9411764705882353
Classification Report:
               precision    recall  f1-score   support

           0       0.78      1.00      0.88         7
           1       1.00      1.00      1.00         4
           2       1.00      0.83      0.91        12
           3       1.00      1.00      1.00        11

    accuracy                           0.94        34
   macro avg       0.94      0.96      0.95        34
weighted avg       0.95      0.94      0.94        34



In [64]:
import pickle
with open('svm_model.pkl', 'wb') as file:
    pickle.dump(svm_model, file)
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [65]:
# Verify model predictions are numeric
print("\n=== VALIDATION ===")
test_pred = svm_model.predict(X_test[:5])
print(f"Sample predictions: {test_pred}")
print(f"Prediction types: {[type(p).__name__ for p in test_pred]}")
print(f"Expected: numeric 0-4, Got: {test_pred[0]} (type: {type(test_pred[0]).__name__})")

if isinstance(test_pred[0], str):
    print("ERROR: Model is predicting strings! Need to retrain with numeric labels.")
else:
    print("OK: Model predicts numeric values")


=== VALIDATION ===
Sample predictions: [3 3 0 0 2]
Prediction types: ['int64', 'int64', 'int64', 'int64', 'int64']
Expected: numeric 0-4, Got: 3 (type: int64)
OK: Model predicts numeric values


In [66]:
# Analyze decision confidence for each class
print("\n=== DECISION FUNCTION ANALYSIS ===")
decision_scores = svm_model.decision_function(X_test)
probabilities = svm_model.predict_proba(X_test)

label_names = {0: 'right', 1: 'left', 2: 'up', 3: 'down'}  # Removed 'push' for testing

for i in range(min(5, len(X_test))):
    actual = label_names[y_test.iloc[i]]
    predicted = label_names[svm_model.predict(X_test[i:i+1])[0]]
    confidence = max(probabilities[i]) * 100
    print(f"\nSample {i}: Actual={actual}, Predicted={predicted}, Confidence={confidence:.1f}%")
    print(f"  Probabilities: {dict(zip([label_names[j] for j in range(4)], probabilities[i]))}")


=== DECISION FUNCTION ANALYSIS ===

Sample 0: Actual=down, Predicted=down, Confidence=45.4%
  Probabilities: {'right': np.float64(0.3616711165587824), 'left': np.float64(0.16034907335449886), 'up': np.float64(0.02356728424703034), 'down': np.float64(0.45441252583968844)}

Sample 1: Actual=down, Predicted=down, Confidence=98.7%
  Probabilities: {'right': np.float64(0.0040667956974229265), 'left': np.float64(0.003961100127682878), 'up': np.float64(0.004615549318731411), 'down': np.float64(0.9873565548561627)}

Sample 2: Actual=right, Predicted=right, Confidence=86.2%
  Probabilities: {'right': np.float64(0.8618546581653201), 'left': np.float64(0.10524705678374978), 'up': np.float64(0.018616066530151687), 'down': np.float64(0.014282218520778515)}

Sample 3: Actual=right, Predicted=right, Confidence=97.6%
  Probabilities: {'right': np.float64(0.9755960838525932), 'left': np.float64(0.009130459496504885), 'up': np.float64(0.008717359511403197), 'down': np.float64(0.006556097139498557)}

Sa