In [None]:
import pandas as pd
df = pd.read_csv('training_data.csv')
df = df[df['label'] != 'push']
df.head()

Unnamed: 0,meanAx,sdAx,rangeAx,meanAy,sdAy,rangeAy,meanAz,sdAz,rangeAz,meanGx,sdGx,rangeGx,meanGy,sdGy,rangeGy,meanGz,sdGz,rangeGz,label,studentId
0,0.9679,0.2467,0.9634,-0.2738,0.1306,0.4613,-0.4429,0.2753,-0.951,1.2329,27.7312,-91.0034,-193.0257,132.3646,-378.6621,-43.2414,39.3023,-116.0889,right,11804106
1,1.0147,0.3566,1.1539,-0.1511,0.1433,0.5248,-0.376,0.186,-0.6379,9.2285,48.6599,-174.6216,-224.471,107.7086,-321.3501,-42.3299,32.3132,-90.0879,right,11804106
2,1.0819,0.4401,1.4689,-0.149,0.0992,0.3985,-0.3194,0.3332,-1.1453,-16.8538,87.1097,-278.4424,-230.6885,121.5911,-391.1133,-65.8,23.2705,-78.6743,right,11804106
3,1.0942,0.278,0.9828,-0.2154,0.188,0.647,-0.3281,0.0928,-0.3226,-5.0741,86.1526,-310.6079,-225.293,77.1939,-262.7563,-13.5213,45.057,-167.6025,right,11804106
4,0.9802,0.3731,1.1923,-0.1418,0.1271,0.4563,-0.4507,0.1598,-0.5585,5.717,42.9314,-132.3242,-208.9396,93.9282,-290.3442,-42.2892,22.3983,-83.6182,right,11804106


In [68]:
# Extract features and labels
feature_columns = ['meanAx', 'sdAx', 'rangeAx', 'meanAy', 'sdAy', 'rangeAy', 
                   'meanAz', 'sdAz', 'rangeAz', 'meanGx', 'sdGx', 'rangeGx', 
                   'meanGy', 'sdGy', 'rangeGy', 'meanGz', 'sdGz', 'rangeGz']

X = df[feature_columns]
y = df['label']

print(f"Features shape: {X.shape}")
print(f"Labels shape: {y.shape}")
print(f"Unique labels: {y.unique()}")

Features shape: (713, 18)
Labels shape: (713,)
Unique labels: ['right' 'left' 'up' 'down']


In [69]:
# Check class distribution BEFORE encoding
print("\n=== CLASS DISTRIBUTION ===")
label_counts = df['label'].value_counts()
print(label_counts)
print(f"\nTotal samples: {len(df)}")


=== CLASS DISTRIBUTION ===
label
right    199
left     194
down     164
up       156
Name: count, dtype: int64

Total samples: 713


In [70]:
def encode_label(lbl):
    # Convert label to lowercase to handle inconsistencies like 'Right' vs 'right'
    lbl_lower = str(lbl).lower()
    label_map = {'right': 0, 'left': 1,  'up': 2, 'down': 3, 'push': 4}
    return label_map.get(lbl_lower, -1)

df['label'] = df['label'].apply(encode_label)

# IMPORTANT: Re-extract y AFTER encoding to numeric values
y = df['label']

# Verify that there are no -1 labels
if -1 in y.unique():
    print("ERROR: Found -1 in labels after encoding. Check for unexpected label strings.")
    print(df[df['label'] == -1])
else:
    print(f"OK: Encoded labels are: {y.unique()}")
    
print(f"Label type: {type(y.iloc[0])}")

OK: Encoded labels are: [0 1 2 3]
Label type: <class 'numpy.int64'>


In [71]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Normalization will be done after train-test split to avoid data leakage

In [72]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from sklearn.svm import SVC
svm_model = SVC(kernel='rbf', 
                class_weight='balanced', 
                C=1.0, 
                gamma=0.1, 
                probability=True, 
                decision_function_shape='ovo')
# Train the SVM model
svm_model.fit(X_train, y_train)

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,0.1
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,'balanced'


In [74]:
from sklearn.metrics import classification_report, accuracy_score
y_pred = svm_model.predict(X_test)
# Evaluate the model performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9300699300699301
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.86      0.87        36
           1       0.91      0.93      0.92        42
           2       0.97      0.95      0.96        38
           3       0.96      1.00      0.98        27

    accuracy                           0.93       143
   macro avg       0.93      0.93      0.93       143
weighted avg       0.93      0.93      0.93       143



In [75]:
import pickle
with open('svm_model.pkl', 'wb') as file:
    pickle.dump(svm_model, file)
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [76]:
# Verify model predictions are numeric
print("\n=== VALIDATION ===")
test_pred = svm_model.predict(X_test[:5])
print(f"Sample predictions: {test_pred}")
print(f"Prediction types: {[type(p).__name__ for p in test_pred]}")
print(f"Expected: numeric 0-4, Got: {test_pred[0]} (type: {type(test_pred[0]).__name__})")

if isinstance(test_pred[0], str):
    print("ERROR: Model is predicting strings! Need to retrain with numeric labels.")
else:
    print("OK: Model predicts numeric values")


=== VALIDATION ===
Sample predictions: [0 3 2 3 3]
Prediction types: ['int64', 'int64', 'int64', 'int64', 'int64']
Expected: numeric 0-4, Got: 0 (type: int64)
OK: Model predicts numeric values


In [None]:
# Analyze decision confidence for each class
print("\n=== DECISION FUNCTION ANALYSIS ===")
decision_scores = svm_model.decision_function(X_test)
probabilities = svm_model.predict_proba(X_test)

label_names = {0: 'right', 1: 'left', 2: 'up', 3: 'down'}

for i in range(min(5, len(X_test))):
    actual = label_names[y_test.iloc[i]]
    predicted = label_names[svm_model.predict(X_test[i:i+1])[0]]
    confidence = max(probabilities[i]) * 100
    print(f"\nSample {i}: Actual={actual}, Predicted={predicted}, Confidence={confidence:.1f}%")
    print(f"  Probabilities: {dict(zip([label_names[j] for j in range(4)], probabilities[i]))}")


=== DECISION FUNCTION ANALYSIS ===

Sample 0: Actual=left, Predicted=right, Confidence=55.9%
  Probabilities: {'right': np.float64(0.558668602557881), 'left': np.float64(0.43201994071935323), 'up': np.float64(0.008453851666814824), 'down': np.float64(0.0008576050559508008)}

Sample 1: Actual=down, Predicted=down, Confidence=99.8%
  Probabilities: {'right': np.float64(0.0015712362067178789), 'left': np.float64(0.0003733817796384412), 'up': np.float64(0.00022207688007761954), 'down': np.float64(0.997833305133566)}

Sample 2: Actual=up, Predicted=up, Confidence=99.2%
  Probabilities: {'right': np.float64(0.004662748103100136), 'left': np.float64(0.0022599123706004945), 'up': np.float64(0.991838526810638), 'down': np.float64(0.0012388127156613374)}

Sample 3: Actual=down, Predicted=down, Confidence=99.6%
  Probabilities: {'right': np.float64(0.0012711800241111697), 'left': np.float64(0.0005054617468066341), 'up': np.float64(0.0019025805357066336), 'down': np.float64(0.9963207776933757)}

