In [1]:
import pandas as pd
# Quick preview: load the training CSV and show its first five rows.
data = pd.read_csv(filepath_or_buffer='Training_data1.csv')
data.head(n=5)

Unnamed: 0,meanX,sdX,rangeX,meanY,sdY,rangeY,meanZ,sdZ,rangeZ,wristArmed,label,studentId
0,0.0049,0.0046,0.0156,-0.3972,0.0064,0.0303,-0.9036,0.0027,0.0113,0,still,11965827
1,0.0097,0.0068,0.0274,-0.3963,0.0058,0.026,-0.9045,0.0015,0.0055,0,still,11965827
2,0.013,0.0066,0.0223,-0.394,0.0073,0.0288,-0.905,0.0031,0.0151,0,still,11965827
3,0.0203,0.0124,0.0439,-0.3937,0.0167,0.075,-0.9051,0.0046,0.0174,0,still,11965827
4,0.0274,0.0129,0.0439,-0.3943,0.0176,0.075,-0.9051,0.0047,0.0174,0,still,11965827


# SVM Framework for Motion Data Classification

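This notebook provides a framework for classifying motion data with a Support Vector Machine (SVM).
Each row of the data holds accelerometer features (mean, standard deviation, and range) for the X, Y, and Z axes, together with a wrist flag (`wristArmed`), the activity label (`label`), and a `studentId`. Only the nine accelerometer features and `wristArmed` are used as model inputs.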

In [None]:
# Import required libraries
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC

# Shared seed for reproducibility: passed as `random_state` to train_test_split
# and to every SVC constructed in this notebook.
RANDOM_STATE = 42

## Step 1: Data Exploration and Understanding

In [None]:
# Load the training data and print a plain-text overview of it
data = pd.read_csv('Training_data1.csv')

print("Dataset Shape:", data.shape)

# Each call below prints a heading line, then the value on the following line(s).
print("\nColumn Names:", data.columns.tolist(), sep="\n")
print("\nData Types:", data.dtypes, sep="\n")
print("\nFirst few rows:", data.head(), sep="\n")

# Per-column count of missing values
print("\nMissing Values:", data.isnull().sum(), sep="\n")

# Number of samples per activity label
print("\nClass Distribution:", data['label'].value_counts(), sep="\n")

# Summary statistics as produced by DataFrame.describe()
print("\nBasic Statistics:", data.describe(), sep="\n")

## Step 2: Data Visualization

In [None]:
# Side-by-side bar charts: sample counts per activity label and per wristArmed value
fig_counts, (ax_label, ax_wrist) = plt.subplots(1, 2, figsize=(12, 4))

data['label'].value_counts().plot(kind='bar', ax=ax_label, rot=45)
ax_label.set_title('Activity Label Distribution')
ax_label.set_xlabel('Activity')
ax_label.set_ylabel('Count')

data['wristArmed'].value_counts().plot(kind='bar', ax=ax_wrist)
ax_wrist.set_title('Wrist Position Distribution')
ax_wrist.set_xlabel('Wrist Armed (0=No, 1=Yes)')
ax_wrist.set_ylabel('Count')

fig_counts.tight_layout()
plt.show()

# Heatmap of pairwise correlations between the nine accelerometer features
numerical_features = [
    'meanX', 'sdX', 'rangeX',
    'meanY', 'sdY', 'rangeY',
    'meanZ', 'sdZ', 'rangeZ',
]
correlation_matrix = data[numerical_features].corr()

fig_corr, ax_corr = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=ax_corr)
ax_corr.set_title('Feature Correlation Matrix')
plt.show()

## Step 3: Data Preprocessing

In [None]:
# Model inputs (feature columns) and the target column
feature_columns = [
    'meanX', 'sdX', 'rangeX',
    'meanY', 'sdY', 'rangeY',
    'meanZ', 'sdZ', 'rangeZ',
    'wristArmed',
]
X = data[feature_columns]
y = data['label']

print("Feature matrix shape:", X.shape)
print("Target vector shape:", y.shape)
print("\nFeatures being used:")
for position, column in enumerate(feature_columns, start=1):
    print(f"{position}. {column}")

# Map the activity labels to integer class ids
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
print("\nLabel encoding mapping:")
for class_id, class_name in enumerate(label_encoder.classes_):
    print(f"{class_name} -> {class_id}")

# Stratified 80/20 train/test split, seeded for reproducibility
split_options = dict(test_size=0.2, random_state=RANDOM_STATE, stratify=y_encoded)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, **split_options)

print(f"\nTraining set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

# Standardize features (important for SVM): statistics come from the training
# split only and are then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("\nFeature scaling completed.")

## Step 4: SVM Model Training and Hyperparameter Tuning

In [None]:
# Baseline SVM with scikit-learn's default hyperparameters, fitted on the
# already-scaled training split.
print("Training basic SVM model...")
svm_basic = SVC(random_state=RANDOM_STATE)
svm_basic.fit(X_train_scaled, y_train)

# Cross-validate through a scaler+SVC pipeline on the *unscaled* training data.
# Scoring on X_train_scaled would let every validation fold influence the
# scaling statistics (they were computed on the whole training split), which
# makes the CV estimate slightly optimistic. Inside the pipeline the scaler is
# re-fitted on the training part of each fold only.
basic_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(random_state=RANDOM_STATE)),
])
cv_scores_basic = cross_val_score(basic_pipeline, X_train, y_train, cv=5)
print(f"Basic SVM Cross-validation accuracy: {cv_scores_basic.mean():.4f} (+/- {cv_scores_basic.std() * 2:.4f})")

# Hyperparameter tuning with GridSearchCV.
# Keys carry the 'svc__' prefix because they address the SVC step of the
# pipeline. Shrink the value lists for a faster run if needed.
print("\nPerforming hyperparameter tuning...")
param_grid = {
    'svc__C': [0.1, 1, 10, 100],
    'svc__gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
    'svc__kernel': ['rbf', 'poly', 'sigmoid'],
}

grid_search = GridSearchCV(
    Pipeline([
        ('scaler', StandardScaler()),
        ('svc', SVC(random_state=RANDOM_STATE)),
    ]),
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,  # Use all available cores
    verbose=1
)

grid_search.fit(X_train, y_train)

# Report the parameters under their plain SVC names (without the step prefix)
best_params = {name.split('__', 1)[-1]: value
               for name, value in grid_search.best_params_.items()}
print(f"\nBest parameters: {best_params}")
print(f"Best cross-validation score: {grid_search.best_score_:.4f}")

# Get the best model.
# GridSearchCV refits the winning pipeline on all of X_train, so its scaler
# step learns the same statistics as `scaler`; the extracted SVC therefore
# expects exactly the inputs produced by `scaler.transform(...)`
# (e.g. X_test_scaled).
best_svm = grid_search.best_estimator_.named_steps['svc']

## Step 5: Model Evaluation

In [None]:
# Predict on the held-out test split with the tuned model
y_pred = best_svm.predict(X_test_scaled)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

# Encoded class ids in the same order as label_encoder.classes_. Passing them
# explicitly keeps the report rows and the confusion-matrix axes aligned with
# the class names even if some class is absent from y_test and y_pred.
class_ids = np.arange(len(label_encoder.classes_))

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_ids,
                            target_names=label_encoder.classes_))

# Confusion Matrix (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Feature importance (for linear kernel only)
if best_svm.kernel == 'linear':
    # For more than two classes SVC.coef_ holds one row per one-vs-one class
    # pair, so coef_[0] alone would describe only the first pair. Average the
    # absolute coefficients over all rows; with two classes there is a single
    # row and this equals |coef_[0]|.
    feature_importance = np.abs(best_svm.coef_).mean(axis=0)
    feature_names = feature_columns

    plt.figure(figsize=(10, 6))
    indices = np.argsort(feature_importance)[::-1]  # most important first
    plt.bar(range(len(feature_importance)), feature_importance[indices])
    plt.xticks(range(len(feature_importance)), [feature_names[i] for i in indices], rotation=45)
    plt.title('Feature Importance (Linear SVM)')
    plt.ylabel('Absolute Coefficient Value')
    plt.tight_layout()
    plt.show()
else:
    print(f"\nFeature importance visualization not available for {best_svm.kernel} kernel.")

## Step 6: Advanced Analysis (Optional)

In [None]:
# Fit one SVC per kernel (all other hyperparameters at their defaults) and
# record its 5-fold CV accuracy on the training split and its test accuracy.
kernels = ['linear', 'rbf', 'poly', 'sigmoid']
kernel_results = {}

print("Comparing different SVM kernels:")
for kernel in kernels:
    svm_kernel = SVC(kernel=kernel, random_state=RANDOM_STATE).fit(X_train_scaled, y_train)

    # Cross-validated accuracy on the training split
    cv_scores = cross_val_score(svm_kernel, X_train_scaled, y_train, cv=5)
    cv_mean, cv_std = cv_scores.mean(), cv_scores.std()

    # Accuracy on the held-out test split
    y_pred_kernel = svm_kernel.predict(X_test_scaled)
    test_accuracy = accuracy_score(y_test, y_pred_kernel)

    kernel_results[kernel] = dict(cv_mean=cv_mean, cv_std=cv_std, test_accuracy=test_accuracy)

    print(f"{kernel.upper()} Kernel:")
    print(f"  CV Accuracy: {cv_mean:.4f} (+/- {cv_std * 2:.4f})")
    print(f"  Test Accuracy: {test_accuracy:.4f}")
    print()

# Bar charts comparing the kernels: CV accuracy (with std error bars) and test accuracy
cv_means = [kernel_results[name]['cv_mean'] for name in kernels]
cv_stds = [kernel_results[name]['cv_std'] for name in kernels]
test_accs = [kernel_results[name]['test_accuracy'] for name in kernels]

fig_kernels, (ax_cv, ax_test) = plt.subplots(1, 2, figsize=(12, 4))

ax_cv.bar(kernels, cv_means, yerr=cv_stds, capsize=5)
ax_cv.set_title('Cross-Validation Accuracy by Kernel')
ax_cv.set_ylabel('Accuracy')
ax_cv.set_ylim(0, 1)

ax_test.bar(kernels, test_accs)
ax_test.set_title('Test Accuracy by Kernel')
ax_test.set_ylabel('Accuracy')
ax_test.set_ylim(0, 1)

fig_kernels.tight_layout()
plt.show()

## Step 7: Model Deployment and Prediction Function

In [None]:
# Create a prediction function for new data
def predict_activity(meanX, sdX, rangeX, meanY, sdY, rangeY, meanZ, sdZ, rangeZ, wristArmed):
    """
    Predict the activity for one sample of motion sensor features.

    Uses the notebook-level `scaler`, `best_svm` and `label_encoder` objects,
    so those cells must have been run first.

    Parameters:
    - meanX, sdX, rangeX: X-axis statistics
    - meanY, sdY, rangeY: Y-axis statistics
    - meanZ, sdZ, rangeZ: Z-axis statistics
    - wristArmed: 0 or 1 indicating wrist position

    Returns:
    - A tuple (prediction_label, prediction_proba):
      - prediction_label: the decoded activity label
      - prediction_proba: per-class probabilities ordered like
        label_encoder.classes_, or None when the model does not expose
        predict_proba. scikit-learn's SVC only exposes it when constructed
        with probability=True, which the models in this notebook are not,
        so this is None unless best_svm is rebuilt with that option.
    """
    # Single-row feature matrix, in the same column order used for training
    features = np.array([[meanX, sdX, rangeX, meanY, sdY, rangeY,
                         meanZ, sdZ, rangeZ, wristArmed]])

    # The scaler was fitted on a DataFrame, so it remembers the column names.
    # Passing a bare array makes scikit-learn warn "X does not have valid
    # feature names" on every call; wrap the row with the fitted names instead.
    # Older scikit-learn versions without feature_names_in_ keep the plain array.
    fitted_names = getattr(scaler, 'feature_names_in_', None)
    if fitted_names is not None:
        features = pd.DataFrame(features, columns=fitted_names)

    # Scale the features
    features_scaled = scaler.transform(features)

    # Make prediction
    prediction_encoded = best_svm.predict(features_scaled)[0]
    if hasattr(best_svm, 'predict_proba'):
        prediction_proba = best_svm.predict_proba(features_scaled)[0]
    else:
        prediction_proba = None

    # Decode the integer class id back to the original activity label
    prediction_label = label_encoder.inverse_transform([prediction_encoded])[0]

    return prediction_label, prediction_proba

# Example prediction on the first held-out test sample
print("Example prediction:")
sample_features = X_test.iloc[0]  # Use first test sample
# Pass the values by keyword: the feature column names are identical to
# predict_activity's parameter names, so the call does not depend on the
# column order of X_test.
predicted_label, proba = predict_activity(**sample_features.to_dict())
# y_test is positionally aligned with X_test, so index 0 is the same sample
actual_label = label_encoder.inverse_transform([y_test[0]])[0]

print(f"Input features: {sample_features.to_dict()}")
print(f"Predicted activity: {predicted_label}")
print(f"Actual activity: {actual_label}")
if proba is not None:
    print(f"Prediction probabilities: {dict(zip(label_encoder.classes_, proba))}")

# Save the model (optional)
# Persist the trained model, scaler, and label encoder; all three are needed
# to reproduce predict_activity in another session. joblib is imported with
# the other libraries at the top of the notebook.
joblib.dump(best_svm, 'svm_motion_classifier.pkl')
joblib.dump(scaler, 'feature_scaler.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

print("\nModel, scaler, and label encoder saved to disk.")