In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [14]:
train_df = pd.read_excel("data/train-sample-data.xlsx", )
unseen_df = pd.read_excel("data/test-sample-data.xlsx")
train_df.shape, unseen_df.shape

((1254, 16), (276, 16))

In [15]:
input_columns = []
ear_v_len = 15

for i in range(ear_v_len):
    column_name = "EAR" + str(i + 1)
    input_columns.append(column_name)


X = train_df[input_columns]
y = train_df.EYE_STATE


In [16]:
1276 * 0.2, 1276 * 0.8

(255.20000000000002, 1020.8000000000001)

In [17]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model 1
# Initialize the SVM classifier
svm_classifier = SVC(kernel='linear', C=1.0)

# Train the SVM classifier on the training set
svm_classifier.fit(X_train_scaled, y_train)

# Make predictions on the testing set
y_pred = svm_classifier.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{classification_rep}")

Accuracy: 0.9123505976095617
Confusion Matrix:
[[80  5  0]
 [10 67  1]
 [ 0  6 82]]
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.94      0.91        85
           1       0.86      0.86      0.86        78
           2       0.99      0.93      0.96        88

    accuracy                           0.91       251
   macro avg       0.91      0.91      0.91       251
weighted avg       0.91      0.91      0.91       251



In [18]:
# Performance of unseen data
X_unseen = unseen_df[input_columns]
y_unseen = unseen_df.EYE_STATE

X_unseen_scaled = scaler.transform(X_unseen)

# Make predictions on the testing set
y_unseen_pred = svm_classifier.predict(X_unseen_scaled)

# Evaluate the model
accuracy = accuracy_score(y_unseen, y_unseen_pred)
conf_matrix = confusion_matrix(y_unseen, y_unseen_pred)
classification_rep = classification_report(y_unseen, y_unseen_pred)

print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{classification_rep}")


Accuracy: 0.9239130434782609
Confusion Matrix:
[[87  5  0]
 [13 79  0]
 [ 0  3 89]]
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.95      0.91        92
           1       0.91      0.86      0.88        92
           2       1.00      0.97      0.98        92

    accuracy                           0.92       276
   macro avg       0.93      0.92      0.92       276
weighted avg       0.93      0.92      0.92       276



In [16]:
model_filename = 'models/svm_model_1.joblib'
joblib.dump(svm_classifier, model_filename)

['models/svm_model_1.joblib']

In [17]:
scaler_filename = 'models/feature_scaler_1.joblib'
joblib.dump(scaler, scaler_filename)

['models/feature_scaler_1.joblib']

In [19]:
# Model 2
# Initialize the SVM classifier
svm_classifier = SVC(kernel='rbf')

# Train the SVM classifier on the training set
svm_classifier.fit(X_train_scaled, y_train)

# Make predictions on the testing set
y_pred = svm_classifier.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print("***** Test set results ******")
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{classification_rep}")

# Make predictions on the unseen data set
y_unseen_pred = svm_classifier.predict(X_unseen_scaled)

# Evaluate the model
accuracy = accuracy_score(y_unseen, y_unseen_pred)
conf_matrix = confusion_matrix(y_unseen, y_unseen_pred)
classification_rep = classification_report(y_unseen, y_unseen_pred)

print("***** Unseen data set results ******")
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{classification_rep}")

***** Test set results ******
Accuracy: 0.9243027888446215
Confusion Matrix:
[[81  4  0]
 [ 8 69  1]
 [ 0  6 82]]
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.95      0.93        85
           1       0.87      0.88      0.88        78
           2       0.99      0.93      0.96        88

    accuracy                           0.92       251
   macro avg       0.92      0.92      0.92       251
weighted avg       0.93      0.92      0.92       251

***** Unseen data set results ******
Accuracy: 0.9528985507246377
Confusion Matrix:
[[85  7  0]
 [ 5 87  0]
 [ 0  1 91]]
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.92      0.93        92
           1       0.92      0.95      0.93        92
           2       1.00      0.99      0.99        92

    accuracy                           0.95       276
   macro avg       0.95      0.95      0.95       276
weighted avg

In [21]:
model_filename = 'models/svm_model_2.joblib'
joblib.dump(svm_classifier, model_filename)

['models/svm_model_2.joblib']

### Hyperparameter Tuning

In [20]:
# Model 3

# Define the SVM model
svm_model = SVC(random_state=42)

# Define the hyperparameters and their possible values for tuning
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto', 0.1, 1],
}

# Create a GridSearchCV object
grid_search = GridSearchCV(svm_model, param_grid, cv=5, scoring='accuracy')

# Fit the model to the training data
grid_search.fit(X_train_scaled, y_train)

# Print the best hyperparameters found by GridSearchCV
print("Best Hyperparameters:", grid_search.best_params_)

# Get the best model
best_model = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_model.predict(X_test_scaled)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Best Hyperparameters: {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
Accuracy: 0.9402390438247012


In [21]:


# Make predictions on the testing set
y_pred = best_model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print("***** Test set results ******")
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{classification_rep}")

# Make predictions on the unseen data set
y_unseen_pred = best_model.predict(X_unseen_scaled)

# Evaluate the model
accuracy = accuracy_score(y_unseen, y_unseen_pred)
conf_matrix = confusion_matrix(y_unseen, y_unseen_pred)
classification_rep = classification_report(y_unseen, y_unseen_pred)

print("***** Unseen data set results ******")
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{classification_rep}")

***** Test set results ******
Accuracy: 0.9402390438247012
Confusion Matrix:
[[80  5  0]
 [ 4 73  1]
 [ 0  5 83]]
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.94      0.95        85
           1       0.88      0.94      0.91        78
           2       0.99      0.94      0.97        88

    accuracy                           0.94       251
   macro avg       0.94      0.94      0.94       251
weighted avg       0.94      0.94      0.94       251

***** Unseen data set results ******
Accuracy: 0.9456521739130435
Confusion Matrix:
[[85  7  0]
 [ 5 86  1]
 [ 0  2 90]]
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.92      0.93        92
           1       0.91      0.93      0.92        92
           2       0.99      0.98      0.98        92

    accuracy                           0.95       276
   macro avg       0.95      0.95      0.95       276
weighted avg