In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
import os
from PIL import Image
from sklearn.model_selection import GridSearchCV
from pickle import dump
from sklearn.model_selection import ParameterGrid


In [9]:
images_folder = '/Users/shaneab/Projects/Machine Learning/Expression recognition/jonathanheix dataset/images'

X = []
y = []

for subdir, dirs, files in os.walk(images_folder):
    for file in files:
        if file.endswith(('jpg', 'jpeg', 'png')):
            img_path = os.path.join(subdir, file)
            label = os.path.basename(subdir)
            
            image = Image.open(img_path).convert('L')
            image = image.resize((48, 48))
            X.append(np.array(image).flatten())
            y.append(label)

with open("svc_standardscaler_gridsearch_xy_dump.pkl", "wb") as f:
    dump((X,y), f, protocol=5)

In [10]:
X = np.array(X)
y = np.array(y)

In [11]:
# Normalize image data
X = X / 255.0  # Normalize pixel values to [0, 1]

In [12]:
# Encode labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [13]:
pca = PCA(n_components=100)  # Reduce to 100 components
X_reduced = pca.fit_transform(X)

In [14]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_reduced)

In [15]:
with open("svc_standardscaler_gridsearch_normalizers_dump.pkl", "wb") as f:
    dump((label_encoder, pca, scaler), f, protocol=5)

In [16]:
X_train , X_test, y_train, y_test =  train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [None]:
# Grid Search
param_grid = {
    'C': [1, 10, 100],  # Smaller range for C
    'kernel': ['rbf', 'poly', 'sigmoid'],
    'gamma': ['scale'], 
    'degree': [1, 2, 3],  
}

all_combinations = list(ParameterGrid(param_grid))

for combination in all_combinations:
    print(combination)
    
    svc = SVC(**combination)
    svc.fit(X_train, y_train)

    # Make predictions
    y_pred = svc.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


print("\nTotal combinations:", len(all_combinations))


# Perform Grid Search with 5-fold cross-validation
svc_model = GridSearchCV(SVC(), param_grid, cv=3, verbose=2, n_jobs=2)
svc_model.fit(X_train, y_train)

with open("svc_model_standardscaler_grisearch_pca_dump.pkl", "wb") as f:
    dump(svc_model, f, protocol=5)

print("Best Parameters:", svc_model.best_params_)

{'C': 1, 'degree': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 0.69
Classification Report:
              precision    recall  f1-score   support

       angry       0.72      0.53      0.61      2001
     disgust       0.97      0.38      0.55       231
        fear       0.71      0.56      0.62      2068
       happy       0.68      0.86      0.76      3578
     neutral       0.63      0.68      0.66      2441
         sad       0.62      0.66      0.64      2429
    surprise       0.86      0.75      0.80      1607

    accuracy                           0.69     14355
   macro avg       0.74      0.63      0.66     14355
weighted avg       0.70      0.69      0.68     14355

{'C': 1, 'degree': 1, 'gamma': 'scale', 'kernel': 'poly'}
Accuracy: 0.38
Classification Report:
              precision    recall  f1-score   support

       angry       0.31      0.13      0.19      2001
     disgust       0.00      0.00      0.00       231
        fear       0.27      0.12      0.16      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.29
Classification Report:
              precision    recall  f1-score   support

       angry       0.14      0.09      0.11      2001
     disgust       0.00      0.00      0.00       231
        fear       0.13      0.08      0.10      2068
       happy       0.39      0.60      0.47      3578
     neutral       0.25      0.26      0.25      2441
         sad       0.22      0.21      0.21      2429
    surprise       0.33      0.31      0.32      1607

    accuracy                           0.29     14355
   macro avg       0.21      0.22      0.21     14355
weighted avg       0.25      0.29      0.26     14355

{'C': 1, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf'}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.69
Classification Report:
              precision    recall  f1-score   support

       angry       0.72      0.53      0.61      2001
     disgust       0.97      0.38      0.55       231
        fear       0.71      0.56      0.62      2068
       happy       0.68      0.86      0.76      3578
     neutral       0.63      0.68      0.66      2441
         sad       0.62      0.66      0.64      2429
    surprise       0.86      0.75      0.80      1607

    accuracy                           0.69     14355
   macro avg       0.74      0.63      0.66     14355
weighted avg       0.70      0.69      0.68     14355

{'C': 1, 'degree': 2, 'gamma': 'scale', 'kernel': 'poly'}
Accuracy: 0.54
Classification Report:
              precision    recall  f1-score   support

       angry       0.51      0.40      0.45      2001
     disgust       0.93      0.34      0.50       231
        fear       0.49      0.40      0.44      2068
       happy       0.54      0.76      0.63      357

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.69
Classification Report:
              precision    recall  f1-score   support

       angry       0.72      0.53      0.61      2001
     disgust       0.97      0.38      0.55       231
        fear       0.71      0.56      0.62      2068
       happy       0.68      0.86      0.76      3578
     neutral       0.63      0.68      0.66      2441
         sad       0.62      0.66      0.64      2429
    surprise       0.86      0.75      0.80      1607

    accuracy                           0.69     14355
   macro avg       0.74      0.63      0.66     14355
weighted avg       0.70      0.69      0.68     14355

{'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
Accuracy: 0.71
Classification Report:
              precision    recall  f1-score   support

       angry       0.94      0.54      0.69      2001
     disgust       0.99      0.39      0.55       231
        fear       0.93      0.60      0.73      2068
       happy       0.53      0.97      0.68      357

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [19]:
# Make Prediciton
best_knn = svc_model.best_estimator_
best_knn
y_pred = best_knn.predict(X_test)
y_pred

array([4, 4, 3, ..., 4, 5, 2])

In [20]:
# Evaluate the model
print("Classification Report:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")

Classification Report:
Accuracy: 0.8946011842563567
              precision    recall  f1-score   support

       angry       0.90      0.85      0.87      1963
     disgust       0.93      0.91      0.92       217
        fear       0.91      0.86      0.88      2064
       happy       0.90      0.93      0.92      3590
     neutral       0.85      0.90      0.87      2504
         sad       0.89      0.87      0.88      2417
    surprise       0.93      0.93      0.93      1600

    accuracy                           0.89     14355
   macro avg       0.90      0.89      0.90     14355
weighted avg       0.90      0.89      0.89     14355

Accuracy: 0.89


In [21]:
# Here you can replace pickle with joblib or cloudpickle
# from pickle import dump
# with open("filename.pkl", "wb") as f:
#     dump(clf, f, protocol=5)