In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
import os
from PIL import Image
from sklearn.model_selection import GridSearchCV
from pickle import dump
from sklearn.model_selection import ParameterGrid


In [3]:
images_folder = '/Users/shaneab/Projects/Machine Learning/Expression recognition/jonathanheix dataset/images'

X = []
y = []

for subdir, dirs, files in os.walk(images_folder):
    for file in files:
        if file.endswith(('jpg', 'jpeg', 'png')):
            img_path = os.path.join(subdir, file)
            label = os.path.basename(subdir)
            
            image = Image.open(img_path).convert('L')
            image = image.resize((48, 48))
            X.append(np.array(image).flatten())
            y.append(label)

with open("svc_standardscaler_gridsearch_xy_dump.pkl", "wb") as f:
    dump((X,y), f, protocol=5)

In [4]:
X = np.array(X)
y = np.array(y)

In [5]:
# Normalize image data
X = X / 255.0  # Normalize pixel values to [0, 1]

In [6]:
# Encode labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [7]:
pca = PCA(n_components=100)  # Reduce to 100 components
X_reduced = pca.fit_transform(X)

In [8]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_reduced)

In [9]:
with open("svc_standardscaler_gridsearch_normalizers_dump.pkl", "wb") as f:
    dump((label_encoder, pca, scaler), f, protocol=5)

In [10]:
X_train , X_test, y_train, y_test =  train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [11]:
# # Grid Search
# param_grid = {
#     'C': [1, 10, 100],  # Smaller range for C
#     'kernel': ['rbf', 'poly', 'sigmoid'],
#     'gamma': ['scale'], 
#     'degree': [1, 2, 3],  
# }

# all_combinations = list(ParameterGrid(param_grid))

# for combination in all_combinations:
#     print(combination)
    
#     svc = SVC(**combination)
#     svc.fit(X_train, y_train)

#     # Make predictions
#     y_pred = svc.predict(X_test)

#     # Evaluate the model
#     accuracy = accuracy_score(y_test, y_pred)
#     print(f"Accuracy: {accuracy:.2f}")
#     print("Classification Report:")
#     print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


# print("\nTotal combinations:", len(all_combinations))


# # Perform Grid Search with 5-fold cross-validation
# svc_model = GridSearchCV(SVC(), param_grid, cv=3, verbose=2, n_jobs=2)
# svc_model.fit(X_train, y_train)

# with open("svc_model_standardscaler_grisearch_pca_dump.pkl", "wb") as f:
#     dump(svc_model, f, protocol=5)

# print("Best Parameters:", svc_model.best_params_)

param_grid = {
    'C': [1, 10, 100],
    'kernel': ['rbf', 'poly', 'sigmoid'],
    'gamma': ['scale'],
    'degree': [1, 2, 3],
}


print("Performing Grid Search...")
svc_model = GridSearchCV(
    SVC(), param_grid, cv=3, verbose=2, n_jobs=2, refit=True, return_train_score=True
)
svc_model.fit(X_train, y_train)

# Extract all results and display them
results_df = pd.DataFrame(svc_model.cv_results_)
results_df = results_df.sort_values(by="mean_test_score", ascending=False)

# Display top results
print("\nAll Parameter Combinations and Accuracies:")
print(results_df[['params', 'mean_test_score', 'std_test_score']])

# Save the best model
best_model = svc_model.best_estimator_
with open("svc_model_best_standardscaler_grisearch_pca_dump.pkl", "wb") as f:
    dump(best_model, f)

# Display the best parameters
print("\nBest Parameters:", svc_model.best_params_)




print("\nEvaluating the Best Model...")
# Make predictions
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Evaluate the model
print(f"Accuracy on Test Set: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Performing Grid Search...
Fitting 3 folds for each of 27 candidates, totalling 81 fits
[CV] END .............C=1, degree=1, gamma=scale, kernel=rbf; total time= 4.9min
[CV] END .............C=1, degree=1, gamma=scale, kernel=rbf; total time= 5.1min
[CV] END ............C=1, degree=1, gamma=scale, kernel=poly; total time= 3.2min
[CV] END .............C=1, degree=1, gamma=scale, kernel=rbf; total time= 4.9min
[CV] END ............C=1, degree=1, gamma=scale, kernel=poly; total time= 2.5min
[CV] END ............C=1, degree=1, gamma=scale, kernel=poly; total time= 3.1min
[CV] END .........C=1, degree=1, gamma=scale, kernel=sigmoid; total time= 3.5min
[CV] END .........C=1, degree=1, gamma=scale, kernel=sigmoid; total time= 3.3min
[CV] END .........C=1, degree=1, gamma=scale, kernel=sigmoid; total time= 3.2min
[CV] END .............C=1, degree=2, gamma=scale, kernel=rbf; total time= 6.2min
[CV] END .............C=1, degree=2, gamma=scale, kernel=rbf; total time= 5.8min


KeyboardInterrupt: 

In [19]:
# # Make Prediciton
# best_knn = svc_model.best_estimator_
# best_knn
# y_pred = best_knn.predict(X_test)
# y_pred

array([4, 4, 3, ..., 4, 5, 2])

In [20]:
# # Evaluate the model
# print("Classification Report:")
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
# print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")

Classification Report:
Accuracy: 0.8946011842563567
              precision    recall  f1-score   support

       angry       0.90      0.85      0.87      1963
     disgust       0.93      0.91      0.92       217
        fear       0.91      0.86      0.88      2064
       happy       0.90      0.93      0.92      3590
     neutral       0.85      0.90      0.87      2504
         sad       0.89      0.87      0.88      2417
    surprise       0.93      0.93      0.93      1600

    accuracy                           0.89     14355
   macro avg       0.90      0.89      0.90     14355
weighted avg       0.90      0.89      0.89     14355

Accuracy: 0.89


In [21]:
# Here you can replace pickle with joblib or cloudpickle
# from pickle import dump
# with open("filename.pkl", "wb") as f:
#     dump(clf, f, protocol=5)