In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
import os
from PIL import Image
from sklearn.model_selection import GridSearchCV

In [13]:
images_folder = '../images'

X = []
y = []

for subdir, dirs, files in os.walk(images_folder):
    for file in files:
        if file.endswith(('jpg', 'jpeg', 'png')):
            img_path = os.path.join(subdir, file)
            label = os.path.basename(subdir)
            
            image = Image.open(img_path).convert('L')
            image = image.resize((48, 48))
            X.append(np.array(image).flatten())
            y.append(label)

In [14]:
X = np.array(X)
y = np.array(y)

In [15]:
# Normalize image data
X = X / 255.0  # Normalize pixel values to [0, 1]

In [16]:
# Encode labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [17]:
pca = PCA(n_components=100)  # Reduce to 100 components
X_reduced = pca.fit_transform(X)

In [18]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_reduced)

In [19]:
X_train , X_test, y_train, y_test =  train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [25]:
# Grid Search
param_grid = {
    'C': [10, 100],  # Smaller range for C
    'kernel': ['rbf', 'poly', 'sigmoid'], # Common kernels
    'gamma': ['scale'],  # Focus on 'scale' and a specific value
    'degree': [3],  # Fixed degree for 'poly' kernel
}



# Perform Grid Search with 5-fold cross-validation
svc_model = GridSearchCV(SVC(), param_grid, cv=3, verbose=2, n_jobs=2)
svc_model.fit(X_train, y_train)
print("Best Parameters:", svc_model.best_params_)

Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best Parameters: {'C': 100, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}


In [23]:
# Make Prediciton
best_knn = svc_model.best_estimator_
best_knn
y_pred = best_knn.predict(X_test)
y_pred

array([4, 4, 3, ..., 4, 5, 2])

In [24]:
# Evaluate the model
print("Classification Report:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")

Classification Report:
Accuracy: 0.8835249042145594
              precision    recall  f1-score   support

       angry       0.84      0.85      0.85      1963
     disgust       0.97      0.91      0.94       217
        fear       0.86      0.85      0.86      2064
       happy       0.91      0.93      0.92      3590
     neutral       0.87      0.88      0.88      2504
         sad       0.86      0.86      0.86      2417
    surprise       0.94      0.91      0.93      1600

    accuracy                           0.88     14355
   macro avg       0.90      0.88      0.89     14355
weighted avg       0.88      0.88      0.88     14355

Accuracy: 0.88
