In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [2]:
images_folder = '../images'

X = []
y = []

for subdir, dirs, files in os.walk(images_folder):
    for file in files:
        if file.endswith(('jpg', 'jpeg', 'png')):
            img_path = os.path.join(subdir, file)
            label = os.path.basename(subdir)
            
            image = Image.open(img_path).convert('L')
            image = image.resize((48, 48))
            X.append(np.array(image).flatten())
            y.append(label)

In [3]:
# Convert lists to NumPy arrays
X = np.array(X)
y = np.array(y)

In [4]:
# Normalize image data
X = X / 255.0  # Normalize pixel values to [0, 1]

In [5]:
# Encode labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [6]:
pca = PCA(n_components=100)  # Reduce to 100 components
X_reduced = pca.fit_transform(X)

In [7]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_reduced)

In [8]:
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [9]:
# Grid Search
param_grid = {
    'n_neighbors': [1, 3, 5, 7, 9, 11],
    'metric': ['euclidean', 'manhattan', 'minkowski'],
    'weights': ['uniform', 'distance']
}
knn_model = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
knn_model.fit(X_train, y_train)
print("Best Parameters:", knn_model.best_params_)

Best Parameters: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}


In [10]:
# Make Prediciton
best_knn = knn_model.best_estimator_
best_knn
y_pred = best_knn.predict(X_test)
y_pred

array([4, 3, 3, ..., 4, 5, 2])

In [11]:
# Evaluate the model
print("Classification Report:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")

Classification Report:
Accuracy: 0.8801114594218042
              precision    recall  f1-score   support

       angry       0.89      0.85      0.87      1963
     disgust       0.89      0.94      0.91       217
        fear       0.86      0.88      0.87      2064
       happy       0.92      0.87      0.90      3590
     neutral       0.82      0.89      0.85      2504
         sad       0.87      0.87      0.87      2417
    surprise       0.91      0.93      0.92      1600

    accuracy                           0.88     14355
   macro avg       0.88      0.89      0.88     14355
weighted avg       0.88      0.88      0.88     14355

Accuracy: 0.88


In [None]:
# Use the original dataset for visualization
for i in range(5):
    idx = np.random.randint(0, len(X))
    image = X[idx].reshape(48, 48)  # Original data
    plt.imshow(image, cmap='gray')
    plt.title(f"True: {y_test[idx]}, Predicted: {y_pred[idx]}")
    plt.axis('off')
    plt.show()


ValueError: cannot reshape array of size 100 into shape (48,48)