In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler


In [2]:
# Load dataset
# The CSV location can be overridden with the FER2013_CSV environment variable so the
# notebook is not tied to a single machine; the original local path stays as the fallback.
DATA_PATH = os.environ.get(
    'FER2013_CSV',
    '/Users/shaneab/Projects/Machine Learning/Expression recognition/fer20131.csv',
)
data = pd.read_csv(DATA_PATH)
# Every 'pixels' entry is parsed as a space-separated string of integers; the per-row
# vectors are stacked into one array (one row per CSV record).
# NOTE(review): this assumes every row holds the same number of values - confirm.
X = np.array([np.fromstring(pixel, dtype=int, sep=' ') for pixel in data['pixels']])
# Labels are taken from the 'emotion' column as a plain NumPy array.
y = data['emotion'].values


In [3]:
# Rescale the pixel values by a constant divisor.
# NOTE(review): presumably the inputs are 8-bit intensities (0-255) - confirm.
# NOTE(review): X is overwritten with its own scaled value, so running this cell
# a second time divides the data again.
PIXEL_DIVISOR = 255.0
X = X / PIXEL_DIVISOR

In [4]:
# Data normalization: standardize every column of X with StandardScaler.
# NOTE(review): the scaler is fit on all of X, before the train_test_split in a later
# cell, so the rows that end up in the test set contribute to the fitted statistics.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
# Last expression of the cell: shows the standardized array as the cell output.
X_scaled

array([[-0.60646962, -0.4562069 , -0.4011047 , ..., -0.08714567,
        -0.05788751, -0.41197523],
       [ 0.37587539,  0.40796695,  0.41422283, ...,  1.03537587,
         0.88911749,  0.88301341],
       [ 1.34609268,  1.17337807,  0.52711433, ..., -0.3193915 ,
        -0.04509014,  0.47674246],
       ...,
       [-1.24923857, -1.23396336, -1.22897574, ...,  0.53217656,
         0.24924925, -0.01840025],
       [-1.09157826, -1.09816461, -1.07845373, ..., -1.00322646,
        -1.06887933, -1.09755746],
       [-1.22498314, -1.28334472, -1.25406274, ...,  0.98376568,
         1.09387532,  1.09884485]])

In [5]:
# Reduce dimensionality with PCA
# Keep the first 100 principal components of the standardized data.
# random_state is pinned because PCA's default svd_solver='auto' may pick the
# randomized SVD solver for large inputs, which would otherwise let X_pca (and every
# downstream result) change between runs. 42 matches the seed passed to train_test_split.
pca = PCA(n_components=100, random_state=42)
X_pca = pca.fit_transform(X_scaled)

In [6]:
# Forward Sequential Feature Selection (SFS): pick 20 of the PCA components, scoring
# candidate subsets with a 5-nearest-neighbour classifier under 5-fold cross-validation.
# NOTE(review): the selector is fit on every row and label before the train/test split
# in a later cell, so held-out rows influence which components are kept.
knn = KNeighborsClassifier(n_neighbors=5)
sfs = SequentialFeatureSelector(
    knn,
    n_features_to_select=20,
    direction='forward',
    cv=5,
)
sfs.fit(X_pca, y)
X_selected = sfs.transform(X_pca)

In [7]:
# Hold out 20% of the selected features/labels as a test set; the seed is fixed so the
# same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_selected,
    y,
    test_size=0.2,
    random_state=42,
)
# Last expression of the cell: shows the training features as the cell output.
X_train

array([[-23.24064106, -11.57598653,  14.13890421, ...,   1.89553923,
         -1.2863029 ,  -1.98806132],
       [-19.28521726,   6.41154315,  31.6600425 , ...,   2.81721972,
          1.15320361,   3.13714173],
       [ 23.26895285, -23.85334953,  -6.94569126, ...,  -0.34063684,
         -0.59316465,   0.07553209],
       ...,
       [-15.60397751,   1.70759424,   1.79209842, ...,   2.69775085,
         -0.79770341,  -0.39407512],
       [ 25.99418322,   4.57945957, -10.16653206, ...,   3.28720607,
         -3.66826265,   2.41704113],
       [-72.15384978,   7.76697874,  20.09596846, ...,  -2.60245238,
          2.53335454,  -3.42109106]])

In [8]:
# Hyper-parameter search: try several neighbour counts for KNN and score each one
# with 5-fold cross-validation on the training split only.
param_grid = dict(n_neighbors=[1, 3, 5, 7, 9])
knn_model = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=5,
)
knn_model.fit(X_train, y_train)
print("Best Parameters:", knn_model.best_params_)

Best Parameters: {'n_neighbors': 1}


In [9]:
# Make prediction
# Take the estimator that GridSearchCV refit with the best parameters and use it to
# label the held-out test rows.
best_knn = knn_model.best_estimator_
y_pred = best_knn.predict(X_test)
# Last expression of the cell: shows the predicted labels as the cell output.
y_pred

array([3, 3, 3, ..., 3, 3, 2])

In [10]:
# Score the test-set predictions: overall accuracy first, then the per-class
# precision / recall / F1 table.
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("Classification Report:\n", test_report)

Accuracy: 0.40749512398996934
Classification Report:
               precision    recall  f1-score   support

           0       0.36      0.36      0.36       985
           1       0.36      0.58      0.44       102
           2       0.43      0.42      0.43      1043
           3       0.44      0.44      0.44      1765
           4       0.36      0.34      0.35      1210
           5       0.55      0.59      0.57       795
           6       0.33      0.33      0.33      1278

    accuracy                           0.41      7178
   macro avg       0.40      0.44      0.42      7178
weighted avg       0.41      0.41      0.41      7178

