In [3]:
import pandas as pd
from sklearn.feature_selection import chi2, SelectKBest
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score

# --- Load and preprocess the dataset ---------------------------------------
# Columns removed before modelling: the image identifier plus a set of
# attribute columns that are excluded from this experiment.
dropped_columns = [
    'image_id', '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Bags_Under_Eyes',
    'Bushy_Eyebrows', 'Bangs', 'Blurry', 'Smiling', 'Mouth_Slightly_Open',
    'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick', 'Wearing_Necklace',
    'Wearing_Necktie'
]

# Read the attribute table, map 1 -> 1.0 and -1 -> 0.0, then discard the
# excluded columns. Built as one chain so the frame is never mutated in place.
celeb_data = (
    pd.read_csv('../list_attr_celeba.csv')
    .replace(1, 1.0)
    .replace(-1, 0.0)
    .drop(columns=dropped_columns)
)

# --- Features (X) and target (y) -------------------------------------------
y = celeb_data["Attractive"]
X = celeb_data.drop(columns="Attractive")

# --- Train/test split: 30% held out, seeded for repeatability ---------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# --- Chi-square feature selection (top 10) ----------------------------------
# The selector is fitted on the training split only and then applied to both
# splits, so the test data does not take part in choosing the features.
# chi2 does not accept negative feature values, which the -1 -> 0.0 recoding
# in the loading step takes care of.
selector = SelectKBest(score_func=chi2, k=10)
selector.fit(X_train, y_train)
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

# Names of the columns retained by the selector
selected_features = X.columns[selector.get_support()]
print("Selected features:", selected_features, sep="\n")

# --- KNN classifier on the selected features --------------------------------
# n_neighbors is a tunable hyperparameter; fit() returns the estimator itself.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_selected, y_train)

# Predict labels for the held-out split
y_pred = knn.predict(X_test_selected)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Selected features:
Index(['Big_Nose', 'Chubby', 'Double_Chin', 'Eyeglasses', 'Gray_Hair',
       'Heavy_Makeup', 'Male', 'Pointy_Nose', 'Wavy_Hair', 'Young'],
      dtype='object')

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.56      0.66     29734
           1       0.67      0.85      0.75     31046

    accuracy                           0.71     60780
   macro avg       0.73      0.71      0.70     60780
weighted avg       0.73      0.71      0.71     60780



In [4]:
# Score the held-out predictions (y_pred vs. y_test) with four summary
# metrics. All four are computed first and reported together below.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Report each metric on its own line, formatted to six decimal places.
print(
    f"Accuracy: {accuracy:.6f}",
    f"Precision: {precision:.6f}",
    f"Recall: {recall:.6f}",
    f"F1 Score: {f1:.6f}",
    sep="\n",
)

Accuracy: 0.711797
Precision: 0.671292
Recall: 0.853894
F1 Score: 0.751662
