In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score

# Load and preprocess the dataset.
# Columns excluded from modelling: the image identifier plus a hand-picked set
# of attribute columns.
excluded_columns = [
    'image_id', '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Bags_Under_Eyes',
    'Bushy_Eyebrows', 'Bangs', 'Blurry', 'Smiling', 'Mouth_Slightly_Open',
    'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick', 'Wearing_Necklace',
    'Wearing_Necktie'
]

# Build the modelling frame in one non-mutating chain:
#   1. read the CSV (path is relative to the working directory),
#   2. drop the excluded columns first, so the value replacement never has to
#      scan the string `image_id` column,
#   3. recode -1 as 0. An integer 0 is used so the columns keep an integer
#      dtype and the class labels stay 0/1.
# Values equal to 1 are left untouched; rewriting 1 as 1.0 changes nothing.
# NOTE(review): this presumes the remaining columns are -1/1 flags — confirm
# against the CSV.
celeb_data = (
    pd.read_csv('../list_attr_celeba.csv')
    .drop(columns=excluded_columns)
    .replace(-1, 0)
)

# Separate the target column ("Attractive") from the predictor columns.
y = celeb_data["Attractive"]
X = celeb_data.drop(columns=["Attractive"])

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit a random forest on the training split only; it is used here purely to
# rank the features, not to make the final predictions.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# One importance score per training column, in column order.
feature_importances = rf.feature_importances_

# Number of highest-ranked features to keep.
N = 10

# Pair each column name with its score, then order the pairs from most to
# least important. list.sort is stable, so tied scores keep column order.
sorted_rf = list(zip(X_train.columns, feature_importances))
sorted_rf.sort(key=lambda pair: pair[1], reverse=True)
top_features = [name for name, _ in sorted_rf[:N]]

print("Selected Top Features:")
for feature, importance in sorted_rf[:N]:
    print(f"{feature}: importance={importance:.4f}")

# Restrict both splits to the selected columns (same columns, same order).
X_train_selected = X_train.loc[:, top_features]
X_test_selected = X_test.loc[:, top_features]

# Fit a k-nearest-neighbours classifier (k = 5; tune as needed) on the
# selected training features. `fit` returns the fitted estimator itself.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_selected, y_train)

# Predict labels for the held-out rows.
y_pred = knn.predict(X_test_selected)

# Per-class precision / recall / F1 summary for the test split.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Selected Top Features:
Heavy_Makeup: importance=0.2523
Young: importance=0.1477
Male: importance=0.1114
Big_Nose: importance=0.0490
Pointy_Nose: importance=0.0410
Oval_Face: importance=0.0360
Chubby: importance=0.0360
Wavy_Hair: importance=0.0318
Eyeglasses: importance=0.0293
High_Cheekbones: importance=0.0261

Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.72      0.72     29734
           1       0.73      0.74      0.74     31046

    accuracy                           0.73     60780
   macro avg       0.73      0.73      0.73     60780
weighted avg       0.73      0.73      0.73     60780



In [4]:
# Score the KNN predictions against the held-out labels. Each scorer is called
# with its default arguments.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Report every metric to six decimal places.
print(f"Accuracy: {accuracy:.6f}")
print(f"Precision: {precision:.6f}")
print(f"Recall: {recall:.6f}")
print(f"F1 Score: {f1:.6f}")

Accuracy: 0.732017
Precision: 0.734776
Recall: 0.743864
F1 Score: 0.739292
