In [9]:
import pandas as pd
from sklearn.feature_selection import chi2, SelectKBest
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load and preprocess the dataset
# Location of the attribute table, relative to the notebook's working directory.
ATTRIBUTES_CSV = '../list_attr_celeba.csv'

# Columns removed before modelling: the image identifier plus the attributes
# that are deliberately left out of the feature set.
EXCLUDED_COLUMNS = [
    'image_id', '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Bags_Under_Eyes',
    'Bushy_Eyebrows', 'Bangs', 'Blurry', 'Smiling', 'Mouth_Slightly_Open',
    'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick', 'Wearing_Necklace',
    'Wearing_Necktie',
]

# Build the modelling frame in a single non-mutating chain so the cell has no
# in-place side effects on an intermediate frame.
# Every -1 in the table is recoded to 0 (presumably the "attribute absent"
# marker -- confirm against the CSV), which leaves the remaining columns as
# non-negative indicators; the chi2 scorer used further down rejects negative
# feature values. Values equal to 1 are already in their final form, so no
# separate 1 -> 1.0 replacement is needed.
celeb_data = (
    pd.read_csv(ATTRIBUTES_CSV)
    .replace(-1, 0)
    .drop(columns=EXCLUDED_COLUMNS)
)

# Pull the "Attractive" column out as the label vector and use every other
# remaining column as the feature matrix (both as plain NumPy arrays).
y = celeb_data["Attractive"].to_numpy()
X = celeb_data.drop(columns="Attractive").to_numpy()

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Chi-squared feature selection: score the features against the labels using
# the training split only, keep the 10 best-scoring ones, then apply that same
# fitted selection to both splits.
selector = SelectKBest(chi2, k=10)
selector.fit(X_train, y_train)
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

# Fit a Quadratic Discriminant Analysis classifier on the selected training features
# (fit() hands back the estimator itself, so construction and training chain).
qda = QuadraticDiscriminantAnalysis().fit(X_train_selected, y_train)

# Predict labels for the held-out rows, using the same selected features
y_pred = qda.predict(X_test_selected)

# Score the predictions on the test split: overall accuracy first ...
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of QDA model: {accuracy:.2f}")

# ... then the per-class precision / recall / f1 table under its heading.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Accuracy of QDA model: 0.69

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.40      0.55     29734
           1       0.62      0.96      0.76     31046

    accuracy                           0.69     60780
   macro avg       0.77      0.68      0.66     60780
weighted avg       0.76      0.69      0.66     60780

