In [1]:
# Logistic Regression Model

# 5 Methods:
# Chi-Squared
# Extra Trees
# L1 Regularization - Lasso
# Random Forest # 
# Mutual Info

In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

celeb_data = pd.read_csv('../list_attr_celeba.csv')

celeb_data.replace(1, 1.0, inplace=True)
celeb_data.replace(-1, 0.0, inplace=True)
celeb_data.drop(columns = ['image_id', '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Bags_Under_Eyes', 'Bushy_Eyebrows', 'Bangs', 'Blurry', 'Smiling', 'Mouth_Slightly_Open', 'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick', 'Wearing_Necklace', 'Wearing_Necktie'], inplace=True)

celeb_data.head()

column_names = celeb_data.drop("Attractive", axis=1).columns
X = celeb_data.drop("Attractive", axis=1).values
y = celeb_data["Attractive"].values

In [3]:
# training logistic regression (lasso) model
lasso = LogisticRegression(penalty='l1', solver='liblinear', random_state=42)
lasso.fit(X, y)

# lasso coefficients
lasso_coefficients = lasso.coef_.flatten()
sorted_lasso = sorted(zip(column_names, lasso_coefficients), key=lambda x: abs(x[1]), reverse=True)

print("\nLogistic Regression (Lasso) Coefficients (sorted by absolute value):")
for feature, coefficient in sorted_lasso:
    print(f"{feature}: {coefficient:.4f}")


Logistic Regression (Lasso) Coefficients (sorted by absolute value):
Chubby: -1.8687
Young: 1.5701
Gray_Hair: -1.4311
Heavy_Makeup: 1.4144
Bald: -1.2632
Eyeglasses: -1.1990
Double_Chin: -0.9540
Pale_Skin: 0.8839
Receding_Hairline: -0.8623
Pointy_Nose: 0.5691
Oval_Face: 0.5552
Straight_Hair: 0.4262
Narrow_Eyes: -0.4219
Wavy_Hair: 0.3916
Sideburns: 0.3800
High_Cheekbones: 0.3357
Brown_Hair: 0.3279
Big_Nose: -0.2772
Rosy_Cheeks: 0.2633
Mustache: -0.2084
Blond_Hair: 0.2078
Goatee: -0.1970
No_Beard: -0.1931
Male: -0.1620
Black_Hair: 0.1222
Big_Lips: -0.0115


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# training Random Forest model
rf = RandomForestClassifier(random_state=42)
rf.fit(X, y)

# feauture importances
feature_importances = rf.feature_importances_

# sort/display Random Forest feature importances
sorted_rf = sorted(zip(column_names, feature_importances), key=lambda x: x[1], reverse=True)

print("\nRandom Forest Feature Importances (sorted):")
for feature, importance in sorted_rf:
    print(f"{feature}: {importance:.4f}")

# choose features based on both Lasso and Random Forest
lasso_selected_features = [column_names[i] for i in range(len(lasso_coefficients)) if lasso_coefficients[i] != 0]
rf_selected_features = [column_names[i] for i, imp in enumerate(feature_importances) if imp > 0.01]
combined_selected_features = list(set(lasso_selected_features).intersection(rf_selected_features))

print("\nCombined Selected Features (Lasso and Random Forest):")
print(combined_selected_features)

# filtering to include only selected features
X_selected = pd.DataFrame(X, columns=column_names)[combined_selected_features].values

# splitting dataset using selected features
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.3, random_state=42)

# training Random Forest on selected features
rf_selected = RandomForestClassifier(random_state=42)
rf_selected.fit(X_train, y_train)

# predidt on test set
y_pred_test = rf_selected.predict(X_test)

# accuracy
rf_accuracy = accuracy_score(y_test, y_pred_test)
print(f"\nThe accuracy of the Random Forest model with Lasso and Random Forest selected features is: {rf_accuracy:.6f}")



Random Forest Feature Importances (sorted):
Heavy_Makeup: 0.2593
Young: 0.1524
Male: 0.1108
Big_Nose: 0.0500
Pointy_Nose: 0.0406
Chubby: 0.0381
Oval_Face: 0.0359
Wavy_Hair: 0.0313
Eyeglasses: 0.0312
High_Cheekbones: 0.0246
Straight_Hair: 0.0226
Big_Lips: 0.0220
Receding_Hairline: 0.0220
Narrow_Eyes: 0.0185
Black_Hair: 0.0183
Brown_Hair: 0.0177
Gray_Hair: 0.0162
No_Beard: 0.0152
Pale_Skin: 0.0142
Double_Chin: 0.0127
Blond_Hair: 0.0124
Rosy_Cheeks: 0.0086
Goatee: 0.0077
Sideburns: 0.0076
Mustache: 0.0065
Bald: 0.0039

Combined Selected Features (Lasso and Random Forest):
['Chubby', 'Oval_Face', 'Brown_Hair', 'Young', 'Straight_Hair', 'Black_Hair', 'Receding_Hairline', 'Blond_Hair', 'Gray_Hair', 'Heavy_Makeup', 'No_Beard', 'Narrow_Eyes', 'Male', 'High_Cheekbones', 'Eyeglasses', 'Double_Chin', 'Pale_Skin', 'Wavy_Hair', 'Big_Lips', 'Big_Nose', 'Pointy_Nose']

The accuracy of the Random Forest model with Lasso and RandomForest-selected features is: 0.761599
