In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score

# Read the attribute CSV and recode it in one chained pass: replace 1 with 1.0
# and -1 with 0.0, then discard the image id and the attribute columns that are
# not used in this analysis.
unused_columns = [
    'image_id', '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Bags_Under_Eyes',
    'Bushy_Eyebrows', 'Bangs', 'Blurry', 'Smiling', 'Mouth_Slightly_Open',
    'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick', 'Wearing_Necklace',
    'Wearing_Necktie',
]
celeb_data = (
    pd.read_csv('../list_attr_celeba.csv')
    .replace(1, 1.0)
    .replace(-1, 0.0)
    .drop(columns=unused_columns)
)

# "Attractive" is the prediction target; every other remaining column is a
# candidate feature.
y = celeb_data["Attractive"]
X = celeb_data.drop(columns="Attractive")

# Rank the candidate features with a random forest.
# n_jobs=-1 grows the trees on all available cores; random_state=42 fixes the
# per-tree seeds so the fitted forest does not depend on the number of workers.
rf = RandomForestClassifier(random_state=42, n_jobs=-1)
rf.fit(X, y)

# Importance ranking, most important first. Kept as live code so the fit above
# is actually used and the ranking can be inspected (e.g. sorted_rf[:N]).
N = 10
sorted_rf = sorted(zip(X.columns, rf.feature_importances_), key=lambda pair: pair[1], reverse=True)
top_features = [feature for feature, _ in sorted_rf[:N]]

# The network is trained on a pinned feature list rather than on top_features,
# so downstream results do not move if the ranking shifts.
# NOTE(review): presumably this list was copied from an earlier run of the
# ranking above -- confirm it still matches top_features, or switch to
# X[top_features].
SELECTED_FEATURES = [
    "Heavy_Makeup", "Young", "Chubby", "Eyeglasses", "Wavy_Hair",
    "Male", "Pointy_Nose", "Double_Chin", "Oval_Face", "Big_Nose",
]

# Select from X (the feature frame) so the target column can never be picked up.
X_selected = X[SELECTED_FEATURES]

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y, test_size=0.3, random_state=42
)


def _to_float_tensor(table, as_column=False):
    """Copy a pandas object's values into a float32 tensor.

    With as_column=True a dimension of size 1 is inserted at position 1, which
    turns a 1-D target vector into the single-column layout used for the
    binary labels.
    """
    tensor = torch.tensor(table.values, dtype=torch.float32)
    return tensor.unsqueeze(1) if as_column else tensor


# Features are converted as-is; targets get the extra column dimension.
X_train_tensor = _to_float_tensor(X_train)
X_test_tensor = _to_float_tensor(X_test)
y_train_tensor = _to_float_tensor(y_train, as_column=True)
y_test_tensor = _to_float_tensor(y_test, as_column=True)

# Define the Feedforward Neural Network
class NeuralNetwork(nn.Module):
    """Small fully connected binary classifier.

    Architecture: input_size -> 32 -> 16 -> 1, with a ReLU after each hidden
    layer and a sigmoid on the output so the result falls in the 0-1 range.
    """

    def __init__(self, input_size):
        """Create the layers; input_size is the number of features per sample."""
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=32)  # hidden layer 1
        self.relu = nn.ReLU()                                          # shared by both hidden layers
        self.fc2 = nn.Linear(in_features=32, out_features=16)          # hidden layer 2
        self.fc3 = nn.Linear(in_features=16, out_features=1)           # output layer
        self.sigmoid = nn.Sigmoid()                                    # maps the output to a probability

    def forward(self, x):
        """Run x through both hidden layers and return the sigmoid output."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        logit = self.fc3(hidden)
        return self.sigmoid(logit)

# Seed PyTorch's global RNG before the model is built. The layer weights are
# initialised randomly, so without this the loss curve and the test metrics
# change on every run even though the split and the forest are seeded with 42.
torch.manual_seed(42)

# Initialize the model, loss function, and optimizer
input_size = X_train_tensor.shape[1]  # size of dimension 1 = number of input features
model = NeuralNetwork(input_size)
criterion = nn.BCELoss()  # binary cross-entropy, applied to the model's sigmoid outputs
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model. Each epoch is one full-batch step: the whole training tensor
# goes through the network at once, so num_epochs is also the number of weight
# updates.
num_epochs = 50
model.train()  # nothing inside the loop changes the mode, so set it once
for epoch in range(num_epochs):
    optimizer.zero_grad()  # clear gradients left over from the previous step
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Report the loss every 10th epoch.
    if (epoch+1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluate the model
model.eval()
with torch.no_grad():  # inference only, no gradient bookkeeping needed
    y_pred_probs = model(X_test_tensor)
    y_pred = (y_pred_probs >= 0.5).float()  # threshold at 0.5 -> 0.0 / 1.0 labels

# Convert predictions to 1-D numpy arrays for sklearn evaluation.
# squeeze(1) removes only the column axis; a bare squeeze() would also drop the
# row axis when there is a single test row and hand sklearn a 0-d array.
y_pred_numpy = y_pred.squeeze(1).numpy()
y_test_numpy = y_test_tensor.squeeze(1).numpy()

# Print evaluation metrics
print("\nClassification Report:")
print(classification_report(y_test_numpy, y_pred_numpy))

Epoch [10/50], Loss: 0.6912
Epoch [20/50], Loss: 0.6862
Epoch [30/50], Loss: 0.6782
Epoch [40/50], Loss: 0.6652
Epoch [50/50], Loss: 0.6465

Classification Report:
              precision    recall  f1-score   support

         0.0       0.73      0.71      0.72     29734
         1.0       0.73      0.75      0.74     31046

    accuracy                           0.73     60780
   macro avg       0.73      0.73      0.73     60780
weighted avg       0.73      0.73      0.73     60780



In [5]:
# Score the test-set predictions with each metric in turn and print every result
# to six decimal places as soon as it is computed.
scores = {}
for label, scorer in (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    scores[label] = scorer(y_test_numpy, y_pred_numpy)
    print(f"{label}: {scores[label]:.6f}")

# Expose the individual results under their usual names.
accuracy = scores["Accuracy"]
precision = scores["Precision"]
recall = scores["Recall"]
f1 = scores["F1 Score"]

Accuracy: 0.728381
Precision: 0.729052
Recall: 0.745185
F1 Score: 0.737030
