In [23]:
import os, random
import numpy as np
import pandas as pd
import torch

# Seed every random number generator used in this notebook with the same value
seed = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(seed)

# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [24]:
import pandas as pd

# Read the label tables for the training and testing splits from their CSV files
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('celeba_training/labels.csv', 'celeba_testing/labels.csv')
)

# Preview the first rows of the training labels
print(train_data.head())

   Unnamed: 0 img_name  gender  smiling
0           0    0.jpg      -1        1
1           1    1.jpg      -1        1
2           2    2.jpg       1       -1
3           3    3.jpg      -1       -1
4           4    4.jpg      -1       -1


In [25]:
# Check for missing or inconsistent values.
print(train_data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
train_data.info()

   Unnamed: 0 img_name  gender  smiling
0           0    0.jpg      -1        1
1           1    1.jpg      -1        1
2           2    2.jpg       1       -1
3           3    3.jpg      -1       -1
4           4    4.jpg      -1       -1
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  5000 non-null   int64 
 1   img_name    5000 non-null   object
 2   gender      5000 non-null   int64 
 3   smiling     5000 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 156.4+ KB
None


In [26]:
import pandas as pd

# Read a fresh copy of the training labels straight from disk
raw_train_data = pd.read_csv('celeba_training/labels.csv')

# Class balance before recoding
print("Original Gender Distribution:", raw_train_data['gender'].value_counts())
print("Original Smiling Distribution:", raw_train_data['smiling'].value_counts())

# Recode the labels: -1 becomes 0, every other value becomes 1
for label_col in ('gender', 'smiling'):
    raw_train_data[label_col] = raw_train_data[label_col].ne(-1).astype('int64')

# Class balance after recoding
print("Transformed Gender Distribution:", raw_train_data['gender'].value_counts())
print("Transformed Smiling Distribution:", raw_train_data['smiling'].value_counts())


Original Gender Distribution: gender
-1    2500
 1    2500
Name: count, dtype: int64
Original Smiling Distribution: smiling
 1    2500
-1    2500
Name: count, dtype: int64
Transformed Gender Distribution: gender
0    2500
1    2500
Name: count, dtype: int64
Transformed Smiling Distribution: smiling
1    2500
0    2500
Name: count, dtype: int64


In [27]:
# Recode the attribute labels from the {-1, 1} encoding to {0, 1}.
#
# The test `> 0` is used rather than `0 if x == -1 else 1` so that this cell is
# idempotent: with the old rule, running the cell a second time on labels that
# were already 0/1 mapped every 0 to 1 and silently wiped out the negative class.
# With `> 0`, -1 -> 0 and 1 -> 1 on the first run, and 0 -> 0 and 1 -> 1 on any re-run.
for split_df in (train_data, test_data):
    for label_col in ('gender', 'smiling'):
        split_df[label_col] = (split_df[label_col] > 0).astype('int64')


In [28]:
import os
from PIL import Image
import numpy as np
import pandas as pd

# Directories holding the image files for each split
train_image_folder = "celeba_training/img"
test_image_folder = "celeba_testing/img"

# Fail right away with a clear message when a folder is missing. Previously the
# check only printed a note and execution carried on into os.listdir, which then
# raised its own, less descriptive error.
for split_label, folder in (("Training", train_image_folder), ("Test", test_image_folder)):
    if not os.path.isdir(folder):
        raise FileNotFoundError(f"{split_label} image folder not found at {folder}.")

# Peek at a few entries of each folder (os.listdir returns them in arbitrary order)
print("Train Images:", os.listdir(train_image_folder)[:5])
print("Test Images:", os.listdir(test_image_folder)[:5])


Train Images: ['63.jpg', '823.jpg', '4217.jpg', '3578.jpg', '2666.jpg']
Test Images: ['63.jpg', '823.jpg', '189.jpg', '77.jpg', '837.jpg']


In [29]:
from PIL import Image
import numpy as np

# Default target size handed to PIL's resize (e.g., 64x64 pixels)
image_size = (64, 64)

def preprocess_images(image_folder, labels_df, size=None):
    """Load, resize and scale the images listed in a label table.

    Args:
        image_folder: Directory that contains the image files.
        labels_df: DataFrame with 'img_name', 'gender' and 'smiling' columns,
            one row per image.
        size: Optional target size passed to ``Image.resize``. When omitted,
            the module-level ``image_size`` is used.

    Returns:
        A tuple ``(X, y_gender, y_smiling)`` of numpy arrays. ``X`` stacks the
        RGB images after dividing the pixel values by 255; ``y_gender`` and
        ``y_smiling`` hold the labels of the same rows, in the same order.
        Rows whose image could not be processed are reported on stdout and
        left out of all three arrays.

    Raises:
        KeyError: If ``labels_df`` lacks one of the required columns.
    """
    if size is None:
        size = image_size

    X = []
    y_gender = []
    y_smiling = []

    # Read the three columns once instead of building a Series per row with
    # iterrows(); a missing column now fails here rather than once per row.
    rows = zip(labels_df['img_name'], labels_df['gender'], labels_df['smiling'])

    for img_name, gender, smiling in rows:
        img_path = os.path.join(image_folder, img_name)
        try:
            # The context manager releases the file handle as soon as the
            # pixel data has been converted.
            with Image.open(img_path) as img:
                resized = img.convert('RGB').resize(size)

            # Convert to a numpy array and normalize pixel values to [0, 1]
            img_array = np.asarray(resized) / 255.0
        except Exception as e:
            # Best effort: report the problem and skip this row entirely
            print(f"Error processing image {img_name}: {e}")
            continue

        # Image and labels are appended together so the arrays stay aligned
        X.append(img_array)
        y_gender.append(gender)
        y_smiling.append(smiling)

    # Convert lists to numpy arrays
    X = np.array(X)
    y_gender = np.array(y_gender)
    y_smiling = np.array(y_smiling)

    return X, y_gender, y_smiling

# Turn each split's label table into image and label arrays
train_arrays = preprocess_images(train_image_folder, train_data)
test_arrays = preprocess_images(test_image_folder, test_data)

X_train, y_train_gender, y_train_smiling = train_arrays
X_test, y_test_gender, y_test_smiling = test_arrays

# Report the shape of the stacked image arrays
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")


Training data shape: (5000, 64, 64, 3)
Testing data shape: (1000, 64, 64, 3)


In [30]:
# Count how often each label value occurs, for both tasks and both splits
label_arrays = (
    ("Gender Label Distribution (Train):", y_train_gender),
    ("Smiling Label Distribution (Train):", y_train_smiling),
    ("Gender Label Distribution (Test):", y_test_gender),
    ("Smiling Label Distribution (Test):", y_test_smiling),
)
for caption, labels in label_arrays:
    print(caption, np.bincount(labels))


Gender Label Distribution (Train): [2500 2500]
Smiling Label Distribution (Train): [2500 2500]
Gender Label Distribution (Test): [500 500]
Smiling Label Distribution (Test): [500 500]


In [31]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training images for validation, stratified on the gender label
split_arrays = train_test_split(
    X_train,
    y_train_gender,
    y_train_smiling,
    test_size=0.2,
    random_state=42,
    stratify=y_train_gender,
)
(X_train_split, X_val,
 y_train_gender_split, y_val_gender,
 y_train_smiling_split, y_val_smiling) = split_arrays

# Report the shape of every array produced by the split
split_shapes = (
    ("Training data shape:", X_train_split),
    ("Validation data shape:", X_val),
    ("Training gender labels shape:", y_train_gender_split),
    ("Validation gender labels shape:", y_val_gender),
    ("Training smiling labels shape:", y_train_smiling_split),
    ("Validation smiling labels shape:", y_val_smiling),
)
for caption, arr in split_shapes:
    print(caption, arr.shape)


Training data shape: (4000, 64, 64, 3)
Validation data shape: (1000, 64, 64, 3)
Training gender labels shape: (4000,)
Validation gender labels shape: (1000,)
Training smiling labels shape: (4000,)
Validation smiling labels shape: (1000,)


In [32]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [33]:
def _as_flat_dataset(images, gender, smiling):
    """Wrap one split as a TensorDataset of (flattened image, gender, smiling)."""
    # Collapse everything after the first axis so each sample is a single row
    flat_images = images.reshape(images.shape[0], -1)
    return TensorDataset(
        torch.tensor(flat_images, dtype=torch.float32),
        torch.tensor(gender, dtype=torch.long),
        torch.tensor(smiling, dtype=torch.long),
    )

# One dataset per split
train_dataset = _as_flat_dataset(X_train_split, y_train_gender_split, y_train_smiling_split)
val_dataset = _as_flat_dataset(X_val, y_val_gender, y_val_smiling)
test_dataset = _as_flat_dataset(X_test, y_test_gender, y_test_smiling)

# Only the training loader shuffles; validation and test keep their order
loader_batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=loader_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=loader_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=loader_batch_size, shuffle=False)

print("DataLoaders created successfully!")


DataLoaders created successfully!


In [34]:
# Define the FFNN model
class MultiTaskFFNN(nn.Module):
    """Feed-forward network with one shared hidden layer and two output heads.

    The input passes through a single linear layer followed by ReLU; the
    resulting hidden activations feed two separate linear heads, one for the
    gender task and one for the smiling task, each producing two logits.

    Args:
        input_size: Number of features in each (flattened) input sample.
        hidden_size: Number of units in the shared hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        # Shared trunk
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # Task-specific heads (two logits each)
        self.fc_gender = nn.Linear(hidden_size, 2)
        self.fc_smiling = nn.Linear(hidden_size, 2)

    def forward(self, x):
        """Return ``(gender_logits, smiling_logits)`` for the input batch ``x``."""
        shared = self.relu(self.fc1(x))
        return self.fc_gender(shared), self.fc_smiling(shared)

# Build the network. Its input width is the number of values in one flattened
# image, i.e. the product of the three axes that follow the sample axis.
img_dim1, img_dim2, img_dim3 = X_train_split.shape[1:4]
input_size = img_dim1 * img_dim2 * img_dim3
hidden_size = 128  # units in the shared hidden layer
model = MultiTaskFFNN(input_size, hidden_size)

print("FFNN model defined successfully!")


FFNN model defined successfully!


In [35]:
# Place the model's parameters on the device chosen earlier
# (nn.Module.to returns the module itself, so rebinding keeps the same object)
model = model.to(device)
print("FFNN model moved to device successfully!")

FFNN model moved to device successfully!


In [36]:
# How many full passes over the training data to run
num_epochs = 130

# Both heads are classifiers, so one cross-entropy criterion serves both
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters with a small learning rate
optimizer = optim.Adam(params=model.parameters(), lr=1e-5)

In [37]:
# Train both heads jointly: every step minimises the sum of the two cross-entropy losses
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, gender_labels, smiling_labels in train_loader:
        # Put the batch on the same device as the model, one flat row per sample
        images = images.view(images.size(0), -1).to(device)
        gender_labels, smiling_labels = gender_labels.to(device), smiling_labels.to(device)

        # Drop gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and combined loss
        gender_outputs, smiling_outputs = model(images)
        gender_loss = criterion(gender_outputs, gender_labels)
        smiling_loss = criterion(smiling_outputs, smiling_labels)
        loss = gender_loss + smiling_loss

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # The figure printed per epoch is the sum of the batch losses
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

Epoch 1/130, Loss: 84.1317
Epoch 2/130, Loss: 79.4230
Epoch 2/130, Loss: 79.4230
Epoch 3/130, Loss: 76.2781
Epoch 3/130, Loss: 76.2781
Epoch 4/130, Loss: 73.9066
Epoch 4/130, Loss: 73.9066
Epoch 5/130, Loss: 71.6899
Epoch 5/130, Loss: 71.6899
Epoch 6/130, Loss: 69.8258
Epoch 6/130, Loss: 69.8258
Epoch 7/130, Loss: 68.2793
Epoch 7/130, Loss: 68.2793
Epoch 8/130, Loss: 66.5805
Epoch 8/130, Loss: 66.5805
Epoch 9/130, Loss: 65.0784
Epoch 9/130, Loss: 65.0784
Epoch 10/130, Loss: 63.8222
Epoch 10/130, Loss: 63.8222
Epoch 11/130, Loss: 62.6485
Epoch 11/130, Loss: 62.6485
Epoch 12/130, Loss: 61.4044
Epoch 12/130, Loss: 61.4044
Epoch 13/130, Loss: 60.1313
Epoch 13/130, Loss: 60.1313
Epoch 14/130, Loss: 59.2390
Epoch 14/130, Loss: 59.2390
Epoch 15/130, Loss: 58.3576
Epoch 15/130, Loss: 58.3576
Epoch 16/130, Loss: 57.5562
Epoch 16/130, Loss: 57.5562
Epoch 17/130, Loss: 56.2313
Epoch 17/130, Loss: 56.2313
Epoch 18/130, Loss: 55.7884
Epoch 18/130, Loss: 55.7884
Epoch 19/130, Loss: 54.8151
Epoch 19/

In [38]:
# Score the trained model on the validation split
model.eval()  # evaluation mode

# Running totals over all validation batches
val_loss = 0
correct_gender = 0
correct_smiling = 0
total = 0

with torch.no_grad():  # no gradients are needed while scoring
    for images, gender_labels, smiling_labels in val_loader:
        # One flat row per sample, on the model's device
        images = images.view(images.size(0), -1).to(device)
        gender_labels, smiling_labels = gender_labels.to(device), smiling_labels.to(device)

        gender_outputs, smiling_outputs = model(images)

        # Add both task losses for this batch to the running sum
        gender_loss = criterion(gender_outputs, gender_labels)
        smiling_loss = criterion(smiling_outputs, smiling_labels)
        val_loss += gender_loss.item() + smiling_loss.item()

        # Predicted class = index of the largest logit
        gender_preds = gender_outputs.argmax(dim=1)
        smiling_preds = smiling_outputs.argmax(dim=1)
        correct_gender += (gender_preds == gender_labels).sum().item()
        correct_smiling += (smiling_preds == smiling_labels).sum().item()
        total += gender_labels.size(0)

print(f"Validation Loss: {val_loss:.4f}")
print(f"Gender Accuracy: {correct_gender / total:.4f}")
print(f"Smiling Accuracy: {correct_smiling / total:.4f}")


Validation Loss: 9.7172
Gender Accuracy: 0.8780
Smiling Accuracy: 0.8540


In [39]:
# Validation and Test loops with device-aware tensors
model.eval()

def evaluate(loader, split_name="Validation"):
    """Print the summed loss and per-task accuracy of ``model`` over ``loader``.

    Uses the notebook-level ``model``, ``criterion`` and ``device``.

    Args:
        loader: Iterable yielding ``(images, gender_labels, smiling_labels)``
            batches of tensors.
        split_name: Label used as the prefix of every printed line.

    Returns:
        None. The results are printed. The loss is the sum over batches of
        the gender loss plus the smiling loss; each accuracy is the number of
        correct predictions divided by the number of samples seen.
    """
    total_loss = 0
    correct_gender = 0
    correct_smiling = 0
    total = 0

    # Switch to evaluation mode here so the function does not rely on the
    # caller having done it, and restore the previous mode afterwards so a
    # call made in the middle of training does not leave the model in eval mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, gender_labels, smiling_labels in loader:
                images = images.view(images.size(0), -1).to(device)
                gender_labels = gender_labels.to(device)
                smiling_labels = smiling_labels.to(device)

                gender_outputs, smiling_outputs = model(images)
                loss = criterion(gender_outputs, gender_labels) + criterion(smiling_outputs, smiling_labels)
                total_loss += loss.item()

                # Predicted class = index of the largest logit
                gender_preds = gender_outputs.argmax(dim=1)
                smiling_preds = smiling_outputs.argmax(dim=1)
                correct_gender += (gender_preds == gender_labels).sum().item()
                correct_smiling += (smiling_preds == smiling_labels).sum().item()
                total += gender_labels.size(0)
    finally:
        model.train(was_training)

    # An empty loader would otherwise end in a ZeroDivisionError below
    if total == 0:
        print(f"{split_name}: no samples to evaluate.")
        return

    print(f"{split_name} Loss: {total_loss:.4f}")
    print(f"{split_name} Gender Accuracy: {correct_gender/total:.4f}")
    print(f"{split_name} Smiling Accuracy: {correct_smiling/total:.4f}")

# Run evaluation
evaluate(val_loader, "Validation")
evaluate(test_loader, "Test")

Validation Loss: 9.7172
Validation Gender Accuracy: 0.8780
Validation Smiling Accuracy: 0.8540
Test Loss: 9.0730
Test Gender Accuracy: 0.8900
Test Smiling Accuracy: 0.8720
Test Loss: 9.0730
Test Gender Accuracy: 0.8900
Test Smiling Accuracy: 0.8720
