In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import pandas as pd
import os

# file paths, always check to see that they're correct !
train_file_path = '/kaggle/input/cdsproj/sampled_data 3/train_data.csv'  
test_file_path = '/kaggle/input/cdsproj/sampled_data 3/test_data.csv'
val_file_path = '/kaggle/input/cdsproj/sampled_data 3/val_data.csv'  
train_image_directory = '/kaggle/input/cdsproj/sampled_data 3/train_data'  
test_image_directory = '/kaggle/input/cdsproj/sampled_data 3/test_data'  
val_image_directory = '/kaggle/input/cdsproj/sampled_data 3/val_data'

# defining a dataset class
class AgePredictionDataset(Dataset):
  def __init__(self, csv_file, img_dir, transform=None):
    self.data_frame = pd.read_csv(csv_file)
    self.img_dir = img_dir
    self.transform = transform

  def __len__(self):
    return len(self.data_frame)

  def __getitem__(self, idx):
    img_name = os.path.join(self.img_dir, self.data_frame.iloc[idx, 0])
    try:
      image = Image.open(img_name).convert('RGB')
      if self.transform:
        image = self.transform(image)
    except FileNotFoundError:
      print(f"Warning: Missing file {img_name}")
      return None, None  # Returning None for both image and age
    age = self.data_frame.iloc[idx, 1]
    return image, age


# defining image transformations
transform = transforms.Compose([
  transforms.Resize((128, 128)),
  transforms.ToTensor(),
])

train_dataset = AgePredictionDataset(csv_file=train_file_path, img_dir=train_image_directory, transform=transform)
test_dataset = AgePredictionDataset(csv_file=test_file_path, img_dir=test_image_directory, transform=transform)
val_dataset = AgePredictionDataset(csv_file=val_file_path, img_dir=val_image_directory, transform=transform)

def custom_collate_fn(batch):
  batch = [item for item in batch if item[0] is not None and item[1] is not None]
  if len(batch) == 0:  # If all items are None, return an empty batch
    return torch.tensor([]), torch.tensor([])
  return torch.utils.data.dataloader.default_collate(batch)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=custom_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=custom_collate_fn)


# defining a simple CNN model for age prediction
class SimpleCNN(nn.Module):
  def __init__(self):
    super(SimpleCNN, self).__init__()
    self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
    self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
    self.pool = nn.MaxPool2d(2, 2)
    self.fc1 = nn.Linear(64 * 32 * 32, 512)
    self.fc2 = nn.Linear(512, 1)  # Predicting age as a single value
    self.relu = nn.ReLU()

  def forward(self, x):
    x = self.pool(self.relu(self.conv1(x)))
    x = self.pool(self.relu(self.conv2(x)))
    x = x.view(-1, 64 * 32 * 32)
    x = self.relu(self.fc1(x))
    x = self.fc2(x)
    return x

# initializing model, loss function, and optimizer
model = SimpleCNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


num_epochs = 30  

# Training loop with validation
for epoch in range(num_epochs):
  model.train()
  running_loss = 0.0
  for images, ages in train_loader:
    if images.nelement() == 0:  # Skip empty batches
      continue
    
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs.squeeze(), ages.float())
    loss.backward()
    optimizer.step()
    running_loss += loss.item()
    
    # Print training loss
  print(f'Epoch {epoch+1}, Training Loss: {running_loss/len(train_loader)}')

    # Validation step
  model.eval()  # Set model to evaluation mode
  val_loss = 0.0
  with torch.no_grad():
    for images, ages in val_loader:
      if images.nelement() == 0:   #skipping empty batches
        continue

      outputs = model(images)
      loss = criterion(outputs.squeeze(), ages.float())
      val_loss += loss.item()

    # Print validation loss
  print(f'Epoch {epoch+1}, Validation Loss: {val_loss/len(val_loader)}')



Epoch 1, Training Loss: 369.1182181781626
Epoch 1, Validation Loss: 303.5899730294438
Epoch 2, Training Loss: 257.2170911737942
Epoch 2, Validation Loss: 263.78907187510345
Epoch 3, Training Loss: 186.55131413260884
Epoch 3, Validation Loss: 187.46410815998658
Epoch 4, Training Loss: 132.53300679660737
Epoch 4, Validation Loss: 173.05277080859167
Epoch 5, Training Loss: 90.19954175694103
Epoch 5, Validation Loss: 139.39346585031282
Epoch 6, Training Loss: 61.74816110988352
Epoch 6, Validation Loss: 117.51610807645119
Epoch 7, Training Loss: 42.482885787066294
Epoch 7, Validation Loss: 105.58125528238587
Epoch 8, Training Loss: 30.74591208677241
Epoch 8, Validation Loss: 109.4781407501738
Epoch 9, Training Loss: 23.182023439560343
Epoch 9, Validation Loss: 104.9308389243433
Epoch 10, Training Loss: 19.938073846235632
Epoch 10, Validation Loss: 97.41530578419314
Epoch 11, Training Loss: 17.493680296607195
Epoch 11, Validation Loss: 97.0741656755997
Epoch 12, Training Loss: 14.55481127519

In [2]:
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np  


# setting the model to evaluation mode
model.eval()

# initializing lists to store predicted and actual ages
predicted_ages = []
actual_ages = []

# iterating over the test DataLoader
with torch.no_grad():
    for images, ages in test_loader:
        # forward pass to get predictions
        outputs = model(images)
        # converting predictions to numpy array
        predicted_ages.extend(outputs.squeeze().cpu().numpy())
        # converting actual ages to numpy array
        actual_ages.extend(ages.cpu().numpy())

# calculating evaluation metrics
mae = mean_absolute_error(actual_ages, predicted_ages)
mse = mean_squared_error(actual_ages, predicted_ages)
rmse = np.sqrt(mse)

# printing evaluation results
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

Mean Absolute Error (MAE): 5.823839934888005
Mean Squared Error (MSE): 85.00910760231251
Root Mean Squared Error (RMSE): 9.220038373147506
