In [8]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from transformers import CLIPProcessor, CLIPModel
from torch.optim.lr_scheduler import StepLR

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, label_mapping=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        if label_mapping is not None:
            self.img_labels['age_band'] = self.img_labels['age_band'].map(label_mapping)

    def __len__(self):
        return len(self.img_labels)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels['filename'].iloc[idx])
        try:
            image = Image.open(img_path).convert('RGB')
        except FileNotFoundError:
            print(f"File not found: {img_path}, logging and skipping.")
            return None  # Indicate missing file
        if self.transform:
            image = self.transform(image)
        label = self.img_labels['age_band'].iloc[idx]
        return image, label


In [11]:
def custom_collate_fn(batch):
    batch = [item for item in batch if item is not None]  # Filter out None
    if not batch:  # If all were None, return dummy tensors
        return torch.tensor([]), torch.tensor([])
    images, labels = zip(*batch)  # Unpack filtered items
    return torch.stack(images), torch.tensor(labels)


In [12]:
# Transformations
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [13]:
# Load CSV for label mapping
train_labels = pd.read_csv('/kaggle/input/dataface3/sampled_data 2/train_data.csv')
label_mapping = {label: idx for idx, label in enumerate(sorted(train_labels['age_band'].unique()))}

# Datasets
train_dataset = CustomImageDataset('/kaggle/input/dataface3/sampled_data 2/train_data.csv', '/kaggle/input/dataface3/sampled_data 2/train_data', transform=train_transform, label_mapping=label_mapping)
val_dataset = CustomImageDataset('/kaggle/input/dataface3/sampled_data 2/val_data.csv', '/kaggle/input/dataface3/sampled_data 2/val_data', transform=val_test_transform, label_mapping=label_mapping)

# DataLoaders with custom collate function
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=custom_collate_fn)

In [14]:
# Model preparation
model = models.densenet121(pretrained=True)
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, len(label_mapping))
model.to(device)



DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [15]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)


In [16]:
# Training loop
for epoch in range(30):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        if images.nelement() == 0:  # Skip batches where all files were missing
            continue
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}')


File not found: /kaggle/input/dataface3/sampled_data 2/train_data/24_0_1_20170116220224657 .jpg, logging and skipping.
Epoch 1, Loss: 2.4809278642430024
File not found: /kaggle/input/dataface3/sampled_data 2/train_data/24_0_1_20170116220224657 .jpg, logging and skipping.
Epoch 2, Loss: 2.2495872649279507
File not found: /kaggle/input/dataface3/sampled_data 2/train_data/24_0_1_20170116220224657 .jpg, logging and skipping.
Epoch 3, Loss: 2.1682084469871725
File not found: /kaggle/input/dataface3/sampled_data 2/train_data/24_0_1_20170116220224657 .jpg, logging and skipping.
Epoch 4, Loss: 2.1020918183148227
File not found: /kaggle/input/dataface3/sampled_data 2/train_data/24_0_1_20170116220224657 .jpg, logging and skipping.
Epoch 5, Loss: 2.047088859170516
File not found: /kaggle/input/dataface3/sampled_data 2/train_data/24_0_1_20170116220224657 .jpg, logging and skipping.
Epoch 6, Loss: 2.006605849164055
File not found: /kaggle/input/dataface3/sampled_data 2/train_data/24_0_1_20170116220

NameError: name 'num_epochs' is not defined

RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Float'

RuntimeError: The size of tensor a (19) must match the size of tensor b (32) at non-singleton dimension 1

RuntimeError: The size of tensor a (19) must match the size of tensor b (32) at non-singleton dimension 1

RuntimeError: The size of tensor a (19) must match the size of tensor b (32) at non-singleton dimension 1

In [31]:
import torch
from torch.utils.data import DataLoader

# Assuming `model`, `val_loader`, and `device` are already defined

def rmse(predictions, targets):
    """Compute the Root Mean Square Error."""
    return torch.sqrt(torch.mean((predictions - targets) ** 2))

def validate(model, data_loader, criterion, device):
    """Validate the model."""
    model.eval()  # Set model to evaluation mode
    total_loss = 0.0
    total_rmse = 0.0
    total_samples = 0
    correct_within_tolerance = 0
    tolerance = 7.0  # You can adjust the tolerance level

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device).float()
            outputs = model(inputs).squeeze()  # Ensure outputs are correctly sized
            
            # Calculate and accumulate the loss
            loss = criterion(outputs, targets)
            total_loss += loss.item() * inputs.size(0)
            
            # Calculate and accumulate RMSE
            batch_rmse = rmse(outputs, targets)
            total_rmse += batch_rmse.item() * inputs.size(0)
            
            # Calculate and accumulate correct predictions within tolerance
            accurate_predictions = torch.abs(outputs - targets) <= tolerance
            correct_within_tolerance += accurate_predictions.sum().item()
            
            total_samples += inputs.size(0)

    avg_loss = total_loss / total_samples
    avg_rmse = total_rmse / total_samples
    accuracy_within_tolerance = 100 * correct_within_tolerance / total_samples
    
    print(f'Validation Loss: {avg_loss:.4f}')
    print(f'Validation RMSE: {avg_rmse:.4f}')
    print(f'Accuracy Within ±{tolerance} Years: {accuracy_within_tolerance:.2f}%')

    return avg_loss, avg_rmse, accuracy_within_tolerance

# Example criterion for a regression task
criterion = torch.nn.MSELoss()

# Replace `val_loader` with your validation DataLoader
# Example usage of the validation function
avg_loss, avg_rmse, accuracy_within_tolerance = validate(model, val_loader, criterion, device)


Validation Loss: 140.9590
Validation RMSE: 11.3770
Accuracy Within ±7.0 Years: 49.79%
