# Age recognition 

**Authors**: Richard Šléher, Tomáš Majerník

**Dataset**: https://www.kaggle.com/datasets/arashnic/faces-age-detection-dataset/code?select=train.csv

In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import wandb
from PIL import Image
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from summarytools import dfSummary
from timm import create_model
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms

warnings.filterwarnings("ignore")


Hyperparameters

In [None]:
# Global hyperparameters shared by all experiments below.
IMAGE_SIZE = 128  # images are resized to IMAGE_SIZE x IMAGE_SIZE before entering a model
batch_size = 256  # mini-batch size used by every DataLoader
num_epochs = 50  # epoch budget of the custom CNN; the pretrained models run num_epochs - 40

# Seed the random number generators so that weight initialisation, shuffling and
# the random augmentations are repeatable after "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using {device} device")

## EDA

In [None]:
# Load the label table; later cells read its "ID" (image file name) and "Class" columns.
data = pd.read_csv("data/train.csv")

In [None]:
# Render a summarytools overview of the label table.
dfSummary(data)

In [None]:
# Histogram of the class labels, used to inspect class balance.
sns.histplot(data["Class"])

Triedy sú nevyvážené, preto sme pri trénovaní modelu nastavili rozdielne váhy pre jednotlivé triedy, aby sme tento nepomer „vyrovnali“.

In [None]:
# Collect the (width, height) of every image listed in the label table;
# files that cannot be opened are reported and skipped.
image_sizes = []
for image_id in data["ID"]:
    img_path = os.path.join("data/train/", image_id)
    try:
        with Image.open(img_path) as opened:
            image_sizes.append(opened.size)
    except Exception as e:
        print(f"Error processing file {img_path}: {e}")

if not image_sizes:
    print("No image sizes were processed.")
else:
    widths = tuple(size[0] for size in image_sizes)
    heights = tuple(size[1] for size in image_sizes)

    # One point per image; the low alpha makes dense regions appear darker.
    size_fig, size_ax = plt.subplots(figsize=(7, 7))
    size_ax.scatter(widths, heights, alpha=0.25)
    size_ax.set_xlabel("Width (pixels)")
    size_ax.set_ylabel("Height (pixels)")
    size_ax.set_title("Scatter Plot of Image Sizes")
    plt.show()

Scatterplot distribúcie veľkostí obrázkov. Môžeme pozorovať, že obrázky majú veľký rozsah veľkostí.

In [None]:
# Gather the grayscale pixel values of every listed image as flat arrays;
# files that cannot be opened are reported and skipped.
pixel_intensities = []
for image_id in data["ID"]:
    img_path = os.path.join("data/train/", image_id)
    try:
        with Image.open(img_path) as opened:
            gray_pixels = np.array(opened.convert("L"))
            pixel_intensities.append(gray_pixels.flatten())
    except Exception as e:
        print(f"Error processing file {img_path}: {e}")

if not pixel_intensities:
    print("No pixel intensities were processed.")
else:
    # A single 1-D array holding the pixels of all images.
    combined_intensities = np.concatenate(pixel_intensities)
    box_style = {"facecolor": "lightblue"}

    plt.figure(figsize=(10, 6))
    plt.boxplot(combined_intensities, vert=False, patch_artist=True, boxprops=box_style)
    plt.xlabel("Pixel Intensity")
    plt.title("Box Plot of Pixel Intensities")
    plt.show()

In [None]:
fig = plt.figure()

# Preview the first nine rows of the label table in a 3x3 grid,
# each image titled with its class.
for i in range(9):
    plt.subplot(3, 3, i + 1)
    sample = data.iloc[i]
    plt.imshow(plt.imread("data/train/" + sample["ID"]))
    plt.title(sample["Class"])
    plt.axis("off")

plt.show()

In [None]:
# Inputs are the image file names, targets are the class labels.
X, y = data["ID"], data["Class"]

# Options common to both splits: shuffled, with a fixed random state.
split_options = {"shuffle": True, "random_state": 42}

# First split: 70 % training, 30 % held out, stratified by class.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, stratify=y, **split_options
)

# Second split: the held-out part is halved into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, **split_options
)

In [None]:
# Summarise the training split.
# The target column is taken from y_train, i.e. the labels of exactly this split,
# instead of the full label series `y` (which only produced the same rows through
# implicit pandas index alignment). This matches the validation and test cells.
X_train_df = pd.DataFrame(X_train)
X_train_df["Target"] = y_train
dfSummary(X_train_df)

In [None]:
# Summarise the test split: file names plus their labels in a "Target" column.
X_test_df = X_test.to_frame().assign(Target=y_test)
dfSummary(X_test_df)

In [None]:
# Summarise the validation split: file names plus their labels in a "Target" column.
X_val_df = X_val.to_frame().assign(Target=y_val)
dfSummary(X_val_df)

In [None]:
# Collect basic metadata (file name, dimensions, colour mode) for every
# .jpg/.png file found in the training image directory.
# NOTE(review): the directory is spelled "data/train/" to match every other cell
# in this notebook; the previous "./data/Train/" differs in case, so on a
# case-sensitive file system only one spelling can work. Confirm the folder name.
image_dir = "data/train/"
image_data = []

for filename in os.listdir(image_dir):
    if not filename.endswith((".jpg", ".png")):
        continue
    with Image.open(os.path.join(image_dir, filename)) as image:
        # Only plain values are stored. The Image object itself is closed when
        # the `with` block exits, so keeping it in the table would be useless.
        image_data.append(
            {
                "filename": filename,
                "width": image.width,
                "height": image.height,
                "mode": image.mode,
            }
        )

# Build the frame once, after the loop (it used to be rebuilt for every file).
# The explicit column list keeps `df` well-formed even when no image was found.
df = pd.DataFrame(image_data, columns=["filename", "width", "height", "mode"])

print(df.head())

In [None]:
# Width-to-height ratio of every image, added as a new column of `df`.
df["aspect_ratio"] = df["width"] / df["height"]

# Descriptive statistics of the numeric image properties, printed in turn.
for heading, column in (
    ("Width Stats:", "width"),
    ("Height Stats:", "height"),
    ("Aspect Ratio Stats:", "aspect_ratio"),
):
    print(heading)
    print(df[column].describe())

# Number of images per PIL colour mode.
print("Image Modes Count:")
print(df["mode"].value_counts())

In [None]:
class AgeDataset(Dataset):
    """Dataset of face images labelled with an age class.

    Each item is an ``(image, label)`` pair. The image is loaded from ``root``
    joined with the file name, converted to RGB and passed through
    ``transform`` when one is given. The label is the integer index of the
    class name according to ``label_mapping``.
    """

    def __init__(
        self,
        filenames: pd.Series,
        labels: pd.Series,
        transform: "transforms.Compose | None" = None,
        root: str = "data/train/",
    ) -> None:
        """Store the sample list and loading options.

        Args:
            filenames: Image file names relative to ``root``; read by position
                through ``.iloc``.
            labels: Class names ("YOUNG", "MIDDLE" or "OLD"), matched to
                ``filenames`` by position.
            transform: Optional callable applied to the loaded PIL image.
            root: Directory containing the image files.
        """
        self.filenames = filenames
        self.labels = labels
        self.transform = transform
        self.root = root
        self.label_mapping = {"YOUNG": 0, "MIDDLE": 1, "OLD": 2}

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.filenames)

    def __getitem__(self, idx: int) -> tuple:
        """Load the ``idx``-th image and return ``(image, label_index)``.

        Raises:
            KeyError: If the label at ``idx`` is not a key of ``label_mapping``.
        """
        img_name = os.path.join(self.root, self.filenames.iloc[idx])
        # convert() returns a new, fully loaded image, so the file handle can be
        # released immediately instead of staying open until garbage collection.
        with Image.open(img_name) as raw:
            image = raw.convert("RGB")
        label = self.label_mapping[self.labels.iloc[idx]]

        if self.transform:
            image = self.transform(image)

        return image, label


# Training pipeline: resize, light random augmentation, then conversion to a
# tensor and normalisation of every channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# Evaluation pipeline: the same resize and normalisation but without any random
# augmentation, so validation and test metrics are deterministic and measured on
# unmodified images.
eval_transform = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)


train_dataset = AgeDataset(filenames=X_train, labels=y_train, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Shuffling is only useful for training; evaluation order does not affect metrics.
val_dataset = AgeDataset(filenames=X_val, labels=y_val, transform=eval_transform)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

test_dataset = AgeDataset(filenames=X_test, labels=y_test, transform=eval_transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Dané transformácie sme vybrali preto, že sa najviac hodia na normalizáciu nášho typu obrázkov. Keď máme na obrázku tvár, nemôžeme ju otočiť napr. o 180 stupňov, pretože by si model mohol mýliť napríklad oči s ústami. Horizontal flip a color jitter sú mierne transformácie, aby sa model nenaučil iba na jeden uhol alebo jednu farbu pleti.

### Voľba modelov

1.
- modifikovaná verzia LeNet-5 s pridanou batch normalizáciou
- jednoduchá architektúra pridali sme batch norm aby sme redukovali overfitting

2.

3.

## 1. Custom CNN 

In [None]:
class CNN(nn.Module):
    """Convolutional classifier with four conv blocks and a five-layer dense head.

    Every conv block is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2)``, so each block halves the spatial size. The flattened feature
    map is classified by fully connected layers with dropout in between.
    """

    def __init__(self, in_channels, num_classes, image_size=128) -> None:
        """Build the layers.

        Args:
            in_channels: Number of channels of the input images.
            num_classes: Number of output logits.
            image_size: Side length of the square input images. It determines
                the input size of the first dense layer. The default of 128
                reproduces the previous fixed ``256 * 8 * 8``.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Four stride-2 poolings shrink each spatial side by a factor of 16.
        feature_size = image_size // 16
        self.fc1 = nn.Linear(256 * feature_size * feature_size, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, num_classes)

        # Not used by forward(); kept so the attribute stays available.
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)

        self.dropout1 = nn.Dropout(0.2)
        self.dropout2 = nn.Dropout(0.2)
        self.dropout3 = nn.Dropout(0.5)

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        x = self.pool4(F.relu(self.bn4(self.conv4(x))))

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)

        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.dropout2(x)
        x = F.relu(self.fc4(x))
        # The result used to be assigned to an unused variable, so this dropout
        # layer never had any effect on the output.
        x = self.dropout3(x)
        x = self.fc5(x)

        return x


In [None]:
# Instantiate the custom CNN for 3-channel images and 3 classes, and move it to the selected device.
model = CNN(in_channels=3, num_classes=3).to(device=device)

In [None]:
def evaluate(model: nn.Module, val_loader: DataLoader, criterion: nn.CrossEntropyLoss) -> tuple:
    """Evaluate ``model`` on every batch of ``val_loader``.

    The model is switched to evaluation mode and stays in that mode; callers
    that continue training must call ``model.train()`` again.

    Args:
        model: Network returning one score per class for a batch of images.
        val_loader: Loader yielding ``(images, labels)`` batches.
        criterion: Loss applied to ``(outputs, labels)``.

    Returns:
        ``(mean batch loss, accuracy, precision, recall, f1)``. Precision,
        recall and F1 are weighted averages over the classes.

    Raises:
        ZeroDivisionError: If ``val_loader`` yields no batches.
    """
    model.eval()
    val_loss = 0.0
    all_labels = []
    all_predictions = []

    # Batches are moved to the notebook-level `device`.
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            val_loss += criterion(outputs, labels).item()
            predicted = outputs.argmax(dim=1)
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_predictions)
    # zero_division=0 makes explicit that a class which is never predicted
    # contributes 0 to the average instead of relying on a suppressed warning.
    precision = precision_score(
        all_labels, all_predictions, average="weighted", zero_division=0
    )
    recall = recall_score(all_labels, all_predictions, average="weighted", zero_division=0)
    f1 = f1_score(all_labels, all_predictions, average="weighted", zero_division=0)

    return val_loss / len(val_loader), accuracy, precision, recall, f1

In [None]:
# Optimiser for the custom CNN, with its learning rate multiplied by 0.1
# after every 10 scheduler steps.
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

# Weighted cross-entropy; the weights follow the label indices
# (YOUNG=0, MIDDLE=1, OLD=2).
# NOTE(review): confirm these weights reflect the class frequencies seen in the EDA.
class_weights = torch.tensor([1.0, 2.0, 3.0], device=device)
criterion = nn.CrossEntropyLoss(weight=class_weights)

In [None]:
# Start a Weights & Biases run for the custom CNN and record its settings.
run_settings = {
    "model": "Custom-CNN",
    "learning_rate": 1e-4,
    "batch_size": batch_size,
    "epochs": num_epochs,
}
wandb.init(project="age-recognition", name="Custom-CNN", config=run_settings)

In [None]:
# Training loop of the custom CNN: one pass over train_loader per epoch,
# followed by an evaluation on val_loader, logging and a scheduler step.
model.to(device)

# Per-epoch history, plotted by the cells below.
train_losses = []
val_losses = []
val_accuracies = []
train_accuracies = []

for epoch in range(num_epochs):
    # evaluate() leaves the model in eval mode, so training mode is re-enabled every epoch.
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Predicted class = index of the highest output score.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Mean loss per batch and fraction of correctly classified training samples.
    train_loss = running_loss / len(train_loader)
    train_accuracy = correct / total

    val_loss, val_accuracy, precision, recall, f1 = evaluate(
        model, val_loader, criterion
    )

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)
    train_accuracies.append(train_accuracy)

    # The logged epoch index is 0-based; the printed one below is 1-based.
    wandb.log({
        "epoch": epoch,
        "train_loss": train_loss,
        "train_accuracy": train_accuracy,
        "val_loss": val_loss,
        "val_accuracy": val_accuracy,
        "f1": f1,
    })

    print(
        f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f},\
    Train Accuracy: {train_accuracy:.3f}, Val Accuracy: {val_accuracy:.3f},\
    Precision: {precision:.3f}, Recall: {recall:.3f}, F1: {f1:.3f}"
    )

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

# Close the wandb run and store the trained weights.
wandb.finish()
torch.save(model.state_dict(), "CNN.pth")

In [None]:
# Loss curves of the custom CNN run: one value per epoch for each split.
loss_fig, loss_ax = plt.subplots(figsize=(10, 5))
for curve, curve_label in ((train_losses, "Training Loss"), (val_losses, "Validation Loss")):
    loss_ax.plot(curve, label=curve_label)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.legend()
loss_ax.set_title("Training and Validation Loss")
plt.show()

In [None]:
# Plot the validation and training accuracy of the custom CNN per epoch.
# Both histories hold fractions in [0, 1] (correct / total and accuracy_score),
# so the axis is labelled as a plain accuracy rather than a percentage.
plt.figure(figsize=(10, 5))
plt.plot(val_accuracies, label="Validation Accuracy")
plt.plot(train_accuracies, label="Training Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Accuracy")
plt.show()

Z výsledkov vidíme, že model sa prestal zlepšovať približne od 20. epochy.

## 2. Custom CNN (Xception backbone)

In [None]:
# Pretrained Xception backbone created through timm.
# NOTE(review): num_classes=0 presumably drops timm's classification layer — confirm.
conv_base = create_model("xception", pretrained=True, num_classes=0)
# Replace the backbone's own pooling with a no-op; CustomModel below applies
# its own AdaptiveAvgPool2d to the backbone output.
conv_base.global_pool = nn.Identity()


class CustomModel(nn.Module):
    """Feature-extracting backbone followed by a small dense classification head.

    The backbone output is average-pooled to one value per channel, flattened
    and passed through ``BatchNorm -> Linear(2048, 256) -> Linear(256, 128) ->
    Linear(128, num_classes)``, with ReLU, batch normalisation and dropout in
    between.
    """

    def __init__(self, num_classes, base_model=None):
        """Build the head on top of the backbone.

        Args:
            num_classes: Number of output logits.
            base_model: Backbone producing a feature map with 2048 channels.
                Defaults to the notebook-level ``conv_base``.
        """
        super().__init__()
        self.base_model = conv_base if base_model is None else base_model
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.batch_norm1 = nn.BatchNorm1d(2048)
        self.fc1 = nn.Linear(2048, 256)
        self.batch_norm2 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.batch_norm3 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(0.5)
        self.output = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        x = self.base_model(x)
        x = self.global_avg_pool(x)
        x = torch.flatten(x, 1)
        x = self.batch_norm1(x)
        x = F.relu(self.fc1(x))
        x = self.batch_norm2(x)
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.batch_norm3(x)
        x = self.dropout2(x)
        # Return logits, not softmax probabilities: the notebook trains this model
        # with nn.CrossEntropyLoss, which applies log-softmax itself. Applying
        # softmax here as well squashed the scores twice and weakened the gradients.
        # The arg-max prediction is unaffected.
        return self.output(x)


# Number of output classes.
num_classes = 3
# Rebinds `model` (previously the custom CNN) to the Xception-based model on the selected device.
model = CustomModel(num_classes=num_classes).to(device=device)

In [None]:
# Weighted cross-entropy; the weights follow the label indices
# (YOUNG=0, MIDDLE=1, OLD=2).
class_weights = torch.tensor([1.0, 2.0, 3.0], device=device)
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Optimiser for the Xception-based model, with its learning rate multiplied
# by 0.1 after every 10 scheduler steps.
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

In [None]:
# Start a Weights & Biases run for the Xception-based model and record its settings.
run_settings = {
    "model": "Custom-CNN_Xception",
    "learning_rate": 1e-4,
    "batch_size": batch_size,
    "epochs": num_epochs-40,
}
wandb.init(project="age-recognition", name="Custom-CNN_Xception", config=run_settings)

In [None]:
# Training loop of the Xception-based model: one pass over train_loader per
# epoch, followed by an evaluation on val_loader, logging and a scheduler step.

# This model is trained for a shorter schedule than the custom CNN. The count is
# named so that the loop and the progress message agree (the message used to
# print the full num_epochs as the total).
xception_epochs = num_epochs - 40

# Per-epoch history, plotted by the cells below.
train_losses = []
val_losses = []
val_accuracies = []
train_accuracies = []

for epoch in range(xception_epochs):
    # evaluate() leaves the model in eval mode, so training mode is re-enabled every epoch.
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Predicted class = index of the highest output score.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Mean loss per batch and fraction of correctly classified training samples.
    train_loss = running_loss / len(train_loader)
    train_accuracy = correct / total

    val_loss, val_accuracy, precision, recall, f1 = evaluate(
        model, val_loader, criterion
    )

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)
    train_accuracies.append(train_accuracy)

    # The logged epoch index is 0-based; the printed one below is 1-based.
    wandb.log({
        "epoch": epoch,
        "train_loss": train_loss,
        "train_accuracy": train_accuracy,
        "val_loss": val_loss,
        "val_accuracy": val_accuracy,
        "f1": f1,
    })

    print(
        f"Epoch {epoch+1}/{xception_epochs}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f},\
    Train Accuracy: {train_accuracy:.3f}, Val Accuracy: {val_accuracy:.3f},\
    Precision: {precision:.3f}, Recall: {recall:.3f}, F1: {f1:.3f}"
    )

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

# Close the wandb run and store the trained weights.
wandb.finish()
torch.save(model.state_dict(), "CNN_Xception.pth")

In [None]:
# Loss curves of the Xception run: one value per epoch for each split.
loss_fig, loss_ax = plt.subplots(figsize=(10, 5))
for curve, curve_label in ((train_losses, "Training Loss"), (val_losses, "Validation Loss")):
    loss_ax.plot(curve, label=curve_label)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.legend()
loss_ax.set_title("Training and Validation Loss")
plt.show()

In [None]:
# Plot the validation and training accuracy of the Xception run per epoch.
# Both histories hold fractions in [0, 1] (correct / total and accuracy_score),
# so the axis is labelled as a plain accuracy rather than a percentage.
plt.figure(figsize=(10, 5))
plt.plot(val_accuracies, label="Validation Accuracy")
plt.plot(train_accuracies, label="Training Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Accuracy")
plt.show()

Z výsledkov vidíme...

## 3. Resnet-18 - pretrained

Source: https://pytorch.org/vision/main/models/generated/torchvision.models.resnet18.html

In [None]:
# ResNet-18 with ImageNet weights. The weights enum replaces the deprecated
# `pretrained=True` flag and selects the same IMAGENET1K_V1 checkpoint.
model_resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Swap the final fully connected layer for a fresh 3-class classifier.
model_resnet.fc = nn.Linear(model_resnet.fc.in_features, 3)

# Rebinds `model` (previously the Xception-based model) to the ResNet on the selected device.
model = model_resnet.to(device)

In [None]:
# Unweighted cross-entropy and Adam with lr=1e-3 for the ResNet.
# No learning-rate scheduler is created in this cell.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_resnet.parameters(), lr=1e-3)

In [None]:
# Start a Weights & Biases run for the ResNet-18 model and record its settings.
run_settings = {
    "model": "Resnet-18",
    "learning_rate": 1e-3,
    "batch_size": batch_size,
    "epochs": num_epochs-40,
}
wandb.init(project="age-recognition", name="Resnet-18", config=run_settings)

In [None]:
# Training loop of the ResNet-18 model: one pass over train_loader per epoch,
# followed by an evaluation on val_loader and logging.

# This model is trained for a shorter schedule than the custom CNN. The count is
# named so that the loop and the progress message agree (the message used to
# print the full num_epochs as the total).
resnet_epochs = num_epochs - 40

# Per-epoch history, plotted by the cells below.
train_losses = []
val_losses = []
val_accuracies = []
train_accuracies = []

for epoch in range(resnet_epochs):
    # evaluate() leaves the model in eval mode, so training mode is re-enabled every epoch.
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Predicted class = index of the highest output score.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Mean loss per batch and fraction of correctly classified training samples.
    train_loss = running_loss / len(train_loader)
    train_accuracy = correct / total

    val_loss, val_accuracy, precision, recall, f1 = evaluate(
        model, val_loader, criterion
    )

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)
    train_accuracies.append(train_accuracy)

    # The logged epoch index is 0-based; the printed one below is 1-based.
    wandb.log({
        "epoch": epoch,
        "train_loss": train_loss,
        "train_accuracy": train_accuracy,
        "val_loss": val_loss,
        "val_accuracy": val_accuracy,
        "f1": f1,
    })

    print(
        f"Epoch {epoch+1}/{resnet_epochs}, Train Loss: {train_loss:.3f}, Val Loss: {val_loss:.3f},\
    Train Accuracy: {train_accuracy:.3f}, Val Accuracy: {val_accuracy:.3f},\
    Precision: {precision:.3f}, Recall: {recall:.3f}, F1: {f1:.3f}"
    )

    # No scheduler step here: the ResNet setup cell creates no scheduler, and the
    # `scheduler` left over from the Xception section is bound to that section's
    # optimizer, so stepping it had no effect on this model.

# Close the wandb run and store the trained weights.
wandb.finish()
torch.save(model.state_dict(), "resnet.pth")

In [None]:
# Loss curves of the ResNet-18 run: one value per epoch for each split.
loss_fig, loss_ax = plt.subplots(figsize=(10, 5))
for curve, curve_label in ((train_losses, "Training Loss"), (val_losses, "Validation Loss")):
    loss_ax.plot(curve, label=curve_label)
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.legend()
loss_ax.set_title("Training and Validation Loss")
plt.show()

In [None]:
# Plot the validation and training accuracy of the ResNet-18 run per epoch.
# Both histories hold fractions in [0, 1] (correct / total and accuracy_score),
# so the axis is labelled as a plain accuracy rather than a percentage.
plt.figure(figsize=(10, 5))
plt.plot(val_accuracies, label="Validation Accuracy")
plt.plot(train_accuracies, label="Training Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Accuracy")
plt.show()

Z výsledkov vidíme...

## Our face prediction using Resnet-18

In [None]:
def predict_image(model, image_path, transform, device):
    """Predict the class index of a single image file.

    The model is switched to evaluation mode and stays in that mode.

    Args:
        model: Network returning one score per class for a batch of images.
        image_path: Path of the image file to classify.
        transform: Callable turning the RGB PIL image into a tensor; a batch
            dimension is added to its result.
        device: Device the input tensor is moved to before the forward pass.

    Returns:
        The index of the highest-scoring class as a Python int.
    """
    model.eval()
    # convert() returns a new, fully loaded image, so the file handle can be
    # closed right away instead of staying open until garbage collection.
    with Image.open(image_path) as raw:
        image = raw.convert("RGB")
    batch = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(batch)
    return output.argmax(dim=1).item()

In [None]:
# Classify our own photos with the current `model` and show each photo with its
# predicted class.
# NOTE(review): `model` is whichever network was assigned last; per the section
# title that is the ResNet-18 — confirm when running cells out of order.
image_paths = ["riso.jpg", "tomas.jpg"]

# Inference uses a deterministic pipeline: the same resize and normalisation as
# the training `transform`, but without its random flip, rotation and colour
# jitter, which would make the prediction vary between runs.
inference_transform = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

predictions = [
    predict_image(model, image_path, inference_transform, device)
    for image_path in image_paths
]

# One subplot per image; squeeze=False keeps `axes` two-dimensional even for a
# single image, so the loop below works for any number of paths.
fig, axes = plt.subplots(1, len(image_paths), figsize=(10, 5), squeeze=False)
label_mapping = {0: "YOUNG", 1: "MIDDLE", 2: "OLD"}

for ax, image_path, prediction in zip(axes[0], image_paths, predictions, strict=True):
    image = plt.imread(image_path)
    ax.imshow(image)
    ax.set_title(f"Prediction: {label_mapping[prediction]}")
    ax.axis("off")

plt.show()