In [2]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Step 1: Define Dataset and DataLoader
# Root folder of the image dataset (read by ImageFolder below).
# Set the HANDWRITING_DATA_DIR environment variable to point the notebook at a
# different location; when it is unset the original local path is used.
data_dir = os.environ.get(
    'HANDWRITING_DATA_DIR',
    'C:\\Users\\Giuseppe\\Documents\\tesi\\datasets\\data',
)

In [4]:
# Preprocessing applied to every image before it reaches the network
_preprocessing_steps = [
    transforms.Resize((128, 128)),                   # bring every image to 128x128
    transforms.ToTensor(),                           # PIL image -> float tensor
    transforms.Normalize(mean=(0.5,), std=(0.5,)),   # (x - 0.5) / 0.5
]
transform = transforms.Compose(_preprocessing_steps)

In [5]:
# Build the labelled image dataset from the folder tree under data_dir
dataset = ImageFolder(data_dir, transform=transform)

In [15]:
# Split Dataset: Train/Validation (80% / 20%)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so the same samples land in each subset on every run;
# without it the validation numbers are not comparable across kernel restarts.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

In [16]:
# Batch iterators: the training subset is reshuffled every epoch,
# the validation subset is read in a fixed order.
_loader_options = {"batch_size": 32}
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

In [17]:
class SimpleCNN(nn.Module):
    """ResNet-18 backbone whose final fully connected layer is resized to ``num_classes``.

    Args:
        num_classes (int): Number of output classes of the classifier head.
        pretrained (bool): Load the ImageNet weights into the backbone
            (default ``True``, the original behaviour). Pass ``False`` when the
            weights are going to be restored from a saved ``state_dict`` anyway,
            which avoids the download.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        # Attribute name ``model`` is kept: it is part of the state_dict key prefix.
        self.model = self._build_backbone(pretrained)
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)  # Adjust output layer

    @staticmethod
    def _build_backbone(pretrained):
        """Create the ResNet-18 with whichever weights API this torchvision offers."""
        weights_enum = getattr(models, "ResNet18_Weights", None)
        if weights_enum is None:
            # Older torchvision: only the boolean ``pretrained`` flag exists.
            return models.resnet18(pretrained=pretrained)
        # Newer torchvision deprecates ``pretrained``; IMAGENET1K_V1 is the
        # checkpoint that ``pretrained=True`` resolves to.
        return models.resnet18(weights=weights_enum.IMAGENET1K_V1 if pretrained else None)

    def forward(self, x):
        """Return the backbone's class logits for the image batch ``x``."""
        return self.model(x)

In [18]:
# One class per writer: count the entries of the class-name -> index mapping
num_classes = len(dataset.class_to_idx)

In [19]:
# Pick the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the classifier and move its parameters to the chosen device
model = SimpleCNN(num_classes=num_classes)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\Giuseppe/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:02<00:00, 21.9MB/s]


In [20]:
# Step 3: Loss function and optimizer
# Cross-entropy over the class logits; Adam updates every model parameter.
criterion = nn.CrossEntropyLoss()
_trainable_params = model.parameters()
optimizer = torch.optim.Adam(_trainable_params, lr=1e-3)

In [21]:
# Step 4: Train the Model
# Per-epoch history; the plotting cell reads these three lists.
train_losses, train_accuracies, val_accuracies = [], [], []

num_epochs = 25
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear old gradients, then forward -> loss -> backward -> update
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate batch statistics
        running_loss += loss.item()
        predicted = torch.max(outputs, 1)[1]  # index of the largest logit per sample
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    # Mean loss per batch and accuracy in percent for this epoch
    train_losses.append(running_loss / len(train_loader))
    train_accuracies.append(100 * correct / total)

    # ---- validation pass (no gradient tracking) ----
    model.eval()
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            predicted = torch.max(outputs, 1)[1]
            correct_val += (predicted == labels).sum().item()
            total_val += labels.size(0)
    val_accuracies.append(100 * correct_val / total_val)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, "
          f"Train Accuracy: {train_accuracies[-1]:.2f}%, Validation Accuracy: {val_accuracies[-1]:.2f}%")

Epoch [1/25], Loss: 6.7377, Train Accuracy: 3.90%, Validation Accuracy: 1.62%
Epoch [2/25], Loss: 5.4862, Train Accuracy: 7.31%, Validation Accuracy: 7.47%
Epoch [3/25], Loss: 4.4971, Train Accuracy: 16.08%, Validation Accuracy: 11.69%
Epoch [4/25], Loss: 3.5495, Train Accuracy: 28.35%, Validation Accuracy: 21.10%
Epoch [5/25], Loss: 2.6105, Train Accuracy: 42.89%, Validation Accuracy: 28.90%
Epoch [6/25], Loss: 1.9114, Train Accuracy: 53.37%, Validation Accuracy: 23.05%
Epoch [7/25], Loss: 1.2812, Train Accuracy: 68.81%, Validation Accuracy: 44.81%
Epoch [8/25], Loss: 0.8216, Train Accuracy: 80.99%, Validation Accuracy: 41.56%
Epoch [9/25], Loss: 0.5466, Train Accuracy: 88.38%, Validation Accuracy: 37.01%
Epoch [10/25], Loss: 0.3895, Train Accuracy: 92.45%, Validation Accuracy: 42.53%
Epoch [11/25], Loss: 0.2391, Train Accuracy: 95.21%, Validation Accuracy: 35.06%
Epoch [12/25], Loss: 0.1826, Train Accuracy: 96.99%, Validation Accuracy: 13.31%
Epoch [13/25], Loss: 0.1203, Train Accura

In [None]:
# Persist only the learned parameters (state_dict), not the whole module object
trained_weights = model.state_dict()
torch.save(trained_weights, 'handwriting_model.pth')

In [None]:
# Step 5: Plot Training and Validation Metrics
epochs = range(1, num_epochs + 1)

# One figure, two side-by-side panels: loss on the left, accuracy on the right
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 6))

loss_ax.plot(epochs, train_losses, label='Training Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training Loss')
loss_ax.legend()

acc_ax.plot(epochs, train_accuracies, label='Training Accuracy')
acc_ax.plot(epochs, val_accuracies, label='Validation Accuracy', linestyle='--')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Split Dataset: Train/Test
# The training loop above fitted the model on every sample of train_dataset,
# so a "test" subset carved out of it would not be held out and would
# overstate accuracy. val_dataset is only ever read under torch.no_grad()
# during training, so it is used as the test set instead.
train, test = train_dataset, val_dataset
train_size, val_size = len(train), len(test)

In [None]:
# Batch iterators for the train/test subsets: shuffle the former,
# keep the latter in a fixed order.
train_load = DataLoader(dataset=train, batch_size=32, shuffle=True)
test_load = DataLoader(dataset=test, batch_size=32, shuffle=False)

In [None]:
def _expand_classifier_head(model, num_classes):
    """Grow the final ``fc`` layer to ``num_classes + 1`` outputs, keeping learned weights."""
    # SimpleCNN stores the ResNet under ``.model``; a bare ResNet exposes ``fc`` directly.
    owner = model if hasattr(model, "fc") else getattr(model, "model", None)
    if owner is None or not hasattr(owner, "fc"):
        raise AttributeError("model has no 'fc' classification layer to extend")

    old_head = owner.fc
    new_head = nn.Linear(old_head.in_features, num_classes + 1).to(
        device=old_head.weight.device, dtype=old_head.weight.dtype
    )
    # Carry the rows of the already-known classes over so they are not re-learned
    # from scratch; only the extra row keeps its random initialisation.
    rows = min(num_classes, old_head.out_features)
    with torch.no_grad():
        new_head.weight[:rows].copy_(old_head.weight[:rows])
        if old_head.bias is not None:
            new_head.bias[:rows].copy_(old_head.bias[:rows])
    owner.fc = new_head


def _validation_accuracy(model, loader, device):
    """Return top-1 accuracy (percent) of ``model`` on ``loader``; restores the train/eval mode."""
    was_training = model.training
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            hits += (predicted == labels).sum().item()
            seen += labels.size(0)
    model.train(was_training)
    return 100 * hits / seen if seen else 0.0


def add_new_author(new_author_data_path, model, num_classes, train_loader, val_loader, epochs=10):
    """
    Function to add a new author to the handwriting identification system.

    The classifier head is widened by one output (existing class weights are
    preserved), every image found under ``new_author_data_path`` is labelled
    with the new class index ``num_classes``, and the model is fine-tuned on
    the new images mixed with the existing training data.

    Args:
        new_author_data_path (str): Path to the new author's data, laid out as
            ImageFolder expects (images inside at least one sub-folder).
        model (torch.nn.Module): Pre-trained model; either exposes ``fc``
            itself or wraps a network under ``model.model`` (as SimpleCNN does).
        num_classes (int): Number of classes the model has *before* this call.
        train_loader (DataLoader): DataLoader for existing training data. Its
            dataset is mixed with the new images; pass ``None`` to fine-tune on
            the new author only. Assumes its samples have the same tensor shape
            as the 128x128 transform used here.
        val_loader (DataLoader): DataLoader for validation data. When not
            ``None``, accuracy on it is reported after every epoch.
        epochs (int): Number of fine-tuning epochs.

    Returns:
        torch.nn.Module: Fine-tuned model (same object, modified in place).

    Raises:
        AttributeError: If no ``fc`` layer can be found on the model.
    """
    # Work on whatever device the model already lives on instead of a notebook global.
    device = next(model.parameters()).device

    # Load new author's data
    transform = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    # ImageFolder numbers its folders from 0, which would collide with an
    # existing writer; force every sample onto the new class index instead.
    new_class_index = num_classes
    new_author_dataset = ImageFolder(
        root=new_author_data_path,
        transform=transform,
        target_transform=lambda _target: new_class_index,
    )

    # Train on both new author and existing data so the old classes are not forgotten
    if train_loader is not None:
        fine_tune_dataset = torch.utils.data.ConcatDataset(
            [train_loader.dataset, new_author_dataset]
        )
    else:
        fine_tune_dataset = new_author_dataset
    new_author_loader = DataLoader(fine_tune_dataset, batch_size=32, shuffle=True)

    # Update the final layer of the model to include the new class
    _expand_classifier_head(model, num_classes)
    model.to(device)

    # Optimizer and loss (created after the head swap so the new layer is optimised)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Fine-tuning
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0

        for images, labels in new_author_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        message = f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(new_author_loader):.4f}"
        if val_loader is not None:
            # Accuracy on the previously known classes shows how much was forgotten.
            message += f", Validation Accuracy: {_validation_accuracy(model, val_loader, device):.2f}%"
        print(message)

    return model


In [None]:
# Folder containing the new author's images
new_author_data_path = "/path/to/new/author/data"

# Fine-tune the current model so it also recognises the new author
fine_tune_args = dict(
    new_author_data_path=new_author_data_path,
    model=model,
    num_classes=num_classes,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=10,
)
model = add_new_author(**fine_tune_args)

# Save the updated model
updated_weights = model.state_dict()
torch.save(updated_weights, 'handwriting_model_updated.pth')


In [None]:
# Visualize correct predictions
max_examples = 10  # size of the 2x5 grid below

# Collect correctly classified test images until the grid can be filled
model.eval()
correct_images, correct_labels, predicted_labels = [], [], []

with torch.no_grad():
    for images, labels in test_load:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        mask = (predicted == labels)
        correct_images.extend(images[mask].cpu())
        correct_labels.extend(labels[mask].cpu())
        predicted_labels.extend(predicted[mask].cpu())
        if len(correct_images) >= max_examples:
            break

# The loader may run out before max_examples hits are found; indexing a fixed
# range(10) would then raise IndexError, so show only what was collected.
num_to_show = min(max_examples, len(correct_images))
if num_to_show == 0:
    print("No correctly classified test images to display.")
else:
    plt.figure(figsize=(12, 6))
    for i in range(num_to_show):
        plt.subplot(2, 5, i + 1)
        img = correct_images[i].permute(1, 2, 0)  # Convert from CxHxW to HxWxC
        img = img * 0.5 + 0.5  # Undo Normalize(0.5, 0.5) for display
        plt.imshow(img.numpy())
        plt.title(f"Label: {correct_labels[i].item()}\nPred: {predicted_labels[i].item()}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()