In [1]:
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from datetime import datetime
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy

In [2]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Downloading EMNIST Letters dataset 
emnist = datasets.EMNIST(root='./data', split='letters', train=True, download=True, transform=transform)
print(emnist.class_to_idx)

Downloading https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip to ./data/EMNIST/raw/gzip.zip


100%|██████████| 562M/562M [00:10<00:00, 52.5MB/s] 


Extracting ./data/EMNIST/raw/gzip.zip to ./data/EMNIST/raw
{'N/A': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}


In [3]:
total_len = len(emnist)
train_size = int(0.75 * total_len)
val_size = int(0.20 * total_len)
extra_size = total_len - train_size - val_size

train_dataset, val_dataset, extra_dataset = random_split(emnist, [train_size, val_size, extra_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [4]:
print(f"Train set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")
print(f"Extra set size: {len(extra_dataset)}")

Train set size: 93600
Validation set size: 24960
Extra set size: 6240


In [5]:
class ResBlock(nn.Module):
    def __init__(self, input_features, output_features):
        super(ResBlock, self).__init__()
        self.stride = 1 if input_features == output_features else 2
        
        #main convolutional path
        self.features = nn.Sequential(
            nn.Conv2d(input_features, output_features, kernel_size=3, stride=self.stride, padding=1, bias=False),
            nn.BatchNorm2d(output_features),
            nn.ReLU(inplace=True),
            nn.Conv2d(output_features, output_features, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(output_features)
        )

        #shortcut connection
        self.shortcut = nn.Identity()
        if input_features != output_features:
            self.shortcut = nn.Sequential(
                nn.Conv2d(input_features, output_features, kernel_size=1, stride=self.stride, bias=False),
                nn.BatchNorm2d(output_features)
            )

    def forward(self, x):
        residual = self.shortcut(x)
        x = self.features(x)
        x += residual
        x = F.relu(x, inplace=True)
        return x

class Resnet18(nn.Module):
    def __init__(self, num_of_classes=26):  #classes is 26 for EMNIST letters
        super(Resnet18, self).__init__()
        
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False), 
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),

            ResBlock(64, 64),
            ResBlock(64, 64),

            ResBlock(64, 128),
            ResBlock(128, 128),

            ResBlock(128, 256),
            ResBlock(256, 256),

            ResBlock(256, 512),
            ResBlock(512, 512),

            nn.AdaptiveAvgPool2d((1, 1))
        )

        self.classifier = nn.Sequential(
            nn.Linear(512, num_of_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [6]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, model_save_path="best_emnist_model.pth"):
    model.to(device)
    
    best_val_acc = 0.0
    history = {
        "train": {"losses": [], "accuracies": [], "epoch_times": []},
        "val": {"losses": [], "accuracies": []},
        "total_training_time": None
    }

    total_training_start = datetime.now()
    
    for epoch in range(num_epochs):
        epoch_start_time = datetime.now()
        print(f"\nEpoch {epoch + 1}/{num_epochs}")

        # Training phase
        model.train()
        train_loss, correct_preds, total_samples = 0.0, 0, 0

        for inputs, labels in train_loader:
            labels = labels - 1  
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * inputs.size(0)
            _, predictions = torch.max(outputs, 1)
            correct_preds += (predictions == labels).sum().item()
            total_samples += labels.size(0)

        avg_train_loss = train_loss / total_samples
        train_accuracy = correct_preds / total_samples
        history["train"]["losses"].append(avg_train_loss)
        history["train"]["accuracies"].append(train_accuracy * 100)

        # Validation phase
        model.eval()
        val_loss, correct_preds, total_samples = 0.0, 0, 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                labels = labels - 1  
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)
                _, predictions = torch.max(outputs, 1)
                correct_preds += (predictions == labels).sum().item()
                total_samples += labels.size(0)

        avg_val_loss = val_loss / total_samples
        val_accuracy = correct_preds / total_samples
        history["val"]["losses"].append(avg_val_loss)
        history["val"]["accuracies"].append(val_accuracy * 100)

        epoch_duration = (datetime.now() - epoch_start_time).total_seconds() / 60  # Minutes
        history["train"]["epoch_times"].append(epoch_duration)

        print(f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.4f}")
        print(f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.4f}")
        print(f"Epoch duration: {epoch_duration:.2f} min")

        # Save best model based on validation accuracy
        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            torch.save(model.state_dict(), model_save_path)
            print(f"Best model saved with Val Acc: {val_accuracy:.4f}")

    total_training_duration = (datetime.now() - total_training_start).total_seconds() / 60 
    history["total_training_time"] = total_training_duration
    print(f"\nTotal training time: {total_training_duration:.2f} min")

    return history

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [8]:
#initialize model 
model = Resnet18(num_of_classes=26)
#initialize weights using He (Kaiming) uniform initialization
for module in model.modules():
    if isinstance(module, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_uniform_(module.weight, nonlinearity="relu")

#define loss criterion and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

#training the model on EMNIST Letters dataset
resnet_he_acc_state = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    device=device,
    model_save_path="resnet_emnist_letters.pth"
)


Epoch 1/10
Train Loss: 0.3352, Train Acc: 0.8911
Val Loss: 0.2570, Val Acc: 0.9172
Epoch duration: 3.61 min
Best model saved with Val Acc: 0.9172

Epoch 2/10
Train Loss: 0.1982, Train Acc: 0.9321
Val Loss: 0.1859, Val Acc: 0.9372
Epoch duration: 3.58 min
Best model saved with Val Acc: 0.9372

Epoch 3/10
Train Loss: 0.1657, Train Acc: 0.9421
Val Loss: 0.1850, Val Acc: 0.9377
Epoch duration: 3.57 min
Best model saved with Val Acc: 0.9377

Epoch 4/10
Train Loss: 0.1484, Train Acc: 0.9461
Val Loss: 0.1780, Val Acc: 0.9378
Epoch duration: 3.58 min
Best model saved with Val Acc: 0.9378

Epoch 5/10
Train Loss: 0.1292, Train Acc: 0.9518
Val Loss: 0.1659, Val Acc: 0.9455
Epoch duration: 3.57 min
Best model saved with Val Acc: 0.9455

Epoch 6/10
Train Loss: 0.1147, Train Acc: 0.9562
Val Loss: 0.1509, Val Acc: 0.9501
Epoch duration: 3.58 min
Best model saved with Val Acc: 0.9501

Epoch 7/10
Train Loss: 0.1026, Train Acc: 0.9607
Val Loss: 0.1706, Val Acc: 0.9462
Epoch duration: 3.58 min

Epoch 8/

In [9]:
test_emnist = datasets.EMNIST(root='./data', split='letters', train=False, download=True, transform=transform)

test_size = int(0.3 * len(test_emnist))
unused_size = len(test_emnist) - test_size
test_dataset, _ = torch.utils.data.random_split(test_emnist, [test_size, unused_size])
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)
criterion = nn.CrossEntropyLoss()

# Evaluation function
def evaluate(model, weight_path, device):
    model.load_state_dict(torch.load(weight_path))
    model.to(device)
    model.eval()

    # Adjust for EMNIST Letters (26 classes)
    accuracy_metric = Accuracy(task="multiclass", num_classes=26).to(device)

    performances = {
        "test": {
            "loader": test_loader,
        }
    }

    for dataset_name in performances:
        all_labels = []
        all_preds = []
        all_probabilities = []
        total_loss = 0.0

        with torch.no_grad():
            for inputs, labels in performances[dataset_name]["loader"]:
                labels = labels - 1
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                total_loss += loss.item()
                probabilities = torch.softmax(outputs, dim=1)
                _, predicted = torch.max(outputs.data, 1)

                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(predicted.cpu().numpy())
                all_probabilities.extend(probabilities.cpu().numpy())

        avg_loss = total_loss / len(performances[dataset_name]["loader"])
        acc = accuracy_metric(torch.tensor(all_preds), torch.tensor(all_labels)) * 100

        performances[dataset_name].update({
            "loss": avg_loss,
            "accuracy": acc.item(),
            "all_labels": all_labels,
            "all_preds": all_preds,
            "all_probabilities": all_probabilities
        })

    return performances

In [10]:
# Evaluate the model
performance = evaluate(Resnet18(), "resnet_emnist_letters.pth", device = device)
print("Test accuracy:", performance["test"]["accuracy"])

  model.load_state_dict(torch.load(weight_path))


Test accuracy: 95.12820434570312


In [11]:
model = Resnet18(num_of_classes=26)  # Set to 26 for EMNIST letters
model.load_state_dict(torch.load("resnet_emnist_letters.pth", map_location=torch.device("cpu")))
torch.save(model.state_dict(), "resnet_emnist_letters_cpu.pth")

  model.load_state_dict(torch.load("resnet_emnist_letters.pth", map_location=torch.device("cpu")))
