In [51]:
import torch
from data.names_dataset import NamesDataset
from torch.utils.data import DataLoader

names_dataset = NamesDataset(data_folder="../datasets/names")


train_dataset, test_dataset = torch.utils.data.random_split(names_dataset, [0.85, 0.15])
print(f"Train dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

Train dataset size: 17063
Test dataset size: 3011


In [52]:
BATCH_SIZE = 4


def collate_fn(
    batch: list[tuple[torch.Tensor, torch.Tensor]],
) -> list[tuple[torch.Tensor, torch.Tensor]]:
    return batch


train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
)

test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
)

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class NamesClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NamesClassifier, self).__init__()
        self.rnn = nn.RNN(
            input_size=input_size, hidden_size=hidden_size, batch_first=True
        )
        self.h2o = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x is of shape (batch_size, seq_length, input_size)
        output, hidden = self.rnn(x)
        output = self.h2o(hidden[0])
        # output is of shape (batch_size, output_size)
        output = F.log_softmax(output, dim=1)
        return output


def train(
    model: nn.Module,
    dataloader: DataLoader,
    optimizer: torch.optim.Optimizer,
    criterion: nn.Module,
):
    model.train()
    total_loss = 0.0
    for batch in dataloader:
        for input, label in batch:
            optimizer.zero_grad()
            output = model(input)
            loss = criterion(output, label.argmax(dim=1))

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

    return total_loss / len(dataloader)


def evaluate(model: nn.Module, dataloader: DataLoader, criterion: nn.Module):
    model.eval()
    total_loss = 0.0
    correct = 0
    with torch.no_grad():
        for batch in dataloader:
            for input, label in batch:
                output = model(input)
                loss = criterion(output, label.argmax(dim=1))
                predictions = output.argmax(dim=1)
                correct += (predictions == label.argmax(dim=1)).sum().item()
                total_loss += loss.item()
    accuracy = correct / len(dataloader)
    return total_loss / len(dataloader), accuracy

In [55]:
rnn = NamesClassifier(
    input_size=len(names_dataset.index_to_token),
    hidden_size=128,
    output_size=len(names_dataset.countries),
)
print(rnn)
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    train_loss = train(rnn, train_dataloader, optimizer, criterion)
    test_loss, accuracy = evaluate(rnn, test_dataloader, criterion)
    print(
        f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.4f}"
    )

NamesClassifier(
  (rnn): RNN(87, 128, batch_first=True)
  (h2o): Linear(in_features=128, out_features=18, bias=True)
)
Epoch 1/10, Train Loss: 4.6124, Test Loss: 3.9332, Accuracy: 2.7955
Epoch 2/10, Train Loss: 3.7825, Test Loss: 3.4503, Accuracy: 2.9296
Epoch 3/10, Train Loss: 3.4886, Test Loss: 3.1878, Accuracy: 3.0531
Epoch 4/10, Train Loss: 3.2751, Test Loss: 3.1873, Accuracy: 3.0345
Epoch 5/10, Train Loss: 3.1912, Test Loss: 3.3725, Accuracy: 2.9814
Epoch 6/10, Train Loss: 3.1489, Test Loss: 3.2088, Accuracy: 3.0611
Epoch 7/10, Train Loss: 3.1268, Test Loss: 3.4363, Accuracy: 2.9243
Epoch 8/10, Train Loss: 3.1145, Test Loss: 3.3121, Accuracy: 3.0000
Epoch 9/10, Train Loss: 3.1357, Test Loss: 3.3163, Accuracy: 3.0133
Epoch 10/10, Train Loss: 3.1634, Test Loss: 3.2900, Accuracy: 2.9947
