In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import numpy as np


In [9]:
class CyrillicHandwritingDataset(Dataset):
    """Synthetic placeholder dataset shaped like single-channel character images.

    Every image is standard-normal noise and every label is drawn uniformly at
    random, independently of the image. There is therefore no signal to learn:
    the dataset is only useful for exercising the training/evaluation plumbing.

    Args:
        num_samples: Number of (image, label) pairs to generate.
        img_size: Spatial dimensions appended after the single channel axis,
            so each sample has shape ``(1, *img_size)``.
        num_classes: Labels are integers in ``[0, num_classes)``.
        seed: Optional integer seed. When given, samples are drawn from a
            private ``torch.Generator`` so the dataset is reproducible and the
            global RNG state is left untouched. When ``None`` (default), the
            global torch RNG is used, as before.
    """

    def __init__(self, num_samples=1000, img_size=(32, 32), num_classes=33, seed=None):
        self.num_samples = num_samples
        self.img_size = img_size
        self.num_classes = num_classes

        # A dedicated generator keeps seeded datasets reproducible without
        # reseeding (and thereby perturbing) the rest of the notebook.
        generator = None
        if seed is not None:
            generator = torch.Generator().manual_seed(seed)

        # Create synthetic data
        self.data = torch.randn(num_samples, 1, *img_size, generator=generator)
        self.labels = torch.randint(0, num_classes, (num_samples,), generator=generator)

    def __len__(self):
        """Return the number of generated samples."""
        return self.num_samples

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        sample = self.data[idx]
        label = self.labels[idx]
        return sample, label

# Build the synthetic training set with its default settings, then wrap it in
# a loader that serves shuffled mini-batches of 32 samples.
train_dataset = CyrillicHandwritingDataset()
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)


In [10]:
class TransformerModel(nn.Module):
    """Classify flattened images with an encoder-decoder Transformer.

    Each image is flattened, linearly projected to ``hidden_dim`` and treated
    as a sequence of length one. That single token is fed to ``nn.Transformer``
    as both source and target, and the result is projected to class logits.

    Args:
        input_dim: Number of features per sample after flattening.
        num_classes: Size of the output logit vector.
        num_heads: Attention heads; ``hidden_dim`` must be divisible by it.
        num_layers: Depth of both the encoder and the decoder stack.
        hidden_dim: Transformer model width (``d_model``).
    """

    def __init__(self, input_dim, num_classes, num_heads=8, num_layers=6, hidden_dim=512):
        super().__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        self.transformer = nn.Transformer(
            d_model=hidden_dim,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            # Previously the decoder silently kept the library default depth
            # regardless of ``num_layers``; tie both stacks to the argument.
            num_decoder_layers=num_layers,
            # forward() builds (batch, seq=1, hidden) tensors. Without
            # batch_first=True nn.Transformer reads dim 0 as the sequence axis,
            # so the samples of a batch would attend to one another.
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Map a batch of images to logits of shape ``(batch, num_classes)``."""
        # reshape (unlike view) also accepts non-contiguous inputs.
        x = x.reshape(x.size(0), -1)  # Flatten the images
        x = self.embedding(x).unsqueeze(1)  # (batch, 1, hidden): one token per sample
        transformer_out = self.transformer(x, x)
        out = self.fc(transformer_out.squeeze(1))
        return out

# Problem dimensions, followed by the model, the loss and the optimizer.
num_classes = 33  # Number of Cyrillic characters
input_dim = 32 * 32  # Image size flattened

model = TransformerModel(input_dim=input_dim, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [11]:
num_epochs = 5

# NOTE: the synthetic labels are random and independent of the images, so the
# loss is expected to hover around ln(num_classes) rather than keep falling.
for epoch in range(num_epochs):
    model.train()
    loss_sum = 0.0      # sum of per-sample losses seen this epoch
    samples_seen = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # short final batch does not count as much as a full one.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        samples_seen += batch_size

    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches.
    epoch_loss = loss_sum / max(samples_seen, 1)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')


Epoch [1/5], Loss: 3.8276
Epoch [2/5], Loss: 3.6416
Epoch [3/5], Loss: 3.6261
Epoch [4/5], Loss: 3.5961
Epoch [5/5], Loss: 3.6079


In [12]:
# Create test dataset and dataloader (using synthetic data for example).
# The labels are random and unrelated to the images, so accuracy near chance
# (1 / num_classes) is the expected outcome here, not a sign of a bug.
test_dataset = CyrillicHandwritingDataset()
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Evaluate the model
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # argmax over the class axis gives the predicted label directly; this
        # avoids the legacy ``.data`` attribute and does not bind ``_``, which
        # IPython uses for the last cell output.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total:.2f}%')


Accuracy: 3.20%
