In [2]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import time

In [3]:
# Define the dataset class
class NumberDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.data.iloc[idx, 0])
        image = Image.open(img_name).convert('L')  # Convert to grayscale
        label = self.data.iloc[idx, 1]
        
        if self.transform:
            image = self.transform(image)
        
        # Convert label to tensor of individual digits
        label = torch.tensor([int(d) for d in str(label).zfill(6)], dtype=torch.long)
        
        return image, label

In [4]:
# Define the CNN model
class NumberCNN(nn.Module):
    def __init__(self):
        super(NumberCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 26 * 8, 128)
        self.fc2 = nn.Linear(128, 10 * 6)  # 10 classes for each of the 6 digits

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 26 * 8)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x.view(-1, 6, 10)  # Reshape to (batch_size, 6, 10)

In [5]:
# Set up data transformations
transform = transforms.Compose([
    transforms.Resize((104, 32)),
    transforms.ToTensor(),
])

# Create dataset and dataloader
dataset = NumberDataset(csv_file=os.path.join('generate_data', 'images','data.csv'), img_dir=os.path.join('generate_data', 'images'), transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize the model, loss function, and optimizer
model = NumberCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [6]:

# Training loop
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = sum(criterion(outputs[:, i, :], labels[:, i]) for i in range(6))
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(dataloader):.4f}")

print("Training completed!")

# Save the trained model
torch.save(model.state_dict(), f"model{time.time()}.pth")
print("Model saved as 'number_recognition_model.pth'")

Epoch 1/10, Loss: 13.8432
Epoch 2/10, Loss: 13.8110
Epoch 3/10, Loss: 13.8073
Epoch 4/10, Loss: 13.8058
Epoch 5/10, Loss: 13.8035
Epoch 6/10, Loss: 13.8031
Epoch 7/10, Loss: 13.8020
Epoch 8/10, Loss: 13.8019
Epoch 9/10, Loss: 13.8010
Epoch 10/10, Loss: 13.8006
Training completed!
Model saved as 'number_recognition_model.pth'
