In [18]:
!pip install idx2numpy
import os
import numpy as np
import pandas as pd
import idx2numpy

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split

from google.colab import drive
drive.mount('/content/drive')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Device: cuda


In [25]:
BASE_DIR = "/content/drive/MyDrive/AIProject"

DIGIT_MODEL_PATH_CNN = f"{BASE_DIR}/digit_cnn_10cls.pth"
LETTER_MODEL_PATH_CNN = f"{BASE_DIR}/letter_cnn_26cls.pth"

DIGIT_MODEL_PATH_LSTM = f"{BASE_DIR}/digit_lstm_10cls.pth"
LETTER_MODEL_PATH_LSTM = f"{BASE_DIR}/letter_lstm_26cls.pth"


In [26]:
def accuracy_from_logits(logits, targets):
    preds = logits.argmax(dim=1)
    correct = (preds == targets).sum().item()
    total = targets.size(0)
    return correct / total


def train_one_epoch(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    running_acc = 0.0
    total_batches = 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_acc += accuracy_from_logits(outputs, labels)
        total_batches += 1

    return running_loss / total_batches, running_acc / total_batches


def eval_model(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    running_acc = 0.0
    total_batches = 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            running_acc += accuracy_from_logits(outputs, labels)
            total_batches += 1

    return running_loss / total_batches, running_acc / total_batches


In [27]:
class SimpleLSTM(nn.Module):
    def __init__(self, num_classes, input_size=28, hidden_size=128, num_layers=2, bidirectional=False):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional
        )
        self.fc = nn.Linear(hidden_size * self.num_directions, num_classes)

    def forward(self, x):
        # (B, 1, 28, 28) -> (B, 28, 28)
        x = x.squeeze(1)                 # remove channel dim -> (B, 28, 28)

        out, (h_n, c_n) = self.lstm(x)   # out: (B, seq_len, H*num_directions)

        last_out = out[:, -1, :]         # (B, H*num_directions)
        logits = self.fc(last_out)       # (B, num_classes)
        return logits


In [28]:
MNIST_DIR = "/content/drive/MyDrive/AIProject/numbers"
train_images_path = f"{MNIST_DIR}/train-images.idx3-ubyte"
train_labels_path = f"{MNIST_DIR}/train-labels.idx1-ubyte"
test_images_path = f"{MNIST_DIR}/t10k-images.idx3-ubyte"
test_labels_path = f"{MNIST_DIR}/t10k-labels.idx1-ubyte"

X_mnist_train = idx2numpy.convert_from_file(train_images_path)
y_mnist_train = idx2numpy.convert_from_file(train_labels_path)
X_mnist_test  = idx2numpy.convert_from_file(test_images_path)
y_mnist_test  = idx2numpy.convert_from_file(test_labels_path)

# normalize to (0, 1)
X_mnist_train = X_mnist_train.astype(np.float32) / 255.0
X_mnist_test = X_mnist_test.astype(np.float32)  / 255.0

# add channel dimension: (N, 28, 28) -> (N, 1, 28, 28)
if X_mnist_train.ndim == 3:
    X_mnist_train = X_mnist_train[:, None, :, :]
if X_mnist_test.ndim == 3:
    X_mnist_test  = X_mnist_test[:, None, :, :]

print("MNIST shapes:", X_mnist_train.shape, X_mnist_test.shape)

MNIST shapes: (60000, 1, 28, 28) (10000, 1, 28, 28)


In [29]:
X_digits = np.concatenate([X_mnist_train, X_mnist_test], axis=0)
y_digits = np.concatenate([y_mnist_train, y_mnist_test], axis=0)  # 0–9

X_digits_tensor = torch.from_numpy(X_digits)
y_digits_tensor = torch.from_numpy(y_digits)

digit_dataset = TensorDataset(X_digits_tensor, y_digits_tensor)

total_len = len(digit_dataset)
train_len = int(0.8 * total_len)
val_len = int(0.1 * total_len)
test_len = total_len - train_len - val_len

digit_train_ds, digit_val_ds, digit_test_ds = random_split(
    digit_dataset, [train_len, val_len, test_len],
    generator=torch.Generator().manual_seed(42)
)

BATCH_SIZE = 128
digit_train_dl = DataLoader(digit_train_ds, batch_size=BATCH_SIZE, shuffle=True)
digit_val_dl = DataLoader(digit_val_ds, batch_size=BATCH_SIZE, shuffle=False)
digit_test_dl = DataLoader(digit_test_ds, batch_size=BATCH_SIZE, shuffle=False)


In [31]:
digit_model = SimpleLSTM(num_classes=10, hidden_size=128, num_layers=2, bidirectional=True).to(device)
digit_criterion = nn.CrossEntropyLoss()
digit_optimizer = torch.optim.Adam(digit_model.parameters(), lr=1e-3)

EPOCHS = 15
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train_one_epoch(digit_model, digit_train_dl, digit_optimizer, digit_criterion, device)
    val_loss, val_acc = eval_model(digit_model, digit_val_dl, digit_criterion, device)
    print(
        f"DIGITS LSTM -> Epoch {epoch}: "
        f"train loss = {train_loss:.4f}, train accuracy = {train_acc*100:.2f}% | "
        f"validation loss = {val_loss:.4f}, validation accuracy = {val_acc*100:.2f}%"
    )

test_loss, test_acc = eval_model(digit_model, digit_test_dl, digit_criterion, device)
print(f"DIGITS LSTM -> Test loss = {test_loss:.4f}, test accuracy = {test_acc*100:.2f}%")

DIGIT_LSTM_MODEL_PATH = "/content/drive/MyDrive/AIProject/digit_lstm_10cls.pth"
torch.save(digit_model.state_dict(), DIGIT_LSTM_MODEL_PATH)
print("Saved digit LSTM model to:", DIGIT_LSTM_MODEL_PATH)

DIGITS LSTM -> Epoch 1: train loss = 0.5730, train accuracy = 80.92% | validation loss = 0.1908, validation accuracy = 94.25%
DIGITS LSTM -> Epoch 2: train loss = 0.1457, train accuracy = 95.49% | validation loss = 0.1437, validation accuracy = 95.24%
DIGITS LSTM -> Epoch 3: train loss = 0.0984, train accuracy = 96.95% | validation loss = 0.0870, validation accuracy = 97.52%
DIGITS LSTM -> Epoch 4: train loss = 0.0682, train accuracy = 97.91% | validation loss = 0.0649, validation accuracy = 98.23%
DIGITS LSTM -> Epoch 5: train loss = 0.0554, train accuracy = 98.27% | validation loss = 0.0646, validation accuracy = 98.20%
DIGITS LSTM -> Epoch 6: train loss = 0.0491, train accuracy = 98.47% | validation loss = 0.0569, validation accuracy = 98.27%
DIGITS LSTM -> Epoch 7: train loss = 0.0412, train accuracy = 98.73% | validation loss = 0.0625, validation accuracy = 98.20%
DIGITS LSTM -> Epoch 8: train loss = 0.0358, train accuracy = 98.89% | validation loss = 0.0649, validation accuracy =

In [32]:
AZ_DIR = "/content/drive/MyDrive/AIProject/letters kaggle"
AZ_CSV = f"{AZ_DIR}/A_Z Handwritten Data.csv"

az_df = pd.read_csv(AZ_CSV, header=None)  # Kaggle A_Z CSV: first column is label, next 784 columns are pixels

# labels: 0–25 for A–Z
y_az = az_df.iloc[:, 0].values.astype(np.int64)

# pixels
X_az = az_df.iloc[:, 1:].values.astype(np.float32)

# normalize to (0, 1)
X_az /= 255.0

# reshape to (N, 1, 28, 28)
X_az = X_az.reshape(-1, 1, 28, 28)
print("A_Z shapes:", X_az.shape, y_az.shape)

A_Z shapes: (372451, 1, 28, 28) (372451,)


In [33]:
X_letters_tensor = torch.from_numpy(X_az)
y_letters_tensor = torch.from_numpy(y_az)

letter_dataset = TensorDataset(X_letters_tensor, y_letters_tensor)

total_len = len(letter_dataset)
train_len = int(0.8 * total_len)
val_len = int(0.1 * total_len)
test_len = total_len - train_len - val_len

letter_train_ds, letter_val_ds, letter_test_ds = random_split(
    letter_dataset, [train_len, val_len, test_len],
    generator=torch.Generator().manual_seed(42)
)

BATCH_SIZE = 128
letter_train_dl = DataLoader(letter_train_ds, batch_size=BATCH_SIZE, shuffle=True)
letter_val_dl = DataLoader(letter_val_ds, batch_size=BATCH_SIZE, shuffle=False)
letter_test_dl = DataLoader(letter_test_ds, batch_size=BATCH_SIZE, shuffle=False)


In [34]:
letter_model = SimpleLSTM(num_classes=26, hidden_size=128, num_layers=2, bidirectional=True).to(device)
letter_criterion = nn.CrossEntropyLoss()
letter_optimizer = torch.optim.Adam(letter_model.parameters(), lr=1e-3)

EPOCHS = 15
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train_one_epoch(letter_model, letter_train_dl, letter_optimizer, letter_criterion, device)
    val_loss, val_acc = eval_model(letter_model, letter_val_dl, letter_criterion, device)
    print(
        f"LETTERS LSTM -> Epoch {epoch}: "
        f"train loss = {train_loss:.4f}, train accuracy = {train_acc*100:.2f}% | "
        f"validation loss = {val_loss:.4f}, validation accuracy = {val_acc*100:.2f}%"
    )

test_loss, test_acc = eval_model(letter_model, letter_test_dl, letter_criterion, device)
print(f"LETTERS LSTM -> Test loss = {test_loss:.4f}, test accuracy = {test_acc*100:.2f}%")

LETTER_LSTM_MODEL_PATH = "/content/drive/MyDrive/AIProject/letter_lstm_26cls.pth"
torch.save(letter_model.state_dict(), LETTER_LSTM_MODEL_PATH)
print("Saved letter LSTM model to:", LETTER_LSTM_MODEL_PATH)

LETTERS LSTM -> Epoch 1: train loss = 0.4090, train accuracy = 87.99% | validation loss = 0.1325, validation accuracy = 96.26%
LETTERS LSTM -> Epoch 2: train loss = 0.1001, train accuracy = 97.12% | validation loss = 0.0843, validation accuracy = 97.59%
LETTERS LSTM -> Epoch 3: train loss = 0.0746, train accuracy = 97.84% | validation loss = 0.0688, validation accuracy = 98.02%
LETTERS LSTM -> Epoch 4: train loss = 0.0584, train accuracy = 98.29% | validation loss = 0.0538, validation accuracy = 98.46%
LETTERS LSTM -> Epoch 5: train loss = 0.0482, train accuracy = 98.58% | validation loss = 0.0492, validation accuracy = 98.59%
LETTERS LSTM -> Epoch 6: train loss = 0.0417, train accuracy = 98.73% | validation loss = 0.0544, validation accuracy = 98.42%
LETTERS LSTM -> Epoch 7: train loss = 0.0353, train accuracy = 98.91% | validation loss = 0.0456, validation accuracy = 98.72%
LETTERS LSTM -> Epoch 8: train loss = 0.0306, train accuracy = 99.07% | validation loss = 0.0498, validation ac