In [1]:
!pip install idx2numpy
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split

# Mount Google Drive at /content/drive; the dataset and checkpoint paths used
# in later cells all live under this mount point (Colab-only step).
from google.colab import drive
drive.mount('/content/drive')

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

Collecting idx2numpy
  Downloading idx2numpy-1.2.3.tar.gz (6.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: idx2numpy
  Building wheel for idx2numpy (setup.py) ... [?25l[?25hdone
  Created wheel for idx2numpy: filename=idx2numpy-1.2.3-py3-none-any.whl size=7903 sha256=30a1626ed4b84e876d84773661579dcba0df584aa4115a7d13af474708f8fbcc
  Stored in directory: /root/.cache/pip/wheels/f7/48/00/ae031c97d62f39e1c3c4daa00426c09a65eb29ae5753a189ee
Successfully built idx2numpy
Installing collected packages: idx2numpy
Successfully installed idx2numpy-1.2.3
Mounted at /content/drive
Device: cuda


In [3]:
def accuracy_from_logits(logits, targets):
    """Return the fraction of samples whose arg-max class equals the target.

    Args:
        logits: Tensor of per-class scores with the class axis on dim 1.
        targets: Tensor of class indices, one entry per row of ``logits``.

    Returns:
        A Python float: the number of matching predictions divided by
        ``targets.size(0)``.
    """
    num_samples = targets.size(0)
    # Element-wise match between the highest-scoring class and the label.
    hits = logits.argmax(dim=1).eq(targets)
    return hits.sum().item() / num_samples

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``loader`` and return epoch metrics.

    The returned loss and accuracy are averaged per *sample*, not per batch,
    so a final batch that is smaller than the others does not get extra
    weight in the epoch figures.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose gradients are zeroed and stepped per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)`` over all samples seen in the epoch.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    seen = 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Undo the per-batch mean so every sample contributes equally.
        loss_sum += loss.item() * batch_size
        # Count hits directly (instead of averaging batch accuracies) so the
        # epoch accuracy is exact for uneven batch sizes.
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        seen += batch_size

    if seen == 0:
        raise ValueError("loader yielded no samples")

    return loss_sum / seen, correct / seen

def eval_model(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    The returned loss and accuracy are averaged per *sample*, not per batch,
    so a final batch that is smaller than the others does not get extra
    weight in the reported figures.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)`` over all samples in ``loader``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Undo the per-batch mean so every sample contributes equally.
            loss_sum += loss.item() * batch_size
            # Count hits directly so accuracy is exact for uneven batches.
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_size

    if seen == 0:
        raise ValueError("loader yielded no samples")

    return loss_sum / seen, correct / seen

class SimpleCNN(nn.Module):
    """Small convolutional classifier for single-channel images.

    Pipeline: conv(1->32) -> ReLU -> 2x2 max-pool -> conv(32->64) -> ReLU ->
    2x2 max-pool -> conv(64->128) -> ReLU -> flatten -> linear(128*7*7->256)
    -> ReLU -> linear(256->num_classes).

    The first linear layer is sized for a 7x7 feature map, which is what two
    2x2 poolings produce from a 28x28 input.
    """

    def __init__(self, num_classes):
        """Build the layers; ``num_classes`` sets the width of the output layer."""
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=128 * 7 * 7, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=num_classes)

    def forward(self, x):
        """Return unnormalised class scores for a batch of images ``x``."""
        # Two conv + pool stages, each halving the spatial resolution.
        feat = F.relu(self.conv1(x))
        feat = self.pool(feat)
        feat = F.relu(self.conv2(feat))
        feat = self.pool(feat)

        # Third convolution keeps the resolution (no pooling afterwards).
        feat = F.relu(self.conv3(feat))

        # Collapse everything except the batch axis before the dense head.
        flat = feat.view(feat.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


In [4]:
import idx2numpy

# Location of the four MNIST IDX files on the mounted Drive.
MNIST_DIR = "/content/drive/MyDrive/AIProject/numbers"
train_images_path = f"{MNIST_DIR}/train-images.idx3-ubyte"
train_labels_path = f"{MNIST_DIR}/train-labels.idx1-ubyte"
test_images_path = f"{MNIST_DIR}/t10k-images.idx3-ubyte"
test_labels_path = f"{MNIST_DIR}/t10k-labels.idx1-ubyte"

# Decode every IDX file into a NumPy array, in the same order as the targets.
X_mnist_train, y_mnist_train, X_mnist_test, y_mnist_test = (
    idx2numpy.convert_from_file(idx_path)
    for idx_path in (
        train_images_path,
        train_labels_path,
        test_images_path,
        test_labels_path,
    )
)

# Convert the images to float32 and divide by 255 (labels are left untouched).
X_mnist_train, X_mnist_test = (
    pixels.astype(np.float32) / 255.0
    for pixels in (X_mnist_train, X_mnist_test)
)

In [5]:
# Insert a singleton channel axis when the images are still 3-D:
# (N, 28, 28) -> (N, 1, 28, 28). The ndim guard makes re-running the cell a no-op.
if X_mnist_train.ndim == 3:
    X_mnist_train = np.expand_dims(X_mnist_train, axis=1)
if X_mnist_test.ndim == 3:
    X_mnist_test = np.expand_dims(X_mnist_test, axis=1)

print("MNIST shapes:", X_mnist_train.shape, X_mnist_test.shape)

MNIST shapes: (60000, 1, 28, 28) (10000, 1, 28, 28)


In [6]:
# Location of the A-Z handwritten-letters CSV on the mounted Drive.
AZ_DIR = "/content/drive/MyDrive/AIProject/letters kaggle"
AZ_CSV = f"{AZ_DIR}/A_Z Handwritten Data.csv"

# The file is read without a header row, so columns are numbered 0, 1, 2, ...
az_df = pd.read_csv(AZ_CSV, header=None)

# Column 0 holds the label (0–25 for A–Z); every remaining column is a pixel.
y_az = az_df[0].to_numpy().astype(np.int64)
X_az = az_df.iloc[:, 1:].to_numpy().astype(np.float32)

# Scale in place (avoids a second full-size array), then shape each row
# into a (1, 28, 28) image.
np.divide(X_az, 255.0, out=X_az)
X_az = X_az.reshape(-1, 1, 28, 28)

print("A_Z shapes:", X_az.shape, y_az.shape)


A_Z shapes: (372451, 1, 28, 28) (372451,)


In [7]:
# Pool the MNIST train and test arrays into one dataset; it is re-split below.
X_digits = np.concatenate([X_mnist_train, X_mnist_test], axis=0)
# Labels are already 0–9. Cast them to int64 explicitly: the label arrays are
# not cast anywhere upstream (presumably still uint8 from the IDX files —
# confirm), and int64 is the class-index dtype CrossEntropyLoss is documented
# for. This also matches how the letter labels are prepared.
y_digits = np.concatenate([y_mnist_train, y_mnist_test], axis=0).astype(np.int64)

X_digits_tensor = torch.from_numpy(X_digits)
y_digits_tensor = torch.from_numpy(y_digits)

digit_dataset = TensorDataset(X_digits_tensor, y_digits_tensor)

# 80 % train / 10 % validation / remainder test.
total_len = len(digit_dataset)
train_len = int(0.8 * total_len)
val_len = int(0.1 * total_len)
test_len = total_len - train_len - val_len

# A dedicated seeded generator keeps the split identical across runs.
digit_train_ds, digit_val_ds, digit_test_ds = random_split(
    digit_dataset, [train_len, val_len, test_len],
    generator=torch.Generator().manual_seed(42)
)

BATCH_SIZE = 128
digit_train_dl = DataLoader(digit_train_ds, batch_size=BATCH_SIZE, shuffle=True)
digit_val_dl = DataLoader(digit_val_ds, batch_size=BATCH_SIZE, shuffle=False)
digit_test_dl = DataLoader(digit_test_ds, batch_size=BATCH_SIZE, shuffle=False)

digit_model = SimpleCNN(num_classes=10).to(device)
digit_criterion = nn.CrossEntropyLoss()
digit_optimizer = torch.optim.Adam(digit_model.parameters(), lr=1e-3)

# Train, reporting train and validation metrics after every epoch.
EPOCHS = 15
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train_one_epoch(digit_model, digit_train_dl, digit_optimizer, digit_criterion, device)
    val_loss, val_acc = eval_model(digit_model, digit_val_dl, digit_criterion, device)
    print(
        f"DIGITS CNN -> Epoch {epoch}: "
        f"train loss = {train_loss:.4f}, train accuracy = {train_acc*100:.2f}% | "
        f"validation loss = {val_loss:.4f}, validation accuracy = {val_acc*100:.2f}%"
    )

# Final evaluation on the held-out test split.
test_loss, test_acc = eval_model(digit_model, digit_test_dl, digit_criterion, device)
print(f"DIGITS CNN -> Test loss = {test_loss:.4f}, test acc={test_acc*100:.2f}%")

# Persist only the weights (state_dict), not the whole module object.
DIGIT_MODEL_PATH = "/content/drive/MyDrive/AIProject/digit_cnn_10cls.pth"
torch.save(digit_model.state_dict(), DIGIT_MODEL_PATH)
print("Saved digit model to:", DIGIT_MODEL_PATH)


DIGITS CNN -> Epoch 1: train loss = 0.1819, train accuracy = 94.28% | validation loss = 0.0654, validation accuracy = 98.03%
DIGITS CNN -> Epoch 2: train loss = 0.0433, train accuracy = 98.69% | validation loss = 0.0441, validation accuracy = 98.67%
DIGITS CNN -> Epoch 3: train loss = 0.0322, train accuracy = 99.02% | validation loss = 0.0359, validation accuracy = 98.87%
DIGITS CNN -> Epoch 4: train loss = 0.0240, train accuracy = 99.21% | validation loss = 0.0381, validation accuracy = 98.86%
DIGITS CNN -> Epoch 5: train loss = 0.0195, train accuracy = 99.38% | validation loss = 0.0366, validation accuracy = 99.00%
DIGITS CNN -> Epoch 6: train loss = 0.0143, train accuracy = 99.55% | validation loss = 0.0326, validation accuracy = 99.02%
DIGITS CNN -> Epoch 7: train loss = 0.0122, train accuracy = 99.60% | validation loss = 0.0347, validation accuracy = 98.99%
DIGITS CNN -> Epoch 8: train loss = 0.0108, train accuracy = 99.65% | validation loss = 0.0288, validation accuracy = 99.23%


In [8]:
# Wrap the A–Z arrays as tensors and pair them up in a dataset.
X_letters_tensor, y_letters_tensor = torch.from_numpy(X_az), torch.from_numpy(y_az)
letter_dataset = TensorDataset(X_letters_tensor, y_letters_tensor)

# 80 % train / 10 % validation / remainder test.
total_len = len(letter_dataset)
train_len, val_len = int(0.8 * total_len), int(0.1 * total_len)
test_len = total_len - train_len - val_len

# A dedicated seeded generator keeps the split identical across runs.
letter_train_ds, letter_val_ds, letter_test_ds = random_split(
    letter_dataset,
    [train_len, val_len, test_len],
    generator=torch.Generator().manual_seed(42),
)

# Only the training loader shuffles; validation and test keep a fixed order.
letter_train_dl = DataLoader(letter_train_ds, batch_size=BATCH_SIZE, shuffle=True)
letter_val_dl, letter_test_dl = (
    DataLoader(subset, batch_size=BATCH_SIZE, shuffle=False)
    for subset in (letter_val_ds, letter_test_ds)
)

letter_model = SimpleCNN(num_classes=26).to(device)
letter_criterion = nn.CrossEntropyLoss()
letter_optimizer = torch.optim.Adam(letter_model.parameters(), lr=1e-3)

# Train, reporting train and validation metrics after every epoch.
EPOCHS = 15
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train_one_epoch(
        letter_model, letter_train_dl, letter_optimizer, letter_criterion, device
    )
    val_loss, val_acc = eval_model(letter_model, letter_val_dl, letter_criterion, device)
    epoch_report = (
        f"LETTERS CNN -> Epoch {epoch}: "
        f"train loss = {train_loss:.4f}, train accuracy = {train_acc*100:.2f}% | "
        f"validation loss = {val_loss:.4f}, validation accuracy={val_acc*100:.2f}%"
    )
    print(epoch_report)

# Final evaluation on the held-out test split.
test_loss, test_acc = eval_model(letter_model, letter_test_dl, letter_criterion, device)
print(f"LETTERS CNN -> Test loss = {test_loss:.4f}, test accuracy = {test_acc*100:.2f}%")

# Persist only the weights (state_dict), not the whole module object.
LETTER_MODEL_PATH = "/content/drive/MyDrive/AIProject/letter_cnn_26cls.pth"
torch.save(letter_model.state_dict(), LETTER_MODEL_PATH)
print("Saved letter model to:", LETTER_MODEL_PATH)


LETTERS CNN -> Epoch 1: train loss = 0.1423, train accuracy = 95.97% | validation loss = 0.0539, validation accuracy=98.55%
LETTERS CNN -> Epoch 2: train loss = 0.0477, train accuracy = 98.62% | validation loss = 0.0413, validation accuracy=98.83%
LETTERS CNN -> Epoch 3: train loss = 0.0327, train accuracy = 99.02% | validation loss = 0.0335, validation accuracy=99.10%
LETTERS CNN -> Epoch 4: train loss = 0.0231, train accuracy = 99.27% | validation loss = 0.0363, validation accuracy=99.05%
LETTERS CNN -> Epoch 5: train loss = 0.0169, train accuracy = 99.45% | validation loss = 0.0284, validation accuracy=99.26%
LETTERS CNN -> Epoch 6: train loss = 0.0137, train accuracy = 99.55% | validation loss = 0.0252, validation accuracy=99.42%
LETTERS CNN -> Epoch 7: train loss = 0.0109, train accuracy = 99.65% | validation loss = 0.0264, validation accuracy=99.41%
LETTERS CNN -> Epoch 8: train loss = 0.0094, train accuracy = 99.70% | validation loss = 0.0306, validation accuracy=99.32%
LETTERS 