In [1]:
!pip install idx2numpy
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split

# Mount Google Drive at /content/drive; the dataset and checkpoint paths used
# later in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

Collecting idx2numpy
  Downloading idx2numpy-1.2.3.tar.gz (6.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: idx2numpy
  Building wheel for idx2numpy (setup.py) ... [?25l[?25hdone
  Created wheel for idx2numpy: filename=idx2numpy-1.2.3-py3-none-any.whl size=7903 sha256=e3728ad850fdbc0a643a5b38167d900cf54f4ca01654efb06da281437687bd3c
  Stored in directory: /root/.cache/pip/wheels/f7/48/00/ae031c97d62f39e1c3c4daa00426c09a65eb29ae5753a189ee
Successfully built idx2numpy
Installing collected packages: idx2numpy
Successfully installed idx2numpy-1.2.3
Mounted at /content/drive
Device: cuda


In [8]:
def accuracy_from_logits(logits, targets):
    """Return the fraction of rows whose highest-scoring class equals the target.

    Args:
        logits: tensor of per-class scores; the predicted class is the argmax
            along dimension 1.
        targets: tensor of class indices with one entry per row of ``logits``.

    Returns:
        Number of correct predictions divided by ``targets.size(0)``, as a
        Python float.
    """
    predicted_classes = torch.argmax(logits, dim=1)
    num_correct = torch.eq(predicted_classes, targets).sum().item()
    return num_correct / targets.size(0)

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the epoch metrics.

    Loss and accuracy are accumulated per sample rather than per batch, so a
    smaller final batch is weighted by the number of samples it contains
    instead of counting as much as a full batch.

    Args:
        model: module called as ``model(images)``; its output is treated as
            per-class scores with the class axis at dimension 1.
        loader: iterable yielding ``(images, labels)`` batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: device each batch is moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` over every sample seen, as Python floats.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    num_correct = 0
    num_samples = 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Undo the batch-mean reduction so the epoch average is per sample.
        loss_sum += loss.item() * batch_size
        num_correct += (outputs.argmax(dim=1) == labels).sum().item()
        num_samples += batch_size

    if num_samples == 0:
        raise ValueError("train_one_epoch: loader produced no samples")

    return loss_sum / num_samples, num_correct / num_samples

def eval_model(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Loss and accuracy are accumulated per sample rather than per batch, so a
    smaller final batch is weighted by the number of samples it contains
    instead of counting as much as a full batch.

    Args:
        model: module called as ``model(images)``; its output is treated as
            per-class scores with the class axis at dimension 1.
        loader: iterable yielding ``(images, labels)`` batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: device each batch is moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` over every sample seen, as Python floats.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_samples = 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Undo the batch-mean reduction so the average is per sample.
            loss_sum += loss.item() * batch_size
            num_correct += (outputs.argmax(dim=1) == labels).sum().item()
            num_samples += batch_size

    if num_samples == 0:
        raise ValueError("eval_model: loader produced no samples")

    return loss_sum / num_samples, num_correct / num_samples

class SimpleCNN(nn.Module):
    """Small three-convolution classifier for single-channel images.

    Every convolution uses a 3x3 kernel with padding 1, so only the two 2x2
    max-pool steps change the spatial size. The first fully connected layer
    expects ``128 * 7 * 7`` features, which corresponds to 28x28 inputs.

    Args:
        num_classes: number of scores produced per input image.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order in which weights are initialised.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=128 * 7 * 7, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=num_classes)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, num_classes)``."""
        # The first two convolutions are each followed by ReLU and a 2x2 pool.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # The third convolution is not pooled.
        feature_maps = F.relu(self.conv3(x))
        flattened = feature_maps.view(feature_maps.size(0), -1)
        hidden = F.relu(self.fc1(flattened))
        return self.fc2(hidden)


In [9]:
# ---------- MNIST ----------
import idx2numpy

# Location of the four raw MNIST IDX files on the mounted Drive.
MNIST_DIR = "/content/drive/MyDrive/AIProject/numbers"
train_images_path, train_labels_path, test_images_path, test_labels_path = (
    f"{MNIST_DIR}/train-images.idx3-ubyte",
    f"{MNIST_DIR}/train-labels.idx1-ubyte",
    f"{MNIST_DIR}/t10k-images.idx3-ubyte",
    f"{MNIST_DIR}/t10k-labels.idx1-ubyte",
)

# Read each IDX file into a NumPy array, in the same order as the paths above.
X_mnist_train, y_mnist_train, X_mnist_test, y_mnist_test = (
    idx2numpy.convert_from_file(idx_path)
    for idx_path in (
        train_images_path,
        train_labels_path,
        test_images_path,
        test_labels_path,
    )
)

# Convert the images to float32 and divide by 255 to rescale the pixel values.
X_mnist_train = np.divide(X_mnist_train.astype(np.float32), 255.0)
X_mnist_test = np.divide(X_mnist_test.astype(np.float32), 255.0)

In [10]:
# Insert a singleton channel axis, (N,28,28) -> (N,1,28,28). The ndim guard
# makes re-running this cell a no-op once the axis has been added.
if X_mnist_train.ndim == 3:
    X_mnist_train = np.expand_dims(X_mnist_train, axis=1)
if X_mnist_test.ndim == 3:
    X_mnist_test = np.expand_dims(X_mnist_test, axis=1)

print("MNIST shapes:", X_mnist_train.shape, X_mnist_test.shape)

MNIST shapes: (60000, 1, 28, 28) (10000, 1, 28, 28)


In [11]:
# ---------- A_Z letters ----------
AZ_DIR = "/content/drive/MyDrive/AIProject/letters kaggle"
AZ_CSV = f"{AZ_DIR}/A_Z Handwritten Data.csv"

# header=None: every row of the CSV is read as data.
az_df = pd.read_csv(AZ_CSV, header=None)

# First column is the class label (0–25 for A–Z); the rest are the pixels.
y_az = az_df.iloc[:, 0].to_numpy().astype(np.int64)
X_az = az_df.iloc[:, 1:].to_numpy().astype(np.float32)

# Rescale in place (avoids a second full-size copy), then lay each row out as
# a single-channel 28x28 image.
np.divide(X_az, 255.0, out=X_az)
X_az = X_az.reshape((-1, 1, 28, 28))

print("A_Z shapes:", X_az.shape, y_az.shape)


A_Z shapes: (372451, 1, 28, 28) (372451,)


In [12]:
# ---------- DIGIT MODEL (0–9) ----------

# Use all MNIST digits: the provided train and test files are pooled and then
# re-split 80/10/10 below.
X_digits = np.concatenate([X_mnist_train, X_mnist_test], axis=0)
# Labels are already 0–9. Cast them to int64 explicitly: the arrays keep
# whatever dtype the IDX files were read with (presumably uint8 — TODO
# confirm), while the letter labels are int64 and class-index targets for
# CrossEntropyLoss are conventionally int64.
y_digits = np.concatenate([y_mnist_train, y_mnist_test], axis=0).astype(np.int64)

X_digits_tensor = torch.from_numpy(X_digits)
y_digits_tensor = torch.from_numpy(y_digits)

digit_dataset = TensorDataset(X_digits_tensor, y_digits_tensor)

# 80% train / 10% validation; the test split takes the remainder so the three
# lengths always add up to the dataset size.
total_len = len(digit_dataset)
train_len = int(0.8 * total_len)
val_len   = int(0.1 * total_len)
test_len  = total_len - train_len - val_len

# A fixed generator seed keeps the split identical across runs.
digit_train_ds, digit_val_ds, digit_test_ds = random_split(
    digit_dataset, [train_len, val_len, test_len],
    generator=torch.Generator().manual_seed(42)
)

# Only the training loader shuffles.
BATCH_SIZE = 128
digit_train_dl = DataLoader(digit_train_ds, batch_size=BATCH_SIZE, shuffle=True)
digit_val_dl   = DataLoader(digit_val_ds,   batch_size=BATCH_SIZE, shuffle=False)
digit_test_dl  = DataLoader(digit_test_ds,  batch_size=BATCH_SIZE, shuffle=False)

digit_model = SimpleCNN(num_classes=10).to(device)
digit_criterion = nn.CrossEntropyLoss()
digit_optimizer = torch.optim.Adam(digit_model.parameters(), lr=1e-3)

# Train for a fixed number of epochs, reporting validation metrics after each.
EPOCHS = 15
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train_one_epoch(digit_model, digit_train_dl, digit_optimizer, digit_criterion, device)
    val_loss, val_acc     = eval_model(digit_model, digit_val_dl, digit_criterion, device)
    print(
        f"[DIGITS] Epoch {epoch:02d}: "
        f"train_loss={train_loss:.4f}, train_acc={train_acc*100:.2f}% | "
        f"val_loss={val_loss:.4f}, val_acc={val_acc*100:.2f}%"
    )

# The held-out test split is evaluated once, after training has finished.
test_loss, test_acc = eval_model(digit_model, digit_test_dl, digit_criterion, device)
print(f"[DIGITS] Test loss={test_loss:.4f}, test acc={test_acc*100:.2f}%")

# Persist the weights from the final epoch (state_dict only).
DIGIT_MODEL_PATH = "/content/drive/MyDrive/AIProject/digit_cnn_10cls.pth"
torch.save(digit_model.state_dict(), DIGIT_MODEL_PATH)
print("Saved digit model to:", DIGIT_MODEL_PATH)


[DIGITS] Epoch 01: train_loss=0.1932, train_acc=94.09% | val_loss=0.0791, val_acc=97.61%
[DIGITS] Epoch 02: train_loss=0.0464, train_acc=98.56% | val_loss=0.0443, val_acc=98.60%
[DIGITS] Epoch 03: train_loss=0.0306, train_acc=99.05% | val_loss=0.0471, val_acc=98.52%
[DIGITS] Epoch 04: train_loss=0.0237, train_acc=99.25% | val_loss=0.0371, val_acc=98.86%
[DIGITS] Epoch 05: train_loss=0.0188, train_acc=99.39% | val_loss=0.0309, val_acc=99.06%
[DIGITS] Epoch 06: train_loss=0.0150, train_acc=99.54% | val_loss=0.0356, val_acc=99.02%
[DIGITS] Epoch 07: train_loss=0.0128, train_acc=99.59% | val_loss=0.0371, val_acc=98.99%
[DIGITS] Epoch 08: train_loss=0.0104, train_acc=99.66% | val_loss=0.0310, val_acc=99.09%
[DIGITS] Epoch 09: train_loss=0.0093, train_acc=99.70% | val_loss=0.0364, val_acc=99.07%
[DIGITS] Epoch 10: train_loss=0.0070, train_acc=99.76% | val_loss=0.0376, val_acc=98.96%
[DIGITS] Epoch 11: train_loss=0.0064, train_acc=99.79% | val_loss=0.0373, val_acc=99.13%
[DIGITS] Epoch 12: tr

In [13]:
# ---------- LETTER MODEL (A–Z) ----------

# y_az already holds class indices 0–25, so the labels are used unchanged.
X_letters_tensor = torch.from_numpy(X_az)
y_letters_tensor = torch.from_numpy(y_az)
letter_dataset = TensorDataset(X_letters_tensor, y_letters_tensor)

# 80% train / 10% validation; the test split takes the remainder so the three
# lengths always add up to the dataset size.
total_len = len(letter_dataset)
train_len, val_len = int(0.8 * total_len), int(0.1 * total_len)
test_len = total_len - (train_len + val_len)

# A fixed generator seed keeps the split identical across runs.
letter_train_ds, letter_val_ds, letter_test_ds = random_split(
    letter_dataset,
    lengths=[train_len, val_len, test_len],
    generator=torch.Generator().manual_seed(42),
)

# Only the training loader shuffles.
letter_train_dl = DataLoader(letter_train_ds, shuffle=True, batch_size=BATCH_SIZE)
letter_val_dl = DataLoader(letter_val_ds, shuffle=False, batch_size=BATCH_SIZE)
letter_test_dl = DataLoader(letter_test_ds, shuffle=False, batch_size=BATCH_SIZE)

letter_model = SimpleCNN(num_classes=26)
letter_model = letter_model.to(device)
letter_criterion = nn.CrossEntropyLoss()
letter_optimizer = torch.optim.Adam(letter_model.parameters(), lr=1e-3)

# Train for a fixed number of epochs, reporting validation metrics after each.
EPOCHS = 15
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train_one_epoch(
        letter_model, letter_train_dl, letter_optimizer, letter_criterion, device
    )
    val_loss, val_acc = eval_model(
        letter_model, letter_val_dl, letter_criterion, device
    )
    print(
        f"[LETTERS] Epoch {epoch:02d}: "
        f"train_loss={train_loss:.4f}, train_acc={train_acc*100:.2f}% | "
        f"val_loss={val_loss:.4f}, val_acc={val_acc*100:.2f}%"
    )

# The held-out test split is evaluated once, after training has finished.
test_loss, test_acc = eval_model(
    letter_model, letter_test_dl, letter_criterion, device
)
print(f"[LETTERS] Test loss={test_loss:.4f}, test acc={test_acc*100:.2f}%")

# Persist the weights from the final epoch (state_dict only).
LETTER_MODEL_PATH = "/content/drive/MyDrive/AIProject/letter_cnn_26cls.pth"
torch.save(letter_model.state_dict(), LETTER_MODEL_PATH)
print("Saved letter model to:", LETTER_MODEL_PATH)


[LETTERS] Epoch 01: train_loss=0.1367, train_acc=96.12% | val_loss=0.0518, val_acc=98.54%
[LETTERS] Epoch 02: train_loss=0.0461, train_acc=98.68% | val_loss=0.0399, val_acc=98.88%
[LETTERS] Epoch 03: train_loss=0.0321, train_acc=99.03% | val_loss=0.0369, val_acc=98.97%
[LETTERS] Epoch 04: train_loss=0.0219, train_acc=99.30% | val_loss=0.0304, val_acc=99.23%
[LETTERS] Epoch 05: train_loss=0.0165, train_acc=99.46% | val_loss=0.0332, val_acc=99.10%
[LETTERS] Epoch 06: train_loss=0.0133, train_acc=99.56% | val_loss=0.0282, val_acc=99.29%
[LETTERS] Epoch 07: train_loss=0.0102, train_acc=99.67% | val_loss=0.0272, val_acc=99.33%
[LETTERS] Epoch 08: train_loss=0.0087, train_acc=99.72% | val_loss=0.0308, val_acc=99.35%
[LETTERS] Epoch 09: train_loss=0.0076, train_acc=99.75% | val_loss=0.0291, val_acc=99.37%
[LETTERS] Epoch 10: train_loss=0.0071, train_acc=99.77% | val_loss=0.0272, val_acc=99.48%
[LETTERS] Epoch 11: train_loss=0.0060, train_acc=99.80% | val_loss=0.0318, val_acc=99.41%
[LETTERS] 