In [7]:
import numpy as np

In [8]:
# Read the KMNIST arrays out of their .npz archives.
# np.load on an .npz returns a lazy NpzFile that keeps the file open; using it
# as a context manager closes the handle as soon as the array has been read
# into memory, instead of leaving four descriptors open for the kernel's life.

# Load training set (60K images)
with np.load("kmnist_exp/kmnist-train-imgs.npz") as npz:
    X_tr = npz["arr_0"]
with np.load("kmnist_exp/kmnist-train-labels.npz") as npz:
    y_tr = npz["arr_0"]

# Load test set (10K images)
with np.load("kmnist_exp/kmnist-test-imgs.npz") as npz:
    X_te = npz["arr_0"]
with np.load("kmnist_exp/kmnist-test-labels.npz") as npz:
    y_te = npz["arr_0"]

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

def _flat_float_tensor(images):
    """Reshape `images` to rows of 784 values, divide by 255.0, return float32 tensor.

    Assumes 28x28 8-bit images, so the result lies in [0, 1] -- TODO confirm
    against the dataset files.
    """
    return torch.tensor(images.reshape(-1, 28 * 28) / 255.0, dtype=torch.float)


# Inputs: flattened, scaled pixel rows.
X_train = _flat_float_tensor(X_tr)
X_test = _flat_float_tensor(X_te)

# Targets: class indices as int64 tensors.
y_train = torch.tensor(y_tr, dtype=torch.int64)
y_test = torch.tensor(y_te, dtype=torch.int64)

# Only the training loader shuffles; the test loader keeps dataset order.
train_set = TensorDataset(X_train, y_train)
test_set = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_set, batch_size=256, shuffle=True)
test_loader = DataLoader(test_set, batch_size=256)

In [10]:
# Seed torch's global RNG before any layer is constructed so the initial
# weights are identical on every Restart & Run All. Random draws made later
# from the same global generator (run in cell order) become repeatable too.
SEED = 0
torch.manual_seed(SEED)

# Fully connected classifier: 784 inputs -> 128 -> 64 -> 10 outputs.
# The last layer has no activation; it emits raw scores for the loss below.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)

# Use the GPU when one is visible, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Cross-entropy over the 10 output scores, optimised with Adam at lr=1e-3.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [11]:
# Ten passes over train_loader; after each pass, print the loss averaged over
# all training samples.
for epoch in range(10):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        # Clear stale gradients, then forward / backward / parameter update.
        optimizer.zero_grad()
        logits = model(X_batch)
        loss = loss_fn(logits, y_batch)
        loss.backward()
        optimizer.step()

        # loss is a per-batch mean; weight it by batch size so the epoch
        # total is a sum over samples (the last batch may be smaller).
        total_loss += loss.item() * X_batch.shape[0]
    print(f"Epoch {epoch+1:2d} | Loss: {total_loss / len(X_train):.4f}")

Epoch  1 | Loss: 0.7179
Epoch  2 | Loss: 0.3566
Epoch  3 | Loss: 0.2695
Epoch  4 | Loss: 0.2164
Epoch  5 | Loss: 0.1771
Epoch  6 | Loss: 0.1479
Epoch  7 | Loss: 0.1258
Epoch  8 | Loss: 0.1078
Epoch  9 | Loss: 0.0914
Epoch 10 | Loss: 0.0770


In [12]:
# Count correct top-1 predictions over test_loader with gradients disabled,
# then report the fraction of test samples classified correctly.
model.eval()
correct = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        predicted = model(X_batch).argmax(dim=1)  # index of the highest score per row
        correct += int((predicted == y_batch).sum())
print(f"Test accuracy: {correct / len(X_test):.4f}")

Test accuracy: 0.8903


## CNN Approach

In [None]:
import matplotlib.pyplot as plt

# CNN inputs keep the 2-D layout: (N, 1, 28, 28), i.e. one channel per image.
# Values are divided by 255.0 exactly as for the flattened inputs.
cnn_shape = (-1, 1, 28, 28)
X_train_cnn = torch.tensor(X_tr.reshape(cnn_shape) / 255.0, dtype=torch.float)
X_test_cnn = torch.tensor(X_te.reshape(cnn_shape) / 255.0, dtype=torch.float)

# Pair the image tensors with the existing label tensors; shuffle training only.
train_set_cnn = TensorDataset(X_train_cnn, y_train)
test_set_cnn = TensorDataset(X_test_cnn, y_test)
train_loader_cnn = DataLoader(train_set_cnn, batch_size=256, shuffle=True)
test_loader_cnn = DataLoader(test_set_cnn, batch_size=256)