In [6]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from natorch.utils.data import train_test_split, one_hot
from natorch.nn.modules.losses import CrossEntropyLoss
from natorch.optim import SGD

from natorch.nn.models.lenet5.model import LeNet5

### Import Dataset

In [7]:
# Location of the MNIST CSV, given relative to the notebook's working directory.
data_path = '../../../../../data/MNIST/MNIST.csv'

# Parse the whole file into a DataFrame, then preview its first ten rows.
data = pd.read_csv(filepath_or_buffer=data_path)
data.head(n=10)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Data Pre-processing

**Splitting and Normalization**

In [8]:
# The first column is the digit label; every remaining column is one pixel value.
y = data.iloc[:, 0].to_numpy()

# Extract the pixel columns and scale them by 1/255 in one step
# (assumes 8-bit intensities, so the result lies in [0, 1] — TODO confirm).
X = data.iloc[:, 1:].to_numpy() / 255.0

X.shape, y.shape

((42000, 784), (42000,))

In [9]:
# Hold out 20% of the samples as a test set; the remaining 80% is used for training.
# NOTE(review): no seed is passed here, so the split may differ between runs —
# confirm whether natorch's train_test_split offers a way to make it reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
x_train.shape, y_train.shape

((33600, 784), (33600,))

In [10]:
# Turn each flat 784-pixel row into a single-channel 28x28 image: (N, 1, 28, 28).
# The sample count is taken from the array itself rather than hard-coded, so the
# cell keeps working when the dataset or the split ratio changes. Passing the
# explicit count (instead of -1) also makes an accidental re-run on the already
# padded 32x32 array fail with a ValueError instead of silently scrambling pixels.
x_train = x_train.reshape(x_train.shape[0], 1, 28, 28)

# Zero-pad only the two image axes by 2 pixels on each side (28x28 -> 32x32);
# the sample and channel axes are left untouched.
pad_width = (
    (0, 0),  # axis 0: samples
    (0, 0),  # axis 1: channel
    (2, 2),  # axis 2: 2 before, 2 after
    (2, 2),  # axis 3: 2 before, 2 after
)

x_train = np.pad(x_train, pad_width=pad_width, mode='constant', constant_values=0)
x_train.shape

(33600, 1, 32, 32)

In [11]:
# Replace the integer class labels with one-hot rows (10 columns, one per digit class).
# Note: this overwrites y_train, so the cell is meant to run exactly once per split.
y_train = one_hot(y_train, num_classes = 10)
y_train.shape

(33600, 10)

### Define model

In [12]:
# Build the LeNet5 network with its default constructor arguments.
# `fit` (defined in the next cell) reads this module-level `model` directly.
model = LeNet5()

In [13]:
def fit(x_train: np.ndarray, y_train: np.ndarray, epochs: int = 10, lr: float = 1e-3, batch_size: int = 32) -> None:
    """Train the notebook-level ``model`` with mini-batch SGD and cross-entropy loss.

    Each epoch visits the samples in a fresh random order. For every mini-batch
    it clears the gradients, runs a forward and a backward pass, and applies one
    optimizer step. At the end of the epoch it prints the sample-weighted
    average of the per-batch losses.

    Note:
        The function does not receive the network as an argument; it reads the
        global ``model`` created in an earlier cell, which must be executed
        first.

    Args:
        x_train: Training inputs, indexed by sample along axis 0.
        y_train: Training targets aligned with ``x_train`` along axis 0
            (one-hot rows in this notebook).
        epochs: Number of full passes over the training data.
        lr: Learning rate handed to ``SGD``.
        batch_size: Maximum number of samples per mini-batch; the last batch of
            an epoch may be smaller.

    Raises:
        ValueError: If ``x_train`` is empty, if ``x_train`` and ``y_train``
            differ in length, or if ``batch_size`` is not positive.
    """
    N = x_train.shape[0]

    # Fail early with a clear message. Without these checks an empty dataset
    # ends in a ZeroDivisionError at the epoch average, a negative batch size
    # silently trains on nothing, and mismatched lengths either raise a late
    # IndexError or quietly ignore part of the targets.
    if N == 0:
        raise ValueError("x_train must contain at least one sample")
    if y_train.shape[0] != N:
        raise ValueError(
            f"x_train and y_train must have the same number of samples, "
            f"got {N} and {y_train.shape[0]}"
        )
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    loss_fn = CrossEntropyLoss()
    optimizer = SGD(model.net._parameter(), lr=lr)

    for epoch in range(1, epochs+1):
        # New shuffle every epoch so batches differ between epochs.
        perm = np.random.permutation(N)
        epoch_loss = 0.0

        for i in tqdm(range(0, N, batch_size), desc=f"Epoch {epoch}"):
            optimizer.zero_grad()
            idx = perm[i : i + batch_size]
            xb = x_train[idx]
            yb = y_train[idx]

            # Forward
            preds = model.forward(xb)
            loss = loss_fn.forward(preds, yb)
            # Weight by the batch size so the smaller final batch does not skew
            # the epoch average (assumes the loss is a per-batch mean — TODO confirm).
            epoch_loss += loss * xb.shape[0]

            # Backward
            grad_loss = loss_fn.backward(1.0)
            model.backward(grad_loss)

            # Update
            optimizer.step()

        epoch_loss /= N
        print(f"Epoch {epoch}/{epochs} - Loss: {epoch_loss:.4f}")

In [14]:
# Train for 10 epochs with lr = 0.01 (overriding fit's 1e-3 default) and mini-batches of 32.
# NOTE(review): in the recorded run below the epoch loss stays at ~2.303 for all
# 10 epochs. That is about ln(10), the cross-entropy of a uniform guess over 10
# classes, so the model did not measurably learn in this run — investigate
# before relying on its predictions.
fit(x_train, y_train, epochs = 10, lr = 0.01, batch_size = 32)

Epoch 1:   0%|          | 0/1050 [00:00<?, ?it/s]

Epoch 1: 100%|██████████| 1050/1050 [00:39<00:00, 26.69it/s]


Epoch 1/10 - Loss: 2.3072


Epoch 2: 100%|██████████| 1050/1050 [00:31<00:00, 33.79it/s]


Epoch 2/10 - Loss: 2.3036


Epoch 3: 100%|██████████| 1050/1050 [00:31<00:00, 33.59it/s]


Epoch 3/10 - Loss: 2.3033


Epoch 4: 100%|██████████| 1050/1050 [00:31<00:00, 33.59it/s]


Epoch 4/10 - Loss: 2.3033


Epoch 5: 100%|██████████| 1050/1050 [00:31<00:00, 33.46it/s]


Epoch 5/10 - Loss: 2.3032


Epoch 6: 100%|██████████| 1050/1050 [00:31<00:00, 33.26it/s]


Epoch 6/10 - Loss: 2.3032


Epoch 7: 100%|██████████| 1050/1050 [00:31<00:00, 33.07it/s]


Epoch 7/10 - Loss: 2.3032


Epoch 8: 100%|██████████| 1050/1050 [00:31<00:00, 33.02it/s]


Epoch 8/10 - Loss: 2.3031


Epoch 9: 100%|██████████| 1050/1050 [00:31<00:00, 33.14it/s]


Epoch 9/10 - Loss: 2.3031


Epoch 10: 100%|██████████| 1050/1050 [00:31<00:00, 33.12it/s]

Epoch 10/10 - Loss: 2.3030





### Test

In [17]:
# Use the first held-out example as a single-sample check.
# Pad only the two image axes by 2 on each side: 28x28 -> 32x32.
pad_width = ((0, 0), (0, 0), (2, 2), (2, 2))

# Flat pixel row -> batch of one single-channel 28x28 image -> zero-padded to 32x32.
x_test_sample = np.pad(
    x_test[0, :].reshape(1, 1, 28, 28),
    pad_width=pad_width,
    mode='constant',
    constant_values=0,
)

# One-hot encode the matching label so it can be compared through argmax.
y_test_sample = one_hot(y_test[0], num_classes = 10)

x_test_sample.shape, y_test_sample.shape

((1, 1, 32, 32), (10,))

In [25]:
# Forward pass on the single padded test sample; `predict` holds whatever
# model.forward returns for it (presumably one score per digit class — TODO confirm).
predict = model.forward(x_test_sample)

In [26]:
# Show (predicted class index, true class index) for that one sample.
# NOTE(review): the recorded output is (1, 9), i.e. a misclassification. One sample
# is not an evaluation — TODO: measure accuracy over the whole x_test / y_test split.
np.argmax(predict), np.argmax(y_test_sample)

(np.int64(1), np.int64(9))