In [1]:
import numpy as np
import pandas as pd
from neural_net.loss import CrossEntropyLoss
from neural_net.modules import ConvolutionalLayer, AvgPool, NeuralLayer, Dropout
from neural_net.activations import Relu, Sigmoid, Flatten
from neural_net.networks import ANN
from neural_net.base import Tensor
from PIL import Image
import io
import matplotlib.pyplot as plt
import copy
from tqdm import tqdm

In [2]:
# Relative paths of the MNIST parquet shards inside the Hugging Face dataset
# repository, keyed by split name.
splits = dict(
    train='mnist/train-00000-of-00001.parquet',
    test='mnist/test-00000-of-00001.parquet',
)
# Only the 'train' shard is read into `df`.
df = pd.read_parquet("hf://datasets/ylecun/mnist/" + splits["train"])

In [None]:
# Preview the first two rows of the loaded frame.
df.head(n=2)

In [None]:
# One-hot encode the digit label: one 0/1 indicator column per distinct label.
# Requesting an integer dtype directly keeps the encoding vectorised and avoids
# an element-wise Python callback through `DataFrame.map`, which is only
# available in pandas >= 2.1.
one_hot = pd.get_dummies(df['label'], dtype=int)

# Swap the single 'label' column for its indicator columns (joined on the index).
df = df.drop(columns='label').join(one_hot)

df.head(2)

In [5]:
# Work on the first 10,000 rows only (presumably to keep training time manageable).
df = df.head(10_000)

In [6]:
class DataSet:
    """Mini-batch iterator over a DataFrame of encoded images and label columns.

    Every column other than ``'image'`` is treated as a label column. Each
    ``'image'`` cell must be subscriptable with ``'bytes'`` and yield encoded
    image data that PIL can open.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding an ``'image'`` column plus the label columns.
    batch_size : int
        Samples per batch; must be at least 1. The final batch of a pass may
        be smaller.
    shuffle : bool
        When true, the sample order is shuffled on construction and again at
        the start of every pass (``__iter__``).
    img_shape : tuple
        ``(height, width)`` of the decoded images.
    transform : callable, optional
        Applied to the ``(batch, 1, height, width)`` image array before it is
        wrapped in a ``Tensor``.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1 (it would otherwise make iteration
        never advance, or make ``len()`` divide by zero).
    """

    def __init__(self, df:pd.DataFrame, batch_size=32, shuffle=True, img_shape=(28,28), transform=None):
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        self.df = df
        self.labels = df.columns.drop('image')
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_samples = len(df)
        self.indices = np.arange(self.num_samples)
        self.current_idx = 0  # Position (in samples) of the next batch start
        self.img_shape = img_shape
        self.transform = transform

        if self.shuffle:
            np.random.shuffle(self.indices)  # Shuffle data at the start

    def __iter__(self):
        """Rewind to the first batch (reshuffling if enabled) and return self."""
        self.current_idx = 0
        if self.shuffle:
            np.random.shuffle(self.indices)
        return self

    def __len__(self):
        """Number of batches per pass, counting a trailing partial batch."""
        # Ceil division without floats.
        return -(-self.num_samples // self.batch_size)

    def __next__(self) -> "tuple[Tensor, Tensor]":
        """Return the next ``(images, labels)`` batch as a pair of Tensors.

        Raises
        ------
        StopIteration
            Once every sample of the current pass has been served.
        """
        if self.current_idx >= self.num_samples:
            raise StopIteration  # Stop when all batches are processed

        # Clamp so the final batch simply ends at the last sample.
        next_idx = min(self.current_idx + self.batch_size, self.num_samples)
        batch_indices = self.indices[self.current_idx: next_idx]

        # Decode each image into a single-channel slot of the batch array.
        X_batch = np.zeros((len(batch_indices), 1, self.img_shape[0], self.img_shape[1]))
        # Fetch the column once instead of materialising a full row per image.
        images = self.df['image']
        for i, b in enumerate(batch_indices):
            img = Image.open(io.BytesIO(images.iloc[b]['bytes']))
            # Scale by 1/256; assuming 8-bit pixels this maps them into [0, 1).
            X_batch[i,0,:] = np.array(img, dtype=np.float32)/256
        if self.transform is not None:
            X_batch = self.transform(X_batch)
        y_batch = self.df[self.labels].iloc[batch_indices].to_numpy(dtype=np.float32)

        # Move to the next batch
        self.current_idx = next_idx

        return Tensor(X_batch), Tensor(y_batch)

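Split into train, test and validation sets (the test split is used for model selection during training; the validation split is held out for the final evaluation)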

In [7]:
def train_transf(x, noise_std=0.1):
    """Return ``x`` with additive zero-mean Gaussian noise.

    Used as a training-time augmentation, intended to help stabilise the net
    and reduce overfitting.

    Parameters
    ----------
    x : np.ndarray
        Batch of images; the noise array has the same shape as ``x``.
    noise_std : float
        Standard deviation of the noise. Defaults to 0.1.

    Returns
    -------
    np.ndarray
        ``x`` plus the noise; ``x`` itself is not modified.
    """
    return x + np.random.randn(*x.shape)*noise_std

# Mini-batch size shared by the three loaders.
batch_size = 16
n_samples = len(df)

# 80% of the rows are used for training; the remainder is divided 70/30
# between the test and validation splits.
n_train = int(0.8*n_samples)
n_test = int(0.7*(n_samples - n_train))
n_val = n_samples - (n_train + n_test)

# Contiguous, non-overlapping row ranges in the order train | test | validation.
df_train = df.iloc[:n_train]
df_test = df.iloc[n_train:n_train + n_test]
df_val = df.iloc[n_train + n_test:n_samples]

# Only the training loader perturbs its images with noise.
train_set = DataSet(df_train, transform=train_transf, batch_size=batch_size)
test_set = DataSet(df_test, batch_size=batch_size)
val_set = DataSet(df_val, batch_size=batch_size)

Set up the neural network

In [8]:
# Training hyper-parameters and loss.
num_epochs = 50
# NOTE(review): `lr` is not referenced by any other visible cell -- confirm how
# the learning rate actually reaches the network.
lr = 1e-3
loss = CrossEntropyLoss()
out_dim = len(train_set.labels)  # one output unit per label column

nn = ANN(loss, weight_decay=1E-3)

# Convolutional feature extractor. Spatial sizes in the comments assume 28x28
# inputs, the usual conv output-size formula and a default stride of 1 for the
# middle layer -- TODO confirm against ConvolutionalLayer.
conv_layers = [
    ConvolutionalLayer(in_ch=1, out_ch=6, k_size=(5,5), padding=(2,2), stride=(2,2)),    # 28 -> 14
    Relu(),
    ConvolutionalLayer(in_ch=6, out_ch=16, k_size=(5,5), padding=(0,0)),                 # 14 -> 10
    Relu(),
    ConvolutionalLayer(in_ch=16, out_ch=16, k_size=(5,5), padding=(2,2), stride=(2,2)),  # 10 -> 5
    Relu(),
]

# Fully connected classifier head; 400 = 16 * 5 * 5 under the assumptions above.
dense_layers = [
    Flatten(),
    NeuralLayer(in_dim=400, out_dim=120),
    #Dropout(0.1),
    Relu(),
    NeuralLayer(in_dim=120, out_dim=64),
    #Dropout(0.1),
    Relu(),
    NeuralLayer(in_dim=64, out_dim=out_dim),
]

nn.layers.extend(conv_layers + dense_layers)

# nn.layers.extend([
#     ConvolutionalLayer(in_ch=1, out_ch=6, k_size=(5,5), padding=(2,2)),
#     Relu(),
#     AvgPool(k_size=(2,2), stride=(2,2)),
#     ConvolutionalLayer(in_ch=6, out_ch=16, k_size=(5,5), padding=(0,0)),
#     Relu(),
#     AvgPool(k_size=(2,2), stride=(2,2)),
#     Flatten(),
#     NeuralLayer(in_dim=400, out_dim=120),
#     #Dropout(0.1),
#     Relu(),
#     NeuralLayer(in_dim=120, out_dim=64),
#     #Dropout(0.1),
#     Relu(),
#     NeuralLayer(in_dim=64, out_dim=out_dim),
# ]
# )

In [9]:
def accuracy(logits, y):
    """Count the samples of a batch whose predicted class matches the label.

    Despite its name this returns a *count*, not a ratio: callers accumulate
    it over batches and divide by the total number of samples.

    The predicted class is the argmax of the raw logits. Squashing the logits
    through a sigmoid first is unnecessary (it is monotonic) and can be
    harmful: large logits saturate to exactly 1.0, which creates ties that
    argmax resolves in favour of the lowest index.

    Parameters
    ----------
    logits : array-like, shape (batch, classes)
        Raw network outputs.
    y : array-like, shape (batch, classes)
        One-hot encoded targets.

    Returns
    -------
    int
        Number of samples where ``argmax(logits) == argmax(y)`` along the last
        axis.
    """
    predictions = np.asarray(np.argmax(logits, axis=-1))
    reals = np.asarray(np.argmax(y, axis=-1))
    return int(np.count_nonzero(predictions == reals))

### Train

In [None]:
# Per-epoch history: mean batch loss on each split and accuracy on the test split.
train_losses, test_losses, test_accuracies = [], [], []

# Checkpoint of the network with the lowest test loss seen so far.
best_test_loss = float('inf')
best_model = copy.deepcopy(nn)

for epoch in range(num_epochs):
    # Running totals for this epoch.
    train_loss = 0
    test_loss = 0
    test_accuracy = 0

    # Training pass. No separate optimiser step is visible here, so
    # `nn.backward()` presumably also applies the parameter update -- TODO confirm.
    nn.train = True
    for i, (x, y) in enumerate(tqdm(train_set)):
        nn.zero_grads()
        y_hat = nn(x)
        train_loss += nn.get_loss(y_hat, y)
        nn.backward()

    # Summed batch losses divided by the number of batches.
    train_losses.append(train_loss/len(train_set))

    # Evaluation pass on the test split: forward only, no backward call.
    nn.train = False
    for i, (x, y) in enumerate(tqdm(test_set)):
        y_hat = nn(x)
        test_accuracy += accuracy(y_hat, y)
        test_loss += nn.get_loss(y_hat, y)
    test_losses.append(test_loss/len(test_set))
    # `accuracy` returns a count of correct samples, hence the division by samples.
    test_accuracies.append(test_accuracy/test_set.num_samples)

    print(f"epoch {epoch+1:2.0f} \t train: {train_losses[-1]:3.3f}\t test: {test_losses[-1]:3.3f} \t accuracy {100*test_accuracies[-1]:3.3f}%")

    # Snapshot the network whenever the test loss improves.
    if test_losses[-1] < best_test_loss:
        best_test_loss = test_losses[-1]
        best_model = copy.deepcopy(nn)

In [None]:
# Plot the per-epoch mean loss on the training and test splits.
# The x-axis of each curve is derived from the length of its own history rather
# than from `num_epochs`, so the cell still works after an interrupted run or
# after `num_epochs` has been edited.
fig, ax = plt.subplots()
ax.plot(range(1, len(train_losses) + 1), train_losses, label="Train")
ax.plot(range(1, len(test_losses) + 1), test_losses, label="Test")
ax.set_title('Loss per epoch')
ax.set_xlabel('Num. epochs')
ax.set_ylabel('CE loss')
ax.legend()
plt.show()

### Validation

In [None]:
# Final evaluation of the best checkpoint (lowest test loss) on the held-out
# validation split.
best_model.train = False
val_accuracy = 0  # running count of correctly classified samples
for x, y in val_set:
    y_hat = best_model(x)
    val_accuracy += accuracy(y_hat, y)
print(f"accuracy on validation set {100* (val_accuracy/val_set.num_samples) :3.3f}%")

In [None]:
# Show a grid of validation images together with the predicted digit.
# The figure is bound to `fig` rather than `_`, which IPython uses for the
# last cell output.
fig, axs = plt.subplots(2, 8, figsize=(15, 5))

# Start a fresh pass through the iterator protocol instead of overwriting the
# loader's internal `current_idx`.
batch = next(iter(val_set))
x, y = batch
logits = best_model(x)

# Predicted class = argmax of the raw logits. A sigmoid in between would not
# change the ranking but can saturate large logits to 1.0 and create ties.
predictions = np.argmax(logits, axis=-1)

axs = axs.flatten()
n_shown = min(len(axs), len(predictions))
for i, ax in enumerate(axs):
    if i >= n_shown:
        # The batch holds fewer images than the grid has panels.
        ax.axis('off')
        continue
    ax.imshow(x[i].reshape(28,28))
    ax.set_title(f'Predicted {predictions[i]}')
