# Data loading and pre-processing

In [1]:
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

assert x_train.shape == (60000, 28, 28)
assert x_test.shape == (10000, 28, 28)
assert y_train.shape == (60000,)
assert y_test.shape == (10000,)

In [2]:
import numpy as np
# shuffle train data to diversify sequential input
keys = np.array(range(x_train.shape[0]))
np.random.shuffle(keys)
x_train = x_train[keys]
y_train = y_train[keys]

In [3]:
# flatten for dense NN, from -1 to 1
x_train = (x_train.reshape(x_train.shape[0], -1).astype(np.float32) - 127.5) / 127.5
x_test = (x_test.reshape(x_test.shape[0], -1).astype(np.float32) - 127.5) / 127.5

assert -1 <= x_train.min(), x_train.max() <= 1 # Ensuring that scaling is correct
assert -1 <= x_test.min(), x_test.max() <= 1 

In [4]:
learning_rate = 1e-3
batch_size = 128
epochs = 5

# Training custom model

In [5]:
import sys
sys.path.append("../..")  # path lead to root dir to import module

import nn

In [6]:
model_manual = nn.model.Model()
model_manual.add(nn.layer.Dense(x_train.shape[1], 128, activation=nn.activation.ReLU()))
model_manual.add(nn.layer.Dense(128, 10, activation=nn.activation.Softmax()))

model_manual.set(
    loss = nn.loss.CategoricalCrossentropy(),
    accuracy = nn.accuracy.Categorical(),
    optimizer = nn.optimizer.Adam(learning_rate=learning_rate, decay=5e-5)
)

model_manual.train(x_train, y_train, validation_data = (x_test, y_test), epochs = epochs , batch_size = batch_size , print_every = 100)

epoch: 1
step: 0, acc: 0.078, loss: 10.817 (data_loss: 10.817, reg_loss: 0.000), lr: 0.001
step: 100, acc: 0.836, loss: 1.196 (data_loss: 1.196, reg_loss: 0.000), lr: 0.0009950248756218907
step: 200, acc: 0.820, loss: 1.420 (data_loss: 1.420, reg_loss: 0.000), lr: 0.0009900990099009901
step: 300, acc: 0.820, loss: 1.084 (data_loss: 1.084, reg_loss: 0.000), lr: 0.0009852216748768474
step: 400, acc: 0.766, loss: 1.349 (data_loss: 1.349, reg_loss: 0.000), lr: 0.000980392156862745
step: 468, acc: 0.823, loss: 1.313 (data_loss: 1.313, reg_loss: 0.000), lr: 0.0009771350400625367
training, acc: 0.819, loss: 1.287 (data_loss: 1.287, reg_loss: 0.000), lr: 0.0009771350400625367
epoch: 2
step: 0, acc: 0.836, loss: 1.429 (data_loss: 1.429, reg_loss: 0.000), lr: 0.0009770873027505008
step: 100, acc: 0.867, loss: 1.023 (data_loss: 1.023, reg_loss: 0.000), lr: 0.000972337012008362
step: 200, acc: 0.844, loss: 1.201 (data_loss: 1.201, reg_loss: 0.000), lr: 0.0009676326866321544
step: 300, acc: 0.867, 

In [7]:
model = nn.model.Model('basic', [x_train.shape[1], 128, 10])
model.train(x_train, y_train, validation_data = (x_test, y_test), epochs = epochs , batch_size = batch_size , print_every = 100)

epoch: 1
step: 0, acc: 0.188, loss: 9.169 (data_loss: 9.169, reg_loss: 0.000), lr: 0.001
step: 100, acc: 0.844, loss: 1.335 (data_loss: 1.335, reg_loss: 0.000), lr: 0.0009950248756218907
step: 200, acc: 0.820, loss: 1.476 (data_loss: 1.476, reg_loss: 0.000), lr: 0.0009900990099009901
step: 300, acc: 0.805, loss: 1.130 (data_loss: 1.130, reg_loss: 0.000), lr: 0.0009852216748768474
step: 400, acc: 0.750, loss: 1.229 (data_loss: 1.229, reg_loss: 0.000), lr: 0.000980392156862745
step: 468, acc: 0.823, loss: 1.550 (data_loss: 1.550, reg_loss: 0.000), lr: 0.0009771350400625367
training, acc: 0.819, loss: 1.162 (data_loss: 1.162, reg_loss: 0.000), lr: 0.0009771350400625367
epoch: 2
step: 0, acc: 0.836, loss: 1.314 (data_loss: 1.314, reg_loss: 0.000), lr: 0.0009770873027505008
step: 100, acc: 0.844, loss: 0.910 (data_loss: 0.910, reg_loss: 0.000), lr: 0.000972337012008362
step: 200, acc: 0.844, loss: 1.184 (data_loss: 1.184, reg_loss: 0.000), lr: 0.0009676326866321544
step: 300, acc: 0.859, lo

# Keras model

In [8]:
keras = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=x_train[0].shape),
    tf.keras.layers.Dense(10, activation='softmax')
])

opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)

keras.compile(optimizer=opt,
                 loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                 metrics=['accuracy'])

keras.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)

test_loss, test_acc = keras.evaluate(x_test,  y_test, verbose=2)

print('\nTest accuracy:', test_acc)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 0s - loss: 0.3758 - accuracy: 0.8610 - 495ms/epoch - 2ms/step

Test accuracy: 0.8610000014305115


# PyTorch model

In [9]:
import torch
import torch.nn as tnn
import torch.nn.functional as F
from torch.utils.data import Dataset

In [10]:
class TorchNet(tnn.Module):
    def __init__(self):
        super(TorchNet, self).__init__()
        
        self.linear_relu_stack = tnn.Sequential(
            tnn.Linear(784, 128),
            tnn.ReLU(),
            tnn.Linear(128, 10),
            tnn.Softmax(),
        )
        
    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits

def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    
    
class CustomImageDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
        
    def __len__(self):
        return self.y.shape[0]

    def __getitem__(self, idx):
        image = self.x[idx]
        label = self.y[idx]
        return image, label

In [11]:
# Preparing the data.
# PyTorch uses custom DataLoaders and Datasets
# for better pipeline architechture

from torch.utils.data import DataLoader

train_data = CustomImageDataset(x_train, y_train)
test_data = CustomImageDataset(x_test, y_test)

train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

In [12]:
torch_model = TorchNet().to('cpu')

loss_fn = tnn.CrossEntropyLoss()
optimizer = torch.optim.Adam(torch_model.parameters(), lr=learning_rate)

epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, torch_model, loss_fn, optimizer)
    test_loop(test_dataloader, torch_model, loss_fn)
print("Done!")


Epoch 1
-------------------------------
loss: 2.305379  [    0/60000]


  input = module(input)


loss: 1.739008  [ 6400/60000]
loss: 1.706601  [12800/60000]
loss: 1.764147  [19200/60000]
loss: 1.735661  [25600/60000]
loss: 1.718757  [32000/60000]
loss: 1.707036  [38400/60000]
loss: 1.650356  [44800/60000]
loss: 1.642801  [51200/60000]
loss: 1.560623  [57600/60000]
Test Error: 
 Accuracy: 76.6%, Avg loss: 1.696663 

Epoch 2
-------------------------------
loss: 1.736954  [    0/60000]
loss: 1.695611  [ 6400/60000]
loss: 1.651206  [12800/60000]
loss: 1.815212  [19200/60000]
loss: 1.585290  [25600/60000]
loss: 1.693932  [32000/60000]
loss: 1.696980  [38400/60000]
loss: 1.682546  [44800/60000]
loss: 1.588734  [51200/60000]
loss: 1.662499  [57600/60000]
Test Error: 
 Accuracy: 78.6%, Avg loss: 1.677769 

Epoch 3
-------------------------------
loss: 1.682691  [    0/60000]
loss: 1.694496  [ 6400/60000]
loss: 1.660961  [12800/60000]
loss: 1.700871  [19200/60000]
loss: 1.703589  [25600/60000]
loss: 1.708350  [32000/60000]
loss: 1.667320  [38400/60000]
loss: 1.659667  [44800/60000]
loss: 