In [1]:

import torchvision
import wandb
import numpy as np
from src import (
    FFNN,
    layer,
    activation_functions,
    initializers,
    loss_functions,
    optimizers,
    batchnorm,
    dropout,
    preprocess,
    sweeping,   
    training,  
    wandblogger
)

In [2]:
train_dataset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=False)
test_dataset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=False)

In [3]:
import numpy as np
from src import preprocess

validation_percent = 0.2
one_hot_encoder = preprocess.OneHotEncoder()


train_size = int((1 - validation_percent) * len(train_dataset))
validation_size = len(train_dataset) - train_size

x_train = train_dataset.data[:train_size].reshape(train_size, -1)
targets_train = np.array(train_dataset.targets[:train_size])
y_train = one_hot_encoder.transform(targets_train)

x_valid = train_dataset.data[train_size:].reshape(validation_size, -1)
targets_valid = np.array(train_dataset.targets[train_size:])
y_valid = one_hot_encoder.transform(targets_valid)

x_test = test_dataset.data.reshape(len(test_dataset), -1)
targets_test = np.array(test_dataset.targets)
y_test = one_hot_encoder.transform(targets_test)

print(type(x_train), type(y_train))
print(x_train.shape, y_train.shape)

<class 'torch.Tensor'> <class 'numpy.ndarray'>
torch.Size([48000, 784]) (48000, 10)


  targets_train = np.array(train_dataset.targets[:train_size])
  targets_valid = np.array(train_dataset.targets[train_size:])
  targets_test = np.array(test_dataset.targets)


In [4]:
#find min and max value of y_train
print("y_train min:", np.min(y_train), "y_train max:", np.max(y_train))

y_train min: 0.0 y_train max: 1.0


In [5]:
loss_fn = loss_functions.CrossEntropyLoss()

weight_init = initializers.HeInitializer()
bias_init = initializers.ConstantInitializer(0.0)

optimizer = optimizers.Adam(learning_rate=0.001)

layers = [
    layer.Layer(
        input_size=28 * 28,
        output_size=1024,
        weight_initializer=weight_init,
        bias_initializer=bias_init,
    ),
    batchnorm.BatchNorm(1024),
    activation_functions.ReLU(),

    layer.Layer(
        input_size=1024,
        output_size=512,
        weight_initializer=weight_init,
        bias_initializer=bias_init,
    ),
    batchnorm.BatchNorm(512),
    activation_functions.ReLU(),

    layer.Layer(
        input_size=512,
        output_size=256,
        weight_initializer=weight_init,
        bias_initializer=bias_init,
    ),
    batchnorm.BatchNorm(256),
    activation_functions.ReLU(),

    layer.Layer(
        input_size=256,
        output_size=128,
        weight_initializer=weight_init,
        bias_initializer=bias_init,
    ),
    batchnorm.BatchNorm(128),
    activation_functions.ReLU(),

    layer.Layer(
        input_size=128,
        output_size=10,
        weight_initializer=weight_init,
        bias_initializer=bias_init,
    ),
]

model = FFNN.FFNN(layers=layers, loss_function=loss_fn, optimizer=optimizer)


In [6]:
# activation_func = activation_functions.ReLU()
# loss_func = loss_functions.CrossEntropyLoss()
# optimizer = optimizers.SGD()
# W_initializers = initializers.HeInitializer()
# B_initializer = initializers.ConstantInitializer(0)
# input_size = x_train.shape[1]
# hidden_size = 392
# output_size = 10

# Input_layer = layer.Layer(input_size=input_size, output_size=output_size, activation_function=activation_func, weight_initializer=W_initializers, bias_initializer=B_initializer)
# layers = [Input_layer]
# FeedForward = FFNN.FFNN(layers=layers, loss_function=loss_func, optimizer=optimizer)


In [7]:

epochs = 100

batch_size = 32
early_stopping = training.EarlyStopping(patience=5, min_delta=1e-4)
training.Trainer(model, loss_function=loss_fn, optimizer=optimizer).train(X=x_train, y=y_train, x_val=x_valid, y_val=y_valid, early_stopper=early_stopping, epochs=epochs, batch_size=batch_size, shuffle=True)

Epoch 1/100 - Loss: 0.4910 - Val Loss: 0.4043
Epoch 2/100 - Loss: 0.3695 - Val Loss: 0.3897
Epoch 3/100 - Loss: 0.3325 - Val Loss: 0.3341
Epoch 4/100 - Loss: 0.3040 - Val Loss: 0.3414
Epoch 5/100 - Loss: 0.2788 - Val Loss: 0.3468
Epoch 6/100 - Loss: 0.2634 - Val Loss: 0.3402
Epoch 7/100 - Loss: 0.2453 - Val Loss: 0.3195
Epoch 8/100 - Loss: 0.2297 - Val Loss: 0.3361
Epoch 9/100 - Loss: 0.2173 - Val Loss: 0.3249
Epoch 10/100 - Loss: 0.2011 - Val Loss: 0.3271
Epoch 11/100 - Loss: 0.1921 - Val Loss: 0.3276
Early stopping at epoch 12


{'loss': [np.float64(0.4910067143763543),
  np.float64(0.3695224824339651),
  np.float64(0.33253514079484153),
  np.float64(0.3040444851727392),
  np.float64(0.27878468249548866),
  np.float64(0.26336879992002826),
  np.float64(0.24534696898114014),
  np.float64(0.229667772724024),
  np.float64(0.21725200204576384),
  np.float64(0.2010508183656839),
  np.float64(0.19208310381380997),
  np.float64(0.1778457546924145)],
 'val_loss': [np.float64(0.40431526714646954),
  np.float64(0.38966376686968357),
  np.float64(0.334133038491237),
  np.float64(0.34144957699072037),
  np.float64(0.3467882795649625),
  np.float64(0.34018867802423036),
  np.float64(0.3195197983249777),
  np.float64(0.3360959777229449),
  np.float64(0.32491027871262995),
  np.float64(0.3271344455200647),
  np.float64(0.32756666039291055),
  np.float64(0.3523768543797985)]}

In [8]:
accuracy = loss_functions.Accuracy() 
#accuracy
y_preds = model.forward_pass(x_test)
acc = accuracy.compute(y_test, y_preds)
print(f"Test accuracy: {acc * 100:.2f}%")

Test accuracy: 88.93%
