In [1]:
from dataset.dataset import load_fashion_mnist_dataset

# Load the Fashion-MNIST train/test splits. `y=True` presumably asks the loader
# to return the label arrays as well — TODO confirm in dataset.dataset.
X_train, Y_train, X_test, Y_test = load_fashion_mnist_dataset(y=True)
# Bare last expression so the notebook displays the shape; the saved output
# shows (60000, 784), i.e. the images arrive flattened.
X_train.shape


2023-12-11 18:15:39.062139: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-11 18:15:39.320913: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-11 18:15:39.323095: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


(60000, 784)

In [None]:

# Turn each flat 784-value row into a (1, 28, 28) array so both splits have
# shape (N, 1, 28, 28); presumably the layout the Conv2D layers expect
# (channels-first) — TODO confirm against models.layers.
X_train, X_test = (
    split.reshape(-1, 1, 28, 28) for split in (X_train, X_test)
)

In [87]:
import os
import pickle

import numpy as np

from models.optimizer import ConvolutionOptimizer
from models.metrics import MeanSquaredError
from models.neural_network import BasicNetwork
from models.layers import DenseLayer, Conv2D, MaxPool2D, Flatten
from models.activation_functions import Sigmoid, ReLU


def train_and_save(
    kernel_size,
    n_of_kernels,
    input_count,
    epochs=100,
    batch_size=4086,
    learning_rate=1e-2,
    data=None,
    save_dir="saved_models/",
):
    """Train a convolutional autoencoder and pickle the network and optimizer.

    The network is two Conv2D + MaxPool2D stages, a Flatten, a 400-unit dense
    layer with ReLU, and a 784-unit dense layer with Sigmoid. It is trained to
    reproduce its own input: the target of every batch is the batch itself,
    flattened to 784 values per sample, scored with MeanSquaredError.

    Args:
        kernel_size: Kernel size passed to both Conv2D layers.
        n_of_kernels: Number of kernels used by both Conv2D layers.
        input_count: Input width of the first dense layer. It has to equal the
            number of features Flatten emits for the chosen kernel settings;
            it is not derived here, so the caller must supply it.
        epochs: Number of passes over the data. Defaults to 100.
        batch_size: Samples per batch. Defaults to 4086. Samples beyond the
            last full batch are skipped in every epoch.
        learning_rate: Learning rate handed to ConvolutionOptimizer.
            Defaults to 1e-2 (the same value the notebook spelled as 10e-3).
        data: Training images; each sample must hold 784 values. Defaults to
            the notebook-level ``X_train``, which must already be reshaped
            to (N, 1, 28, 28).
        save_dir: Root directory for the pickled artifacts. Defaults to
            "saved_models/".

    Raises:
        ValueError: If ``batch_size`` is not positive or exceeds the number
            of samples, in which case no batch could be formed.

    Side effects:
        Prints the epoch index once per epoch, records the mean batch loss of
        each epoch through ``opt.save_loss``, and writes
        ``<save_dir>/networks/conv/model_<n_of_kernels>_<kernel_size>`` and
        ``<save_dir>/optimizers/conv/optimizer_<n_of_kernels>_<kernel_size>``.
    """
    # Fall back to the notebook global only when no data is passed, so the
    # existing calls keep working while the dependency becomes explicit.
    train_data = X_train if data is None else data

    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    batch_count = train_data.shape[0] // batch_size
    if batch_count == 0:
        # Without this guard the loop would run zero batches, np.mean([])
        # would yield NaN, and an untrained model would be pickled silently.
        raise ValueError(
            f"batch_size ({batch_size}) exceeds the number of samples "
            f"({train_data.shape[0]})"
        )

    convolution_net = BasicNetwork()

    convolution_net.add_layer(Conv2D(1, kernel_size, n_of_kernels, 1))
    convolution_net.add_layer(MaxPool2D(2, 2))
    convolution_net.add_layer(Conv2D(n_of_kernels, kernel_size, n_of_kernels, 1))
    convolution_net.add_layer(MaxPool2D(2, 2))
    convolution_net.add_layer(Flatten())
    convolution_net.add_layer(DenseLayer(input_count, 400))
    convolution_net.add_layer(ReLU())
    convolution_net.add_layer(DenseLayer(400, 784))
    convolution_net.add_layer(Sigmoid())

    opt = ConvolutionOptimizer(convolution_net, learning_rate=learning_rate)
    loss_fn = MeanSquaredError()

    for epoch in range(epochs):
        print(f"Epoch: {epoch}")
        epoch_losses = []
        for batch_no in range(batch_count):
            start_idx = batch_no * batch_size
            end_idx = start_idx + batch_size

            x = train_data[start_idx:end_idx]
            # Reconstruction target: the input itself, flattened to the
            # 784-wide output of the final dense layer.
            x_cmp = x.reshape(x.shape[0], 784)

            prediction = convolution_net.forward(x)
            epoch_losses.append(loss_fn.calculate(prediction, x_cmp))

            loss_fn.backward(prediction, x_cmp)
            convolution_net.backward(loss_fn.d_inputs)

            opt.fit()

        opt.save_loss(np.mean(epoch_losses))

    artifact_name = f"{n_of_kernels}_{kernel_size}"
    network_dir = os.path.join(save_dir, "networks", "conv")
    optimizer_dir = os.path.join(save_dir, "optimizers", "conv")
    # Create the target folders up front so a long training run is not lost
    # to a missing directory at save time.
    os.makedirs(network_dir, exist_ok=True)
    os.makedirs(optimizer_dir, exist_ok=True)

    with open(os.path.join(network_dir, f"model_{artifact_name}"), "wb") as pickle_file:
        pickle.dump(convolution_net, pickle_file)

    with open(
        os.path.join(optimizer_dir, f"optimizer_{artifact_name}"), "wb"
    ) as pickle_file:
        pickle.dump(opt, pickle_file)

In [88]:

# One row per model to train: (kernel_size, n_of_kernels, input_count).
# input_count is the flattened feature count fed to the first dense layer and
# is supplied by hand for each kernel configuration.
for kernel_size, n_of_kernels, input_count in (
    (2, 8, 128),
    (2, 16, 256),
    (4, 8, 72),
    (4, 16, 144),
):
    train_and_save(
        kernel_size=kernel_size,
        n_of_kernels=n_of_kernels,
        input_count=input_count,
    )


Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
