In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import random

from time import time

from silence_tensorflow import silence_tensorflow
silence_tensorflow()
import tensorflow as tf
import tensorflow_probability as tfp

from tensorflow import keras
from keras import models, layers, initializers, optimizers, losses
from keras.utils import to_categorical
from tqdm.keras import TqdmCallback

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config = config)

import os, shutil
import json
import subprocess

E0000 00:00:1741798693.740530  645670 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741798693.744075  645670 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  from .autonotebook import tqdm as notebook_tqdm
I0000 00:00:1741798695.514933  645670 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4114 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 6GB Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


# Objective

In this notebook, we aim to investigate the Fashion-MNIST problem and formulate a Convolutional Neural Network (CNN) that is capable of prediction the correct class of clothing objects with high accuracy rates. For the model to be similar to what we will be applying in the context of the cure rate models, we shall only consider the first 5 classes of clothing in this case. We are using as a main guide reference, the tutorial: https://machinelearningmastery.com/how-to-develop-a-cnn-from-scratch-for-fashion-mnist-clothing-classification/

In [2]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score

In [3]:
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

i_valid_train = pd.Series(train_labels).isin([0,1,2,3,4]).to_numpy()
i_valid_test = pd.Series(test_labels).isin([0,1,2,3,4]).to_numpy()

# Filters to take only the images with labels in [0, 1, 2, 3, 4]
train_images = train_images[i_valid_train]
train_images = train_images / np.max(train_images)
train_shape = train_images.shape
# Adds one more dimension for keras to identify the "colors" dimension
train_images = np.reshape(train_images, (train_shape[0], train_shape[1], train_shape[2], 1))

test_images = test_images[i_valid_test]
test_images = test_images / np.max(test_images)
test_shape = test_images.shape
# Adds one more dimension for keras to identify the "colors" dimension
test_images = np.reshape(test_images, (test_shape[0], test_shape[1], test_shape[2], 1))

train_labels = train_labels[i_valid_train]
test_labels = test_labels[i_valid_test]

# -------------------- Separates the dataset into train, val and test --------------------
val_images = train_images[25000:, :, :, :]
train_images = train_images[:25000, :, :, :]

val_labels = train_labels[25000:]
train_labels = train_labels[:25000]

ohe = OneHotEncoder()
ohe.fit( np.transpose([train_labels]) )
print("OneHot Categories: {}".format(ohe.categories_))

train_y = ohe.transform( np.transpose([train_labels]) ).toarray()
val_y = ohe.transform( np.transpose([val_labels]) ).toarray()
test_y = ohe.transform( np.transpose([test_labels]) ).toarray()

OneHot Categories: [array([0, 1, 2, 3, 4], dtype=uint8)]


First, we will consider the case where there is a single convolutional layer for the images, followed by two dense layers

In [14]:
class ModelMNIST(keras.Model):
    def __init__(self):
        super().__init__()
            
    def define_structure(self, shape_input, seed = 1):
        self.shape_input = shape_input
        
        # Gera uma imagem inteira de zeros com as dimensões do modelo
        dummy_input = keras.layers.Input(shape = self.shape_input)
        
        initializer = initializers.HeUniform(seed = seed)
        
        self.convolution1 = keras.layers.Conv2D(filters = 32, kernel_size = [3,3], padding = "same", activation = tf.nn.leaky_relu,
                                                kernel_initializer = initializer, dtype = tf.float32)
        self.pooling1 = keras.layers.MaxPool2D(pool_size = [3,3], strides = 2)
    
        self.flatten = keras.layers.Reshape(target_shape=(-1,))
        self.dense1 = keras.layers.Dense(units = 100, activation = tf.nn.relu, dtype = tf.float32)
        self.dense2 = keras.layers.Dense(units = 5, dtype = tf.float32, activation = "softmax", use_bias = False)

        # Initialize the model weights (if not called beforehand, the method .get_weights() returns an empty list)
        self(dummy_input)
        
    def call(self, x_input):
        x = self.convolution1(x_input)
        x = self.pooling1(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = tf.cast(x, dtype = tf.float64)
        return x
    
    def copy(self):
        new_model = ModelMNIST()
        new_model.define_structure(shape_input = self.shape_input)
        new_model.set_weights(self.get_weights())
        return new_model

In [22]:
class ModelMNIST(keras.Model):
    def __init__(self):
        super().__init__()
            
    def define_structure(self, shape_input, seed = 1):
        self.shape_input = shape_input
        
        # Gera uma imagem inteira de zeros com as dimensões do modelo
        dummy_input = keras.layers.Input(shape = self.shape_input)
        
        initializer = initializers.HeUniform(seed = seed)
        
        self.convolution1 = keras.layers.Conv2D(filters = 16, kernel_size = [3,3], padding = "same", activation = tf.nn.leaky_relu,
                                                kernel_initializer = "he_uniform", dtype = tf.float32)
        self.pooling1 = keras.layers.MaxPool2D(pool_size = [2,2], strides = 2)
        self.convolution2 = keras.layers.Conv2D(filters = 32, kernel_size = [3,3], padding = "same", activation = tf.nn.leaky_relu,
                                                kernel_initializer = "he_uniform", dtype = tf.float32)
        self.pooling2 = keras.layers.MaxPool2D(pool_size = [2,2], strides = 2)
    
        self.flatten = keras.layers.Reshape(target_shape=(-1,))
        self.dense1 = keras.layers.Dense(units = 64, activation = tf.nn.relu, dtype = tf.float32)
        self.dense2 = keras.layers.Dense(units = 5, dtype = tf.float32, activation = "softmax", use_bias = False)

        # Initialize the model weights (if not called beforehand, the method .get_weights() returns an empty list)
        self(dummy_input)
        
    def call(self, x_input):
        x = self.convolution1(x_input)
        x = self.pooling1(x)
        x = self.convolution2(x)
        x = self.pooling2(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = tf.cast(x, dtype = tf.float64)
        return x
    
    def copy(self):
        new_model = ModelMNIST()
        new_model.define_structure(shape_input = self.shape_input)
        new_model.set_weights(self.get_weights())
        return new_model

In [23]:
model = ModelMNIST()
model.define_structure(shape_input = train_images[0].shape, seed = 10)

callbacks = [ TqdmCallback(verbose=0) ]

# Parada precoce (Early stopping) - Evita overfitting e agiliza o treinamento
es = keras.callbacks.EarlyStopping(monitor = 'val_loss',
                                   mode = "min",
                                   min_delta = 0.0,
                                   patience = 5,
                                   restore_best_weights = True)
callbacks.append(es)

model.compile(
    optimizer = optimizers.Adam(learning_rate = 0.001),
    loss = "categorical_crossentropy",
    run_eagerly = False
)

model.fit(
    train_images, train_y,
    epochs = 100,
    verbose = 0,
    callbacks = callbacks,
    batch_size = 1024,
    shuffle = True,
    validation_data = (val_images, val_y)
)

 30%|████████████████████████████████████████▌                                                                                              | 30/100 [00:18<00:43,  1.63epoch/s, loss=0.169, val_loss=0.205]


<keras.src.callbacks.history.History at 0x791900059db0>

In [24]:
train_labels_p_pred = model.predict(train_images, verbose = False)
val_labels_p_pred = model.predict(val_images, verbose = False)
test_labels_p_pred = model.predict(test_images, verbose = False)

train_labels_pred = ohe.inverse_transform( train_labels_p_pred ).flatten()
val_labels_pred = ohe.inverse_transform( val_labels_p_pred ).flatten()
test_labels_pred = ohe.inverse_transform( test_labels_p_pred ).flatten()

In [25]:
print("-------- Accuracy --------")
print("Train: {}".format(accuracy_score(train_labels, train_labels_pred)))
print("Validation: {}".format(accuracy_score(val_labels, val_labels_pred)))
print("Test: {}".format(accuracy_score(test_labels, test_labels_pred)))

print("-------- AUC macro --------")
print("Train: {}".format( roc_auc_score(train_labels, train_labels_p_pred, multi_class = "ovr") ))
print("Validation: {}".format( roc_auc_score(val_labels, val_labels_p_pred, multi_class = "ovr") ))
print("Test: {}".format( roc_auc_score(test_labels, test_labels_p_pred, multi_class = "ovr") ))

-------- Accuracy --------
Train: 0.94672
Validation: 0.9288
Test: 0.9232
-------- AUC macro --------
Train: 0.9956167771495312
Validation: 0.9932464148709608
Test: 0.9921239000000002


In [10]:
class ModelMNIST_cureprobs(keras.Model):
    def __init__(self):
        super().__init__()
            
    def define_structure(self, shape_input, seed = 1):
        self.shape_input = shape_input
        
        # Gera uma imagem inteira de zeros com as dimensões do modelo
        dummy_input = keras.layers.Input(shape = self.shape_input)
        
        initializer = initializers.HeNormal(seed = seed)
        # initializer = tf.random_normal_initializer(stddev = 0.005) # IF CONVERGENCE FAILS, RETURN TO THIS INITIALIZER!!!
        
        self.convolution1 = keras.layers.Conv2D(filters = 4, kernel_size = [5,5], padding = "same", activation = tf.nn.leaky_relu,
                                                kernel_initializer = initializer, dtype = tf.float32)
        self.pooling1 = keras.layers.MaxPool2D(pool_size = [2,2], strides = 2)
        # self.convolution2 = keras.layers.Conv2D(filters = 12, kernel_size = [5,5], padding = "same", activation = tf.nn.leaky_relu,
        #                                         kernel_initializer = initializer, dtype = tf.float32)
        # self.pooling2 = keras.layers.MaxPool2D(pool_size = [2,2], strides = 2)
        # self.convolution3 = keras.layers.Conv2D(filters = 32, kernel_size = [5,5], padding = "same", activation = tf.nn.leaky_relu,
        #                                         kernel_initializer = initializer, dtype = tf.float32)
        # self.pooling3 = keras.layers.MaxPool2D(pool_size = [2,2], strides = 2)
        
        self.flatten = keras.layers.Reshape(target_shape=(-1,))
        self.dense1 = keras.layers.Dense(units = 128, activation = tf.nn.tanh, dtype = tf.float32)
        self.dense2 = keras.layers.Dense(units = 5, dtype = tf.float32, activation = "softmax", use_bias = False)
        
        # Initialize the model weights (if not called beforehand, the method .get_weights() returns an empty list)
        self(dummy_input)

        
    def call(self, x_input):
        x = self.convolution1(x_input)
        x = self.pooling1(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = tf.cast(x, dtype = tf.float64)
        return x
    
    def copy(self):
        new_model = ModelMNIST()
        new_model.define_structure(shape_input = self.shape_input)
        new_model.set_weights(self.get_weights())
        return new_model

In [11]:
model2 = ModelMNIST_cureprobs()
model2.define_structure(shape_input = train_images[0].shape, seed = 10)

callbacks = [ TqdmCallback(verbose=0) ]

# Parada precoce (Early stopping) - Evita overfitting e agiliza o treinamento
es = keras.callbacks.EarlyStopping(monitor = 'val_loss',
                                   mode = "min",
                                   min_delta = 0.0,
                                   patience = 5,
                                   restore_best_weights = True)
callbacks.append(es)

model2.compile(
    optimizer = optimizers.Adam(learning_rate = 0.001),
    loss = "categorical_crossentropy",
    run_eagerly = False
)

model2.fit(
    train_images, train_y,
    epochs = 100,
    verbose = 0,
    callbacks = callbacks,
    batch_size = 1024,
    shuffle = True,
    validation_data = (val_images, val_y)
)

 43%|██████████████████████████████████████████████████████████                                                                             | 43/100 [00:16<00:21,  2.60epoch/s, loss=0.131, val_loss=0.211]


<keras.src.callbacks.history.History at 0x79191c0c9a50>

In [12]:
train_labels_p_pred2 = model2.predict(train_images, verbose = False)
val_labels_p_pred2 = model2.predict(val_images, verbose = False)
test_labels_p_pred2 = model2.predict(test_images, verbose = False)

train_labels_pred2 = ohe.inverse_transform( train_labels_p_pred ).flatten()
val_labels_pred2 = ohe.inverse_transform( val_labels_p_pred ).flatten()
test_labels_pred2 = ohe.inverse_transform( test_labels_p_pred ).flatten()

In [13]:
print("-------- Accuracy --------")
print("Train: {}".format(accuracy_score(train_labels, train_labels_pred2)))
print("Validation: {}".format(accuracy_score(val_labels, val_labels_pred2)))
print("Test: {}".format(accuracy_score(test_labels, test_labels_pred2)))

print("-------- AUC macro --------")
print("Train: {}".format( roc_auc_score(train_labels, train_labels_p_pred2, multi_class = "ovr") ))
print("Validation: {}".format( roc_auc_score(val_labels, val_labels_p_pred2, multi_class = "ovr") ))
print("Test: {}".format( roc_auc_score(test_labels, test_labels_p_pred2, multi_class = "ovr") ))

-------- Accuracy --------
Train: 0.94544
Validation: 0.9238
Test: 0.9266
-------- AUC macro --------
Train: 0.9967475762475388
Validation: 0.9928077362993948
Test: 0.9922133500000001
