## Set up

### imports

In [15]:

import sys
import numpy as np
import keras
from sklearn.metrics import classification_report, accuracy_score
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard, CSVLogger
import pandas as pd
import gc
import json

sys.path.append('../src')
from data.utils import load
from models.create_models import create_cnn_model, create_fcnn_model
from keras import backend as K


keras.utils.set_random_seed(42)

### definitions

In [2]:
data_dir = "../data/"

# input image dimensions
img_rows, img_cols = 28, 28

### Carregar dataset

In [3]:
# Load the data
X_train = load(data_dir + 'kmnist-train-imgs.npz')
X_test = load(data_dir + 'kmnist-test-imgs.npz')
y_train = load(data_dir + 'kmnist-train-labels.npz')
y_test = load(data_dir + 'kmnist-test-labels.npz')


if K.image_data_format() == 'channels_first':
    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
    
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('{} train samples, {} test samples'.format(len(X_train), len(X_test)))

# coleta dos valores unicos e das contagens
unique_train, counts_train = np.unique(y_train, return_counts=True)
unique_test, counts_test = np.unique(y_test, return_counts=True)

num_classes = len(unique_train) if list(unique_train) == list(unique_test) else None



# Convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

60000 train samples, 10000 test samples


## EDA

In [4]:
print("Num class train: ", len(unique_train))
print("Num class test: ", len(unique_test))


print('### Treino ###')
for i in range(len(unique_train)): print(f'classe {unique_train[i]}: {counts_train[i]} observações')
print('\n### Teste ###')
for i in range(len(unique_test)): print(f'classe {unique_test[i]}: {counts_test[i]} observações')

Num class train:  10
Num class test:  10
### Treino ###
classe 0: 6000 observações
classe 1: 6000 observações
classe 2: 6000 observações
classe 3: 6000 observações
classe 4: 6000 observações
classe 5: 6000 observações
classe 6: 6000 observações
classe 7: 6000 observações
classe 8: 6000 observações
classe 9: 6000 observações

### Teste ###
classe 0: 1000 observações
classe 1: 1000 observações
classe 2: 1000 observações
classe 3: 1000 observações
classe 4: 1000 observações
classe 5: 1000 observações
classe 6: 1000 observações
classe 7: 1000 observações
classe 8: 1000 observações
classe 9: 1000 observações


## Classification

In [5]:

PATH_MODELS = "../models/"
PATH_LOGS = "../logs/"
RESULTS_PATH = "../results/"

In [6]:
METRICS =[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
        keras.metrics.AUC(name='auc'),
        keras.metrics.TruePositives(name='tp'),
        keras.metrics.TrueNegatives(name='tn'),
        keras.metrics.FalsePositives(name='fp'),
        keras.metrics.FalseNegatives(name='fn')
    ]

I0000 00:00:1725127038.193229  735698 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1725127038.238353  735698 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1725127038.240955  735698 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1725127038.245067  735698 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

### CNN Benchmark

In [7]:
batch_size = 128
epochs = 12

# callbacks do modelo benchmark
callbacks_benchmark = [
    ModelCheckpoint(PATH_MODELS + f'best_model_CNN_benchmark.keras', save_best_only=True, monitor='val_f1_score'),
    TensorBoard(log_dir=PATH_LOGS),
    CSVLogger(PATH_MODELS + f'training_log_CNN_benchmark.csv')
]

In [8]:

# limpa sessao do keras
keras.backend.clear_session()
gc.collect()

# cria modelo
model_benchmark = create_cnn_model(
    input_shape=input_shape,
    num_classes= num_classes,
    metrics= METRICS
)

# treina modelo
history_benchmark = model_benchmark.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, y_test),
          callbacks = callbacks_benchmark
          )

# realiza predicao
train_pred_proba = model_benchmark.predict(X_train)
test_pred_proba = model_benchmark.predict(X_test)

# salva predicao no treino
test_results_benchmark = pd.DataFrame({
    "y_test": y_test.argmax(axis=1),
    "y_test_pred": np.argmax(test_pred_proba, axis =1),
    "y_test_pred_proba": list(test_pred_proba)
    
})
test_results_benchmark.to_csv(RESULTS_PATH + 'test/cnn_benchmark_test_results.csv', index = False)

# salva predicao no teste
train_results_benchmark = pd.DataFrame({
    "y_train": y_train.argmax(axis=1),
    "y_train_pred": np.argmax(train_pred_proba, axis =1),
    "y_train_pred_proba": list(train_pred_proba)
})
train_results_benchmark.to_csv(RESULTS_PATH + 'train/cnn_benchmark_train_results.csv', index = False)

json.dump(history_benchmark.history, open(RESULTS_PATH + "history/benchmark_cnn_history.json", "w"))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-08-31 14:57:19.089254: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.
2024-08-31 14:57:19.250978: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


Epoch 1/12


I0000 00:00:1725127041.903056  735806 service.cc:146] XLA service 0x7b68e400c4d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1725127041.903081  735806 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce GTX 1660 Ti, Compute Capability 7.5
2024-08-31 14:57:21.985896: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-08-31 14:57:22.232239: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m  6/469[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5s[0m 11ms/step - accuracy: 0.1263 - auc: 0.5114 - fn: 448.0000 - fp: 0.0000e+00 - loss: 2.3039 - precision: 0.0000e+00 - recall: 0.0000e+00 - tn: 4032.0000 - tp: 0.0000e+00 

I0000 00:00:1725127045.298363  735806 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 22ms/step - accuracy: 0.1233 - auc: 0.5476 - fn: 30143.5918 - fp: 0.0000e+00 - loss: 2.2922 - precision: 0.0000e+00 - recall: 0.0000e+00 - tn: 271292.3125 - tp: 0.0000e+00 - val_accuracy: 0.1686 - val_auc: 0.6450 - val_fn: 10000.0000 - val_fp: 0.0000e+00 - val_loss: 2.2700 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_tn: 90000.0000 - val_tp: 0.0000e+00
Epoch 2/12
[1m 13/469[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 9ms/step - accuracy: 0.1513 - auc: 0.6207 - fn: 896.0000 - fp: 0.0000e+00 - loss: 2.2668 - precision: 0.0000e+00 - recall: 0.0000e+00 - tn: 8064.0000 - tp: 0.0000e+00

  self._save_model(epoch=epoch, batch=None, logs=logs)


[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.1778 - auc: 0.6462 - fn: 30143.5918 - fp: 0.0000e+00 - loss: 2.2544 - precision: 0.0000e+00 - recall: 0.0000e+00 - tn: 271292.3125 - tp: 0.0000e+00 - val_accuracy: 0.2549 - val_auc: 0.7155 - val_fn: 10000.0000 - val_fp: 0.0000e+00 - val_loss: 2.2363 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_tn: 90000.0000 - val_tp: 0.0000e+00
Epoch 3/12
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.2548 - auc: 0.7184 - fn: 30143.5918 - fp: 0.0000e+00 - loss: 2.2071 - precision: 0.0000e+00 - recall: 0.0000e+00 - tn: 271292.3125 - tp: 0.0000e+00 - val_accuracy: 0.3457 - val_auc: 0.7546 - val_fn: 10000.0000 - val_fp: 0.0000e+00 - val_loss: 2.1908 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_tn: 90000.0000 - val_tp: 0.0000e+00
Epoch 4/12
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.3370 - auc: 0.7691 

2024-08-31 14:58:25.621337: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.
2024-08-31 14:58:25.773806: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


### CNN

In [9]:
batch_size = 128
epochs = 1000

# callbacks cnn
callbacks_cnn = [
    EarlyStopping(monitor='val_loss', patience=10),
    ModelCheckpoint(PATH_MODELS + f'best_model_CNN.keras', save_best_only=True, monitor='val_loss'),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5),
    TensorBoard(log_dir=PATH_LOGS),
    CSVLogger(PATH_MODELS + f'training_log_CNN.csv')
]

In [10]:

# limpa sessao do keras
keras.backend.clear_session()
gc.collect()

# cria modelo
model_cnn = create_cnn_model(
    input_shape=input_shape,
    num_classes= num_classes,
    metrics = METRICS
)

# treina modelo
history_cnn = model_cnn.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, y_test),
          callbacks=callbacks_cnn
          )


# realiza predicao
train_pred_proba = model_cnn.predict(X_train)
test_pred_proba = model_cnn.predict(X_test)

# salva predicao no treino
test_results_cnn = pd.DataFrame({
    "y_test": y_test.argmax(axis=1),
    "y_test_pred": np.argmax(test_pred_proba, axis =1),
    "y_test_pred_proba": list(test_pred_proba)
    
})
test_results_cnn.to_csv(RESULTS_PATH + 'test/cnn_test_results.csv', index = False)

# salva predicao no teste
train_results_cnn = pd.DataFrame({
    "y_train": y_train.argmax(axis=1),
    "y_train_pred": np.argmax(train_pred_proba, axis =1),
    "y_train_pred_proba": list(train_pred_proba)
})
train_results_cnn.to_csv(RESULTS_PATH + 'train/cnn_train_results.csv', index = False)


json.dump(history_cnn.history, open(RESULTS_PATH + "history/cnn_history.json", "w"))



Epoch 1/1000


2024-08-31 14:58:37.676449: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 16ms/step - accuracy: 0.1176 - auc: 0.6618 - fn: 38278.5898 - fp: 160.0000 - loss: 2.2956 - precision: 0.9210 - recall: 0.0602 - tn: 361132.3125 - tp: 1865.0000 - val_accuracy: 0.2012 - val_auc: 0.6421 - val_fn: 10000.0000 - val_fp: 0.0000e+00 - val_loss: 2.2716 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_tn: 90000.0000 - val_tp: 0.0000e+00 - learning_rate: 0.0010
Epoch 2/1000
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.1932 - auc: 0.6462 - fn: 30143.5918 - fp: 0.0000e+00 - loss: 2.2558 - precision: 0.0000e+00 - recall: 0.0000e+00 - tn: 271292.3125 - tp: 0.0000e+00 - val_accuracy: 0.3045 - val_auc: 0.7155 - val_fn: 10000.0000 - val_fp: 0.0000e+00 - val_loss: 2.2373 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_tn: 90000.0000 - val_tp: 0.0000e+00 - learning_rate: 0.0010
Epoch 3/1000
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9

### Fully Connected

In [11]:
batch_size = 128
epochs = 1000

# callbacks cnn
callbacks_fcnn = [
    EarlyStopping(monitor='val_loss', patience=10),
    ModelCheckpoint(PATH_MODELS + f'best_model_fcnn.keras', save_best_only=True, monitor='val_loss'),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5),
    TensorBoard(log_dir=PATH_LOGS),
    CSVLogger(PATH_MODELS + f'training_log_fcnn.csv')
]

In [12]:
# limpa sessao do keras
keras.backend.clear_session()

# cria modelo
model_fcnn = create_fcnn_model(
    input_shape=input_shape,
    num_classes= num_classes,
    metrics = METRICS
)

# treina modelo
history_fcnn = model_fcnn.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, y_test),
          callbacks=callbacks_fcnn
          )


# realiza predicao
train_pred_proba = model_fcnn.predict(X_train)
test_pred_proba = model_fcnn.predict(X_test)

# salva predicao no treino
test_results_fcnn = pd.DataFrame({
    "y_test": y_test.argmax(axis=1),
    "y_test_pred": np.argmax(test_pred_proba, axis =1),
    "y_test_pred_proba": list(test_pred_proba)
    
})
test_results_fcnn.to_csv(RESULTS_PATH + 'test/fcnn_test_results.csv', index = False)


# salva predicao no teste
train_results_fcnn = pd.DataFrame({
    "y_train": y_train.argmax(axis=1),
    "y_train_pred": np.argmax(train_pred_proba, axis =1),
    "y_train_pred_proba": list(train_pred_proba)
})
train_results_fcnn.to_csv(RESULTS_PATH + 'train/fcnn_train_results.csv', index = False)


json.dump(history_fcnn.history, open(RESULTS_PATH + "history/fcnn_history.json", "w"))

  super().__init__(**kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/1000
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.1020 - auc: 0.7329 - fn: 31732.6836 - fp: 1446.3894 - loss: 2.5561 - precision: 0.8548 - recall: 0.2708 - tn: 359845.9375 - tp: 8410.9082 - val_accuracy: 0.0958 - val_auc: 0.5401 - val_fn: 10000.0000 - val_fp: 0.0000e+00 - val_loss: 2.2963 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_tn: 90000.0000 - val_tp: 0.0000e+00 - learning_rate: 0.0010
Epoch 2/1000
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.1064 - auc: 0.5185 - fn: 30082.8945 - fp: 467.4021 - loss: 2.4946 - precision: 0.1167 - recall: 0.0021 - tn: 270824.9062 - tp: 60.6979 - val_accuracy: 0.1187 - val_auc: 0.5759 - val_fn: 10000.0000 - val_fp: 0.0000e+00 - val_loss: 2.2730 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_tn: 90000.0000 - val_tp: 0.0000e+00 - learning_rate: 0.0010
Epoch 3/1000
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 