<left><img src="https://i.ibb.co/zWjkHsWJ/marca-final-rgb-campanha-2025-versao02.png" width="35%" height="35%"></left>

# Tópicos em Matemática Aplicada: Deep Learning (Aula 07)

Data: 23/set/25


In [1]:
import os
os.environ["KERAS_BACKEND"] = "torch"

In [2]:
import keras
import torch
from keras.datasets import cifar10
from keras.models import Sequential
from keras import datasets, layers, models
from keras.utils import to_categorical
from keras import regularizers
from keras.layers import Dense, Dropout, BatchNormalization
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns

# Checa a versão do keras e verifica se está de fato usando o Pytorch
print(f"Versão do Keras: {keras.__version__}")
print(f"Keras está usando o backend: {keras.backend.backend()}")
# Verifica se a GPU está disponível e define o dispositivo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando o dispositivo: {device}")

Versão do Keras: 3.10.0
Keras está usando o backend: torch
Usando o dispositivo: cuda


In [3]:
# Use o backend PyTorch

from keras import layers, regularizers
from keras import optimizers
from keras import callbacks
from keras import losses, metrics

# Configurações
NUM_CLASSES = 10
BATCH_SIZE = 128
EPOCHS = 75
WEIGHT_DECAY = 1e-4
SEED = 42

keras.utils.set_random_seed(SEED)

# Dados: CIFAR-10 (numpy arrays, compatível com qualquer backend)
from keras.datasets import cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train = y_train.squeeze().astype("int32")
y_test = y_test.squeeze().astype("int32")

# Bloco residual (BasicBlock) pós-ativação
def residual_block(x, filters, stride=1, weight_decay=1e-4):
    shortcut = x

    y = layers.Conv2D(
        filters, 3, strides=stride, padding="same",
        use_bias=False, kernel_initializer="he_normal",
        kernel_regularizer=regularizers.l2(weight_decay),
    )(x)
    y = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)(y)
    y = layers.ReLU()(y)

    y = layers.Conv2D(
        filters, 3, strides=1, padding="same",
        use_bias=False, kernel_initializer="he_normal",
        kernel_regularizer=regularizers.l2(weight_decay),
    )(y)
    y = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)(y)

    if stride != 1 or shortcut.shape[-1] != filters:
        shortcut = layers.Conv2D(
            filters, 1, strides=stride, padding="same",
            use_bias=False, kernel_initializer="he_normal",
            kernel_regularizer=regularizers.l2(weight_decay),
        )(shortcut)
        shortcut = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)(shortcut)

    out = layers.Add()([y, shortcut])
    out = layers.ReLU()(out)
    return out

def build_resnet20(input_shape=(32, 32, 3), num_classes=10, weight_decay=1e-4):
    inputs = keras.Input(shape=input_shape)

    # Escala para [0,1]
    CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
    CIFAR10_STD = [0.2023, 0.1994, 0.2010]
    # A camada Normalization usa a VARIÂNCIA (desvio padrão ao quadrado)
    CIFAR10_VARIANCE = [std**2 for std in CIFAR10_STD]

    # Passo 1: Escala para [0,1]
    x = layers.Rescaling(1/255.0)(inputs)
    # Passo 2: Aplica a normalização de média e desvio padrão
    x = layers.Normalization(mean=CIFAR10_MEAN, variance=CIFAR10_VARIANCE)(x)

    # Data augmentation (apenas no treino)
    x = layers.ZeroPadding2D(4)(x)
    x = layers.RandomCrop(32, 32)(x)
    x = layers.RandomFlip("horizontal")(x)
    # x = layers.RandomRotation(0.1)(x)  # opcional

    # Conv inicial 3x3, 16 filtros
    x = layers.Conv2D(
        16, 3, strides=1, padding="same",
        use_bias=False, kernel_initializer="he_normal",
        kernel_regularizer=regularizers.l2(weight_decay),
    )(x)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5)(x)
    x = layers.ReLU()(x)

    # Estágio 1: 3 blocos 16 filtros
    x = residual_block(x, 16, stride=1, weight_decay=weight_decay)
    x = residual_block(x, 16, stride=1, weight_decay=weight_decay)
    x = residual_block(x, 16, stride=1, weight_decay=weight_decay)

    # Estágio 2: 3 blocos 32 filtros (downsample no primeiro)
    x = residual_block(x, 32, stride=2, weight_decay=weight_decay)
    x = residual_block(x, 32, stride=1, weight_decay=weight_decay)
    x = residual_block(x, 32, stride=1, weight_decay=weight_decay)

    # Estágio 3: 3 blocos 64 filtros (downsample no primeiro)
    x = residual_block(x, 64, stride=2, weight_decay=weight_decay)
    x = residual_block(x, 64, stride=1, weight_decay=weight_decay)
    x = residual_block(x, 64, stride=1, weight_decay=weight_decay)

    # Cabeça de classificação
    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(
        num_classes, activation="softmax",
        kernel_regularizer=regularizers.l2(weight_decay),
    )(x)

    model = keras.Model(inputs, outputs, name="ResNet20_CIFAR10")
    return model


model = build_resnet20(input_shape=(32, 32, 3), num_classes=NUM_CLASSES, weight_decay=WEIGHT_DECAY)
model.summary()

optimizer = optimizers.Adam(learning_rate=0.001)
loss = losses.SparseCategoricalCrossentropy()
metric_list = [metrics.SparseCategoricalAccuracy(name="acc")]

model.compile(optimizer=optimizer, loss=loss, metrics=metric_list)

# Lista de callbacks
cbs = [
    # Reduz o learning rate quando a val_loss não melhora por 3 épocas
    callbacks.ReduceLROnPlateau(
        monitor="val_loss", # Métrica a ser monitorada
        factor=0.2,         # Fator pelo qual o learning rate será reduzido (new_lr = lr * factor)
        patience=3,         # Número de épocas sem melhora para acionar a redução
        min_lr=1e-6,        # Limite inferior para o learning rate
        verbose=1           # Imprime uma mensagem quando o LR é atualizado
    ),
    # Salva o melhor modelo com base na menor val_loss
    callbacks.ModelCheckpoint(
        "resnet20_cifar10.weights.h5",
        monitor="val_loss",
        save_best_only=True,
        save_weights_only=True,
        mode="min", # "min" porque queremos minimizar a loss
    ),
    # Para o treinamento se a val_loss não melhorar por 10 épocas
    callbacks.EarlyStopping(
        monitor="val_loss",
        patience=10,
        restore_best_weights=True # Garante que o modelo final tenha os melhores pesos
    ),
]

# a função fit contendo o callback
history = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
    shuffle=True,
    callbacks=cbs # Não se esqueça de passar os callbacks para o fit!
)

eval_loss, eval_acc = model.evaluate(x_test, y_test, verbose=0)
print(f"Acurácia no teste: {eval_acc:.4f}")


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 0us/step


Epoch 1/75
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 75ms/step - acc: 0.3519 - loss: 1.9400 - val_acc: 0.4250 - val_loss: 1.8152 - learning_rate: 0.0010
Epoch 2/75
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 74ms/step - acc: 0.5847 - loss: 1.3134 - val_acc: 0.5942 - val_loss: 1.3202 - learning_rate: 0.0010
Epoch 3/75
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 73ms/step - acc: 0.6691 - loss: 1.0910 - val_acc: 0.6902 - val_loss: 1.0702 - learning_rate: 0.0010
Epoch 4/75
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 73ms/step - acc: 0.7177 - loss: 0.9488 - val_acc: 0.6214 - val_loss: 1.2542 - learning_rate: 0.0010
Epoch 5/75
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 74ms/step - acc: 0.7502 - loss: 0.8653 - val_acc: 0.6614 - val_loss: 1.0796 - learning_rate: 0.0010
Epoch 6/75
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 73ms/step - acc: 0.7688 - loss: 