## Autoencoder convolucional

## Instalando bibliotecas

In [1]:
!pip install -q tensorflow==2.16.1

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib

2025-01-20 15:16:15.870250: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-20 15:16:15.874069: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-20 15:16:15.928504: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Record the library versions this notebook was executed with.
tuple(biblioteca.__version__ for biblioteca in (tf, np, matplotlib))

('2.16.1', '1.26.4', '3.8.4')

In [4]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import InputLayer, Dense, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Reshape
import matplotlib.pyplot as plt

## Carregamento da base de dados

In [5]:
# mnist.load_data() returns (images, labels) pairs for the train and test splits.
# Only the images are kept; the labels are discarded into `_`.
treinamento, teste = mnist.load_data()
X_treinamento, _ = treinamento
X_teste, _ = teste

In [6]:
# Shapes of the raw train / test image arrays.
(X_treinamento.shape, X_teste.shape)

((60000, 28, 28), (10000, 28, 28))

## Pré-processamento da base - normalização

In [7]:
# Shapes before the channel axis is added (still 3-D: samples, height, width).
tuple(imagens.shape for imagens in (X_treinamento, X_teste))

((60000, 28, 28), (10000, 28, 28))

In [8]:
# Conv2D expects a trailing channel axis: (samples, 28, 28) -> (samples, 28, 28, 1).
# -1 lets NumPy infer the number of samples.
X_treinamento = X_treinamento.reshape(-1, 28, 28, 1)
X_teste = X_teste.reshape(-1, 28, 28, 1)

In [9]:
# Shapes after the reshape: a single-channel axis should now be present.
(X_treinamento.shape, X_teste.shape)

((60000, 28, 28, 1), (10000, 28, 28, 1))

In [10]:
# Scale the pixel values by 1/255 and store them as float32.
# The dtype guard keeps this cell idempotent: once an array has been converted
# to float32, re-running the cell will not divide it by 255 a second time.
if X_treinamento.dtype != np.float32:
    X_treinamento = X_treinamento.astype('float32') / 255
if X_teste.dtype != np.float32:
    X_teste = X_teste.astype('float32') / 255

## Criação do Autoencoder

In [11]:
# Convolutional autoencoder: 28x28x1 image -> 128-value code -> 28x28x1 image.
# Shape comments give (height x width x channels) after each layer.
autoencoder = Sequential([
    # Encoder
    InputLayer(shape=(28, 28, 1)),
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),  # 26x26x16 (no padding)
    MaxPooling2D(pool_size=(2, 2)),  # 13x13x16

    Conv2D(filters=8, kernel_size=(3, 3), activation='relu', padding='same'),  # 13x13x8
    MaxPooling2D(pool_size=(2, 2), padding='same'),  # 7x7x8

    Conv2D(filters=8, kernel_size=(3, 3), activation='relu', padding='same', strides=(2, 2)),  # 4x4x8
    Flatten(),  # 128 values: the encoded representation

    # Decoder
    Reshape((4, 4, 8)),

    Conv2D(filters=8, kernel_size=(3, 3), activation='relu', padding='same'),  # 4x4x8
    UpSampling2D(size=(2, 2)),  # 8x8x8

    Conv2D(filters=8, kernel_size=(3, 3), activation='relu', padding='same'),  # 8x8x8
    UpSampling2D(size=(2, 2)),  # 16x16x8

    # No padding here on purpose: 16 -> 14, so the final upsampling lands on 28.
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),  # 14x14x16
    UpSampling2D(size=(2, 2)),  # 28x28x16

    # Sigmoid keeps every reconstructed pixel in [0, 1].
    Conv2D(filters=1, kernel_size=(3, 3), activation='sigmoid', padding='same'),  # 28x28x1
])

autoencoder.summary()

In [12]:
# Pixel-wise binary cross-entropy between the input image and its reconstruction.
# NOTE(review): 'accuracy' is of limited meaning for continuous pixel targets;
# the loss is the quantity to watch. Confirm whether the metric is wanted.
autoencoder.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# The images are both input and target: the network learns to reproduce them.
# The test images are used as validation data.
autoencoder.fit(
    x=X_treinamento,
    y=X_treinamento,
    batch_size=256,
    epochs=50,
    validation_data=(X_teste, X_teste),
)

Epoch 1/50
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 273ms/step - accuracy: 0.8002 - loss: 0.3533 - val_accuracy: 0.7900 - val_loss: 0.1689
Epoch 2/50
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 364ms/step - accuracy: 0.7949 - loss: 0.1614 - val_accuracy: 0.8002 - val_loss: 0.1429
Epoch 3/50
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 405ms/step - accuracy: 0.8025 - loss: 0.1402 - val_accuracy: 0.8055 - val_loss: 0.1305
Epoch 4/50
[1m146/235[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m34s[0m 387ms/step - accuracy: 0.8050 - loss: 0.1297

## Visualizar resultados

In [None]:
# Build the encoder as a sub-model that reuses the layers of `autoencoder`,
# from the input of its first convolution up to the output of the Flatten layer.
#
# The layers are located by type rather than by the auto-generated names
# 'conv2d' / 'flatten': Keras appends a counter to those names (conv2d_1, ...)
# whenever layers are created again in the same kernel, so a name lookup only
# works the first time the model cell is executed.
primeira_conv = next(
    camada for camada in autoencoder.layers if isinstance(camada, Conv2D)
)
camada_flatten = next(
    camada for camada in autoencoder.layers if isinstance(camada, Flatten)
)
encoder = Model(inputs=primeira_conv.input, outputs=camada_flatten.output)

encoder.summary()

In [None]:
# Encode every test image; one flattened code vector per image.
imagens_codificadas = encoder.predict(X_teste)
np.shape(imagens_codificadas)

In [None]:
# Run the full autoencoder on the test images to obtain their reconstructions.
imagens_decodificadas = autoencoder.predict(X_teste)
np.shape(imagens_decodificadas)

In [None]:
numero_imagens = 10
# A seeded generator makes the selection reproducible across runs, and sampling
# without replacement guarantees the same test image is never shown twice.
gerador = np.random.default_rng(42)
imagens_teste = gerador.choice(X_teste.shape[0], size=numero_imagens, replace=False)

In [None]:
# One column per selected test image, three rows: original, code, reconstruction.
# The grid is sized from the selection itself instead of a fixed 10x10 layout,
# so the rows stay aligned for any number of images and no empty rows are drawn.
rotulos_linhas = ('Original', 'Codificada', 'Reconstruída')
n_linhas = len(rotulos_linhas)
n_colunas = len(imagens_teste)

# squeeze=False keeps `eixos` two-dimensional even when only one image is shown.
fig, eixos = plt.subplots(
    n_linhas,
    n_colunas,
    figsize=(1.8 * n_colunas, 1.8 * n_linhas),
    squeeze=False,
)

for coluna, indice_imagem in enumerate(imagens_teste):
    paineis = (
        X_teste[indice_imagem].reshape(28, 28),
        # The 128-value code is shown as a 16x8 image purely for display.
        imagens_codificadas[indice_imagem].reshape(16, 8),
        imagens_decodificadas[indice_imagem].reshape(28, 28),
    )
    for linha, painel in enumerate(paineis):
        eixo = eixos[linha, coluna]
        eixo.imshow(painel)
        eixo.set_xticks([])
        eixo.set_yticks([])

# Label each row once, on the left-most column.
for linha, rotulo in enumerate(rotulos_linhas):
    eixos[linha, 0].set_ylabel(rotulo)

plt.show()