# Deliverable 2 - Image processing

## The subject
The goal is to denoise a set of photographs so that Machine Learning algorithms can process them more easily. This Jupyter notebook explains the pre-processing steps, then builds a convolutional auto-encoder and applies it to improve the image quality.

## Import

### Disable Tensorflow's warnings

In [None]:
import os

# Raise TensorFlow's native log threshold to level 2. This runs in its own
# cell, ahead of the cell that imports tensorflow.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# Base directory under which the dataset and the saved models are stored.
RUN_DIR = "tf/"

In [None]:
import sys
import time
import tensorflow as tf
import numpy as np
from keras.utils import load_img, img_to_array
from matplotlib import pyplot as plt
from skimage.util import random_noise
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile

## Global Variables

In [None]:
# Training hyper-parameters.
BATCH_SIZE: int = 34
EPOCHS: int = 20

# Size, in pixels, every image is resized to when it is loaded.
IMG_HEIGHT: int = 228
IMG_WIDTH: int = 228

# Remote archive holding the dataset, and the local dataset folder.
ZIP_PATH: str = (
    'https://raw.githubusercontent.com/Stan-fld/auto_encoder_data/main/data_ae.zip'
)
DATASET_PATH: str = f'{RUN_DIR}data_ae'

## Progress bar

In [None]:
def progressbar(it, prefix="", size=60, file=sys.stdout):
    """Yield the items of ``it`` while drawing a text progress bar on ``file``.

    Each frame ends with a carriage return so the next frame overwrites it,
    and a single newline is written once the iteration is over.

    Args:
        it: Sized iterable to walk through; ``len(it)`` must be supported.
        prefix: Text written in front of the bar.
        size: Width of the bar, in characters.
        file: Writable text stream receiving the bar (``sys.stdout`` by
            default).

    Yields:
        Each item of ``it``, in order.
    """
    count = len(it)

    def show(j):
        # An empty input has nothing left to do: draw a full bar instead of
        # dividing by zero.
        x = int(size * j / count) if count else size
        file.write("%s[%s%s] %i/%i\r" % (prefix, "#" * x, "." * (size - x), j, count))
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1)
    # Terminate the line only once, after the last frame; a newline after
    # every frame would defeat the in-place redraw done with "\r".
    file.write("\n")
    file.flush()

## Load dataset

### Import dataset from github

In [None]:
# Download the archive into memory and unpack it into the dataset folder.
# The context managers close the HTTP response and the archive even when the
# download or the extraction fails.
with urlopen(ZIP_PATH) as http_response:
    with ZipFile(BytesIO(http_response.read())) as zipfile:
        zipfile.extractall(path=DATASET_PATH)

### Normal datasets

In [None]:
def get_images_as_array(folder_dir, name):
    """Load every ``.jpg`` file of ``folder_dir`` into one scaled array.

    Args:
        folder_dir: Directory containing the images.
        name: Label shown in the progress bar.

    Returns:
        A NumPy array stacking the images, each loaded with a target size of
        ``(IMG_HEIGHT, IMG_WIDTH)``, with every value divided by 255.
    """
    # Select the images before iterating so the progress bar counts real
    # images only, and sort them because os.listdir returns entries in
    # arbitrary order.
    file_names = sorted(
        entry for entry in os.listdir(folder_dir) if entry.endswith(".jpg")
    )
    images = []
    for file_name in progressbar(file_names, f'Generate dataset {name} : ', 50):
        image = load_img(f"{folder_dir}/{file_name}", target_size=(IMG_HEIGHT, IMG_WIDTH))
        images.append(img_to_array(image))
    return np.array(images) / 255


# Original training images
train_data = get_images_as_array(f'{DATASET_PATH}/training', 'training')
# Original validation images
val_data = get_images_as_array(f'{DATASET_PATH}/validation', 'validation')

### Noisy datasets

In [None]:
def noisy_gauss(images, name):
    """Return a noisy version of every image in ``images``.

    Four kinds of noise are stacked on each image, in this order: Gaussian,
    salt & pepper, Poisson, then speckle.

    Args:
        images: Iterable of images supporting ``len()``.
        name: Label shown in the progress bar.

    Returns:
        A NumPy array holding the noisy images, in input order.
    """
    # Keyword arguments of each random_noise call, applied top to bottom.
    noise_steps = (
        {'mode': 'gaussian', 'mean': 0, 'var': 0.3},
        {'mode': 's&p', 'amount': 0.2, 'salt_vs_pepper': 0.5},
        {'mode': 'poisson'},
        {'mode': 'speckle', 'mean': 0, 'var': 0.1},
    )
    corrupted = []
    for picture in progressbar(images, f'Generate dataset {name} : ', 50):
        for step in noise_steps:
            picture = random_noise(picture, **step)
        corrupted.append(picture)
    return np.array(corrupted)


# Noisy versions of the training and validation images
train_noisy_data = noisy_gauss(train_data.copy(), 'training noisy')
val_noisy_data = noisy_gauss(val_data.copy(), 'validation noisy')

# x holds the noisy images, y the original images they were generated from
x_train, y_train = train_noisy_data, train_data
x_val, y_val = val_noisy_data, val_data

## Functions to display the images

In [None]:
def display_single_image(img):
    """Draw ``img`` in a new square figure with the axes turned off."""
    side = 4
    plt.figure(figsize=(side, side))
    plt.imshow(img)
    plt.axis("off")


def display_image(x, n):
    """Show the first ``n`` images of ``x`` side by side on a single row.

    Args:
        x: Indexable collection of images.
        n: Number of images to draw, starting at index 0.
    """
    plt.figure(figsize=(20, 2))
    for position in range(1, n + 1):
        axes = plt.subplot(1, n, position)
        plt.imshow(np.array(x[position - 1]), vmax=1)
        # Hide both axes so that only the picture remains visible.
        axes.xaxis.set_visible(False)
        axes.yaxis.set_visible(False)
    plt.show()

In [None]:
# Quick check: show the first original image, then the first noisy one
for sample in (y_train[0], x_train[0]):
    display_single_image(sample)

In [None]:
# Print a caption, then the first five images, for each of the four datasets
for caption, dataset in (
    ("Training set", y_train),
    ("Noisy training set", x_train),
    ("Validation Set", y_val),
    ("Noisy validation set", x_val),
):
    print(caption)
    display_image(dataset, 5)

## Encoder

In [None]:
# Model input: IMG_HEIGHT x IMG_WIDTH images with 3 channels
inputs = tf.keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Two stages, each a 3x3 convolution followed by a 2x2 max-pooling
encoder = tf.keras.layers.Conv2D(
    filters=32, kernel_size=(3, 3), activation='relu', padding='same'
)(inputs)
encoder = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), padding='same')(encoder)
encoder = tf.keras.layers.Conv2D(
    filters=64, kernel_size=(3, 3), activation='relu', padding='same'
)(encoder)
encoder = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), padding='same')(encoder)

## Decoder

In [None]:
# Two stages, each a 3x3 transposed convolution followed by a 2x2 up-sampling
decoder = tf.keras.layers.Conv2DTranspose(
    64, (3, 3), activation='relu', padding='same'
)(encoder)
decoder = tf.keras.layers.UpSampling2D(size=(2, 2))(decoder)
decoder = tf.keras.layers.Conv2DTranspose(
    64, (3, 3), activation='relu', padding='same'
)(decoder)
decoder = tf.keras.layers.UpSampling2D(size=(2, 2))(decoder)

# Output layer: 3 channels with a sigmoid activation
decoder = tf.keras.layers.Conv2DTranspose(
    3, (3, 3), activation='sigmoid', padding='same'
)(decoder)

## Auto encoder

In [None]:
# Full model, from the input tensor to the decoder output
auto_encoder = tf.keras.Model(inputs=inputs, outputs=decoder, name="auto_encoder")

# Adam optimizer (learning rate 0.001) with a binary cross-entropy loss
auto_encoder.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
)

## Save the best model

In [None]:
# Checkpoint file; with save_best_only=True the callback only keeps the best
# model according to the quantity it monitors.
filename = f"{RUN_DIR}models/best_model.h5"
callback_best_model = tf.keras.callbacks.ModelCheckpoint(
    filepath=filename,
    save_best_only=True,
    verbose=0,
)

## Train the model

In [None]:
# auto_encoder.summary()

# Fit the auto-encoder with the noisy images as input and the original images
# as target, requesting device '/GPU:0'.
with tf.device('/GPU:0'):
    history = auto_encoder.fit(
        x=x_train,
        y=y_train,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        verbose=1,
        shuffle=True,
        validation_data=(x_val, y_val),
        callbacks=[callback_best_model],
    )

## Loss curve

In [None]:
# Losses recorded in the training history
loss = history.history['loss']
val_loss = history.history['val_loss']
# Size the x axis from the recorded losses rather than from EPOCHS, so it
# always has the same length as the curves being plotted.
epochs_range = range(len(loss))
plt.figure(figsize=(16, 8))
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()