# <h1>Document Denoising Using an Autoencoder Model</h1>

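Datasets: https://github.com/kwcckw/shabby_data_normal_quality (training and validation images), extended with the training images of the Kaggle `denoising-dirty-documents` dataset.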

In [None]:
# import libraries

import numpy as np
import cv2
import random
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
import glob


# <h1> Data Preparation</h1>

In [None]:
# create dirs

train_input_path = '/kaggle/working/train/input_images/'
train_target_path = '/kaggle/working/train/target_images/'

val_input_path = '/kaggle/working/val/input_images/'
val_target_path = '/kaggle/working/val/target_images/'

test_input_path = '/kaggle/working/test/input_images/'
test_target_path = '/kaggle/working/test/target_images/'

# exist_ok=True keeps this cell re-runnable: a bare os.makedirs() raises
# FileExistsError as soon as the directories exist from a previous execution.
for data_dir in (
    train_input_path, train_target_path,
    val_input_path, val_target_path,
    test_input_path, test_target_path,
):
    os.makedirs(data_dir, exist_ok=True)


In [None]:
# move data into the correct dirs
#
# Each source folder is copied recursively (-a preserves file attributes) into
# the destination directory, so the images end up one level deeper, e.g. in
# /kaggle/working/train/input_images/train/ (the folder the "Add new data"
# cell copies additional images into).

# train: noisy inputs and their cleaned targets
!cp -ar /kaggle/input/shabby-data-normal-quality/images_normal_quality/cropped/train/ /kaggle/working/train/input_images/
!cp -ar /kaggle/input/shabby-data-normal-quality/images_normal_quality/cropped/train_cleaned/ /kaggle/working/train/target_images/

# validate: noisy inputs and their cleaned targets
!cp -ar /kaggle/input/shabby-data-normal-quality/images_normal_quality/cropped/validate/ /kaggle/working/val/input_images/
!cp -ar /kaggle/input/shabby-data-normal-quality/images_normal_quality/cropped/validate_cleaned/ /kaggle/working/val/target_images/

# test
# Disabled: the test images are read from the shabby_images repository that
# is cloned in the prediction section instead.
# !cp -ar /kaggle/input/shabby-data-normal-quality/images_low_quality/cropped/test/ /kaggle/working/test/input_images/
# !cp -ar /kaggle/input/shabby-data-normal-quality/images_low_quality/cropped/test_cleaned/ /kaggle/working/test/target_images/

In [None]:
# Add new data

os.makedirs('/kaggle/working/denoising-dirty-documents/')

!unzip /kaggle/input/denoising-dirty-documents/train_cleaned.zip -d /kaggle/working/denoising-dirty-documents/
!unzip /kaggle/input/denoising-dirty-documents/train.zip -d /kaggle/working/denoising-dirty-documents/

!cp -ar /kaggle/working/denoising-dirty-documents/train/* /kaggle/working/train/input_images/train
!cp -ar /kaggle/working/denoising-dirty-documents/train_cleaned/* /kaggle/working/train/target_images/train_cleaned



In [None]:
# training params
# batch_size: images per generator batch; epoch_size: upper bound on epochs
# handed to model.fit.
batch_size, epoch_size = 32, 150

In [None]:
# create training generators

# Options shared by the noisy-input stream and the clean-target stream.
# Both use identical settings with shuffling disabled; the pairing below
# relies on the two folders being read in matching order
# (NOTE(review): assumes matching file names in both folders - confirm).
train_flow_options = dict(
    batch_size=batch_size,
    color_mode='grayscale',
    target_size=(400, 400),
    class_mode=None,
    shuffle=False,
    seed=0,
)

train_input_data_gen = ImageDataGenerator(rescale=1./255)
train_target_data_gen = ImageDataGenerator(rescale=1./255)

train_input_image_generator = train_input_data_gen.flow_from_directory(
    train_input_path, **train_flow_options)
train_target_image_generator = train_target_data_gen.flow_from_directory(
    train_target_path, **train_flow_options)

# Pairs one batch from the input stream with one batch from the target stream.
train_generator = zip(train_input_image_generator, train_target_image_generator)


In [None]:
# create validation generator

# Options shared by the noisy-input stream and the clean-target stream.
# Both use identical settings with shuffling disabled; the pairing below
# relies on the two folders being read in matching order
# (NOTE(review): assumes matching file names in both folders - confirm).
val_flow_options = dict(
    batch_size=batch_size,
    color_mode='grayscale',
    target_size=(400, 400),
    class_mode=None,
    shuffle=False,
    seed=0,
)

val_input_data_gen = ImageDataGenerator(rescale=1./255)
val_target_data_gen = ImageDataGenerator(rescale=1./255)

val_input_image_generator = val_input_data_gen.flow_from_directory(
    val_input_path, **val_flow_options)
val_target_image_generator = val_target_data_gen.flow_from_directory(
    val_target_path, **val_flow_options)

# Pairs one batch from the input stream with one batch from the target stream.
val_generator = zip(val_input_image_generator, val_target_image_generator)

In [None]:
# Display some training images and target images

from matplotlib import pyplot as plt

# Plot the first image of each of the first six batches: noisy input on the
# left, its target on the right. Iterating here advances both training
# generators by six batches.
paired_batches = zip(train_input_image_generator, train_target_image_generator)
for batch_idx, (noisy_batch, clean_batch) in enumerate(paired_batches):
    # generators rescale to [0, 1]; convert back to 8-bit for display
    noisy_img = (noisy_batch[0][:, :, 0] * 255).astype('uint8')
    clean_img = (clean_batch[0][:, :, 0] * 255).astype('uint8')

    plt.figure()
    plt.subplot(121)
    plt.imshow(noisy_img, cmap='gray')
    plt.subplot(122)
    plt.imshow(clean_img, cmap='gray')

    if batch_idx >= 5:
        break
    

# <h1> Create and train model </h1>

In [None]:
# Create model

def autoencoder():
    """Build and compile the convolutional denoising autoencoder.

    The network takes 400x400 single-channel images, encodes them with two
    stride-2 convolutions, decodes them with two stride-2 transposed
    convolutions and ends in a single-channel sigmoid layer. It is compiled
    with the Adam optimizer, mean-squared-error loss and an MAE metric.

    Prints the model summary as a side effect.

    Returns:
        The compiled Sequential model.
    """
    # settings shared by all strided relu (de)convolutions
    strided_relu = dict(activation='relu', strides=2, padding='same')

    model = Sequential([
        # input layer
        layers.Input(shape=(400, 400, 1)),

        # encoder section
        layers.Conv2D(32, (3, 3), **strided_relu),
        layers.Conv2D(64, (3, 3), **strided_relu),
        layers.BatchNormalization(),

        # decoder section
        layers.Conv2DTranspose(64, (3, 3), **strided_relu),
        layers.Conv2DTranspose(32, (3, 3), **strided_relu),
        layers.BatchNormalization(),
        layers.Conv2DTranspose(1, (3, 3), activation='sigmoid', strides=1, padding='same'),
    ])

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    model.summary()

    return model

# create model
model = autoencoder()

In [None]:
# fit model

training_sample = train_input_image_generator.samples
validate_sample = val_input_image_generator.samples

# The zipped generators never signal the end of an epoch themselves, so the
# number of batches per epoch is given explicitly. Keras expects integer step
# counts, while np.ceil returns a float - hence the int() cast.
steps_per_epoch = int(np.ceil(training_sample / batch_size))
validation_steps = int(np.ceil(validate_sample / batch_size))

# Stop once val_loss has not improved for 20 epochs. restore_best_weights=True
# rolls the model back to the best epoch; without it the model would keep the
# weights of the last epoch, i.e. 20 epochs past the best validation loss.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)


model.fit(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=epoch_size,
        validation_data=val_generator,
        validation_steps=validation_steps,
        callbacks=[callback]
        )


# <h1> Predict clean images and create the submission file</h1>

In [None]:
# Switch the working directory so that the repository clone and the files
# written with relative paths by later cells (output_images.zip,
# submission.csv) end up under /kaggle/working/.
%cd /kaggle/working/

In [None]:
# Fetch the shabby_images repository; its Datasets/test folder is the source
# of the test images used for prediction.
!git clone https://github.com/kwcckw/shabby_images/

In [None]:
# NOTE: this rebinds test_input_path, which the "create dirs" cell first set
# to /kaggle/working/test/input_images/. From here on it points at the test
# set inside the cloned shabby_images repository.
test_input_path = "/kaggle/working/shabby_images/Datasets/test"

In [None]:
def preprocess(path):
    """Load an image as a scaled grayscale array shaped for the model.

    Args:
        path: Path of the image file to read.

    Returns:
        A float32 numpy array of shape (400, 400, 1) holding the grayscale
        pixel values divided by 255.

    Raises:
        ValueError: If the file cannot be read as an image, or if the image
            is not 400x400 pixels.
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # unchecked, this only surfaced later as an opaque reshape error.
        raise ValueError("Could not read image: {!r}".format(path))

    img = np.asarray(img, dtype="float32") / 255.0  # scaling the pixel values

    if img.shape != (400, 400):
        # reshape() alone would accept any image with 160000 pixels
        # (e.g. 200x800) and silently scramble its rows.
        raise ValueError(
            "Expected a 400x400 image, got shape {} for {!r}".format(img.shape, path)
        )

    return img.reshape(400, 400, 1)

In [None]:
# get testing image

# Sorted so that the order of the paths (and of the predictions made from
# them) is deterministic.
img_test_path = sorted(glob.glob(test_input_path + '/input/*'))

# One preprocessed array per file, combined into a single batch array.
test_imgs = np.asarray([preprocess(file_path) for file_path in img_test_path])

In [None]:
# get cleaned images using trained model

img_predicted = model.predict(test_imgs, batch_size=2)

# Predictions come back in the same order as img_test_path, so each one is
# saved under the file name of the noisy image it was computed from.
for predicted, testing_path in zip(img_predicted, img_test_path):
    # drop the channel axis and scale the model output back to 8-bit
    predicted_squeezed = (np.squeeze(predicted) * 255).astype("uint8")
    output_path = os.path.join(test_target_path, os.path.basename(testing_path))

    # cv2.imwrite reports failure through its return value rather than by
    # raising, so an unchecked call can silently produce no output file.
    if not cv2.imwrite(output_path, predicted_squeezed):
        raise IOError("Failed to write image: {}".format(output_path))

In [None]:
# get cleaned images (optional)

from IPython.display import FileLink

# Pack the folder of predicted images into output_images.zip (written to the
# current working directory), then end the cell with a FileLink pointing at
# the archive so it can be downloaded.
!zip -r output_images.zip /kaggle/working/test/target_images/
FileLink(r'output_images.zip')

In [None]:
# Display some input testing image and cleaned image from the model

from matplotlib import pyplot as plt

# First six test images: noisy input on the left, model output on the right.
for noisy_path in img_test_path[:6]:
    # the cleaned image was saved under the same file name
    clean_path = test_target_path + os.path.basename(noisy_path)

    img_noisy = cv2.imread(noisy_path, cv2.IMREAD_GRAYSCALE)
    img_clean = cv2.imread(clean_path, cv2.IMREAD_GRAYSCALE)

    plt.figure()
    for position, picture in ((121, img_noisy), (122, img_clean)):
        plt.subplot(position)
        plt.imshow(picture, cmap='gray')
    

In [None]:
# create submission file


cleaned_images_dir = '/kaggle/working/test/target_images/'

def select_pixels(img):
    """Pick random, distinct (row, column) coordinates from a 2-D image.

    Makes 10000 draws with the module-level ``random`` generator and keeps a
    coordinate only the first time it is drawn, so the result holds at most
    10000 entries, in draw order. The sequence of ``random`` calls is exactly
    one row draw followed by one column draw per iteration, so for a given
    seed the selected pixels are reproducible.

    Args:
        img: Object whose ``shape`` is a 2-tuple ``(rows, columns)``.

    Returns:
        List of unique ``(y, x)`` tuples.
    """
    y, x = img.shape

    pixels = []
    # Set used only for O(1) duplicate detection; checking membership in the
    # list itself made the loop quadratic in the number of draws.
    seen = set()

    for _ in range(10000):
        pixel = (random.randrange(y), random.randrange(x))

        if pixel not in seen:
            seen.add(pixel)
            pixels.append(pixel)

    return pixels


# Fixed seed so that select_pixels draws the same coordinates on every run.
random.seed(0)

cleaned_images = sorted(os.listdir(cleaned_images_dir))

with open("submission.csv", "w") as submission_file:
    submission_file.write("id,predicted\n")

    print("Processing images...")
    # File numbers are 1-based and follow the sorted file order.
    for filenum, image in enumerate(tqdm(cleaned_images), start=1):

        img = cv2.imread(cleaned_images_dir + image, cv2.IMREAD_GRAYSCALE)

        # One row per sampled pixel: "<file>_<row>_<col>,<intensity / 255>".
        submission_file.writelines(
            "{}_{}_{},{}\n".format(filenum, y, x, img[y][x]/255.0)
            for y, x in select_pixels(img)
        )
    print('Done!')

In [None]:
# get submission file

from IPython.display import FileLink

# Last expression of the cell, so the notebook renders it as a link to the
# submission.csv written by the previous cell.
FileLink(r'submission.csv')