In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import pandas as pd
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras import Model, regularizers
from tensorflow.keras.layers import Input, Dense, Dropout, Conv2D, MaxPooling2D, UpSampling2D, Conv2DTranspose
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
#os.listdir('/kaggle/input/mnist-in-csv')

In [None]:
# Read the training CSV from the attached Kaggle dataset, then show ten
# randomly sampled rows as the cell output.
train_df = pd.read_csv(filepath_or_buffer='/kaggle/input/mnist-in-csv/mnist_train.csv')
train_df.sample(n=10)

In [None]:
# Read the test CSV from the attached Kaggle dataset, then show ten
# randomly sampled rows as the cell output.
test_df = pd.read_csv(filepath_or_buffer='/kaggle/input/mnist-in-csv/mnist_test.csv')
test_df.sample(n=10)

In [None]:
# Keep every column except the first one (presumably the label column -- TODO
# confirm against the CSV header). .copy() detaches the result from the
# source frames.
feature_columns = slice(1, None)
train_x = train_df.iloc[:, feature_columns].copy()
test_x = test_df.iloc[:, feature_columns].copy()

# Report the container type once, then the shape of each split.
print('data type: {}'.format(type(train_x)))
for shape_message, frame in (('train_x shape: {}', train_x),
                             ('test_x shape: {}', test_x)):
    print(shape_message.format(frame.shape))

In [None]:
# Convert both feature frames to NumPy arrays first ...
x_train = train_x.to_numpy()
x_test = test_x.to_numpy()

# ... then build float32 copies divided by 255. The unscaled arrays are kept
# alongside the normalized ones.
x_train_nor = x_train.astype(np.float32) / 255
x_test_nor = x_test.astype(np.float32) / 255

print(type(x_train))
print(x_train_nor.shape)

In [None]:
# Trailing dimensions of one image: 28 rows x 28 columns x 1 channel.
image_dims = (28, 28, 1)

# Reshape every array from (N, 784) rows to (N, 28, 28, 1) images; the sample
# count N is taken from each array's own first dimension.
x_train = x_train.reshape((x_train.shape[0],) + image_dims)
x_train_nor = x_train_nor.reshape((x_train_nor.shape[0],) + image_dims)

print('x_train shape: {}'.format(x_train.shape))
print('x_train_nor shape: {}'.format(x_train_nor.shape))

x_test = x_test.reshape((x_test.shape[0],) + image_dims)
x_test_nor = x_test_nor.reshape((x_test_nor.shape[0],) + image_dims)

print('x_test shape: {}'.format(x_test.shape))
print('x_test_nor shape: {}'.format(x_test_nor.shape))

In [None]:
# Preview the first (un-normalized) training image with the axis ticks hidden.
preview_fig, preview_ax = plt.subplots()
preview_ax.imshow(x_train[0].reshape(28, 28))
preview_ax.set_xticks([])
preview_ax.set_yticks([])
plt.show()

In [None]:
#-- Noise strength: multiplier applied to the Gaussian noise --#
noise_factor = 0.5

# Seeded generator so the noisy datasets (and therefore the training run) are
# reproducible after "Restart & Run All".
noise_rng = np.random.default_rng(42)

#-- Add the noise to the normalized images --#
# standard_normal draws from a Gaussian with mean 0 and standard deviation 1,
# i.e. the same distribution as np.random.normal(loc=0.0, scale=1.0).
#   size  >> output shape (one noise value per pixel)
#   dtype >> float32, so the sum stays float32 instead of being promoted to
#            float64 (which would double the memory of both datasets)
x_train_noisy = x_train_nor + noise_factor * noise_rng.standard_normal(
    size=x_train_nor.shape, dtype=np.float32)

x_test_noisy = x_test_nor + noise_factor * noise_rng.standard_normal(
    size=x_test_nor.shape, dtype=np.float32)

#-- Limit the array values to the range 0~1 --#
# clip(limit) the values in an array
# set interval [0, 1]
# values smaller than 0 become 0
# values larger than 1 become 1
x_train_noisy = np.clip(x_train_noisy, a_min=0., a_max=1.)
x_test_noisy = np.clip(x_test_noisy, a_min=0., a_max=1.)

In [None]:
n = 10

# A single row of n panels showing the first n noisy training images in
# grayscale, with the ticks removed from every panel.
fig, axes = plt.subplots(nrows=1, ncols=n, figsize=(20, 2))
for idx, panel in enumerate(axes.ravel()):
    panel.imshow(x_train_noisy[idx].reshape(28, 28), cmap='gray')
    panel.set(xticks=[], yticks=[])

plt.show()

In [None]:
# Settings shared by every hidden convolution: 32 filters, 3x3 kernels,
# 'same' padding and ReLU activation.
hidden_conv = dict(filters=32, kernel_size=(3, 3), padding='same', activation='relu')
# Settings shared by both pooling layers: 2x2 window moved with stride 2.
pool_2x2 = dict(pool_size=(2, 2), strides=(2, 2))

input_layer = Input(shape=(28, 28, 1))

# Encoder: conv -> pool -> conv -> pool.
x = Conv2D(**hidden_conv)(input_layer)
x = MaxPooling2D(**pool_2x2)(x)
x = Conv2D(**hidden_conv)(x)
encoder_layer = MaxPooling2D(**pool_2x2)(x)

# Decoder: conv -> upsample -> conv -> upsample, mirroring the two pooling
# steps with two 2x upsampling steps.
x = Conv2D(**hidden_conv)(encoder_layer)
x = UpSampling2D(size=(2, 2))(x)
x = Conv2D(**hidden_conv)(x)
x = UpSampling2D(size=(2, 2))(x)

# Output: a single-filter convolution with sigmoid activation.
decoder_layer = Conv2D(filters=1,
                       kernel_size=(3, 3),
                       padding='same',
                       activation='sigmoid')(x)

#-- Build the AutoEncoder --#
conv_auto_encoder = Model(inputs=input_layer, outputs=decoder_layer)

In [None]:
# Print the layer-by-layer summary of the autoencoder.
conv_auto_encoder.summary()

In [None]:
# Configure training: Adam optimizer with binary cross-entropy as the loss.
conv_auto_encoder.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
# Checkpoint callback: whenever val_loss improves, overwrite the saved model so
# only the best one is kept. The target must be a file path, not a bare
# directory; Keras 3 rejects a filepath that does not end in `.keras`/`.h5`.
callbacks = ModelCheckpoint(filepath='/kaggle/working/conv_auto_encoder_best.keras',
                            monitor='val_loss',
                            save_best_only=True)

In [None]:
# Train the denoiser: noisy images are the inputs, clean normalized images the
# targets. The validation targets must be the normalized test images
# (x_test_nor), matching the training targets; the raw x_test array is not
# divided by 255, so using it with a sigmoid output and binary cross-entropy
# makes val_loss (which the checkpoint callback monitors) meaningless.
history = conv_auto_encoder.fit(x_train_noisy, x_train_nor,
                                epochs=200, batch_size=128,
                                validation_data=(x_test_noisy, x_test_nor),
                                shuffle=True, callbacks=[callbacks])

In [None]:
# Run the trained autoencoder over the noisy training images; `decoded` holds
# the model's outputs for them.
decoded = conv_auto_encoder.predict(x_train_noisy)

In [None]:
n = 10

fig = plt.figure(figsize=(16,4), tight_layout=True)

# 2 x n grid: top row shows the noisy inputs, bottom row the model outputs.
for i in range(n):
    # Top row: subplot positions 1..n.
    ax = fig.add_subplot(2, n, i + 1)
    ax.imshow(x_train_noisy[i].reshape(28,28), cmap='gray')
    ax.set_xticks([])
    ax.set_yticks([])

    # Bottom row: positions n+1..2n. Derived from n rather than the previous
    # hard-coded offset of 11, which was only correct for n == 10.
    ax = fig.add_subplot(2, n, n + i + 1)
    ax.imshow(decoded[i].reshape(28,28), cmap='gray')
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()