## Importing the libraries

In [None]:
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

from matplotlib.image import imread

import os

import cv2

from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix

from tensorflow.keras.utils import to_categorical

from keras.models import Sequential

from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

from tensorflow.keras.callbacks import ModelCheckpoint

from tensorflow.keras.optimizers import Adam

## 載入圖片，並轉換成灰階

In [None]:
def load_data(data_dir):
    """Load the digit images stored under ``data_dir`` as grayscale arrays.

    The directory must contain ten sub-folders named ``0`` to ``9``. Every
    file in folder ``i`` is read in grayscale mode and labelled ``i``. Files
    for which ``cv2.imread`` returns ``None`` are skipped.

    Args:
        data_dir: Path of the dataset root directory.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays holding one entry per
        successfully read image, in matching order.
    """
    images = []
    labels = []
    for digit in range(10):
        folder = os.path.join(data_dir, str(digit))
        # os.listdir returns names in arbitrary order; sorting keeps the
        # sample order -- and any seeded split made from it -- reproducible.
        for filename in sorted(os.listdir(folder)):
            img = cv2.imread(os.path.join(folder, filename), cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            images.append(img)
            labels.append(digit)
    return np.array(images), np.array(labels)

# Dataset root on Kaggle; load_data reads the sub-folders 0-9 beneath it.
data_dir = '/kaggle/input/corrupted-mnist/mnist'
images, labels = load_data(data_dir)

## 切分成訓練資料、測試資料，9:1的比例

In [None]:
# Hold out 10% of the samples for testing. Stratifying on the labels keeps the
# class proportions alike in both parts, and the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.1,
    stratify=labels,
    random_state=3,
)

## 隨機顯示訓練資料的9張圖片

In [None]:
def display_rand_images(images, labels):
    """Show nine randomly chosen images in a 3x3 grid, titled with their labels.

    Indices are drawn independently with ``np.random.randint``, so the same
    image may appear more than once.

    Args:
        images: Array of images indexed along its first axis.
        labels: Sequence of labels aligned with ``images``.
    """
    plt.figure(1, figsize=(19, 10))
    for position in range(1, 10):
        idx = np.random.randint(0, images.shape[0])

        plt.subplot(3, 3, position)
        # The images in this notebook are read in grayscale mode, so draw them
        # with a gray colormap instead of matplotlib's default false colours.
        plt.imshow(images[idx], cmap='gray')

        plt.title('Number : {}'.format(labels[idx]))
        plt.xticks([])
        plt.yticks([])

    # The spacing applies to the whole figure, so it is set once, not per tile.
    plt.subplots_adjust(hspace=0.3, wspace=0.3)
    plt.show()


display_rand_images(X_train, y_train)

In [None]:
# Add a trailing channel axis so every 32x32 image has the (32, 32, 1) layout
# and store the pixels as float32.
X_train = X_train.reshape((X_train.shape[0], 32, 32, 1)).astype(np.float32)
X_test = X_test.reshape((X_test.shape[0], 32, 32, 1)).astype(np.float32)

print('X_train:', X_train.shape)
print('X_test:', X_test.shape)

## 資料正規化（將像素值縮放到 0～1）

In [None]:
# Divide by 255 so the pixel values are scaled into the range [0, 1].
X_train, X_test = X_train / 255, X_test / 255

In [None]:
# One-hot encode the integer labels for the categorical cross-entropy loss.
# The width is pinned to 10 so both matrices always match the network's
# 10 output units, even if the highest digit were absent from one split.
y_TrainOneHot = to_categorical(y_train, num_classes=10)

y_TestOneHot = to_categorical(y_test, num_classes=10)

## 建立CNN模型

In [None]:
# Build the CNN as one Sequential stack: three convolution + max-pooling
# stages, then dropout and a dense classifier.
model = Sequential([
    # Stage 1: 32 filters of 5x5 with 'same' padding on the 32x32x1 input.
    Conv2D(32, kernel_size=(5, 5), padding='same', activation='relu', input_shape=(32, 32, 1)),
    # 2x2 max pooling.
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 2: 64 filters of 5x5.
    Conv2D(64, kernel_size=(5, 5), activation='relu'),
    # 2x2 max pooling.
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 3: 128 filters of 5x5.
    Conv2D(128, kernel_size=(5, 5), activation='relu'),
    # 2x2 max pooling.
    MaxPooling2D(pool_size=(2, 2)),
    # Dropout with a rate of 0.25.
    Dropout(0.25),
    # Flatten the feature maps into a vector for the dense layers.
    Flatten(),
    # Fully connected hidden layer with 256 units.
    Dense(256, activation='relu'),
    # Output layer: 10 softmax units, one per digit class.
    Dense(10, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()      

## 訓練模型

In [None]:
# 選擇損失函數、優化方法及成效衡量方式
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

train_history = model.fit(x=X_train, y=y_TrainOneHot, validation_split=0.2, epochs=20, batch_size=100, verbose=2)

## 評估訓練結果

In [None]:
def show_train_history(train_history, train, validation):
    """Plot a training series and its validation counterpart per epoch.

    Args:
        train_history: Object returned by ``model.fit``; its ``history``
            mapping is indexed with the two keys below.
        train: Key of the training series, e.g. ``'accuracy'``.
        validation: Key of the validation series, e.g. ``'val_accuracy'``.
    """
    history = train_history.history
    plt.plot(history[train])
    plt.plot(history[validation])

    plt.title('Train History')
    # Label the y-axis with the metric actually plotted rather than a fixed
    # string, so the accuracy and loss charts can be told apart.
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')

    plt.show()


# Accuracy on the training and validation data over the epochs.
show_train_history(train_history, 'accuracy', 'val_accuracy')

# Loss on the training and validation data over the epochs.
show_train_history(train_history, 'loss', 'val_loss')

## 使用未參與訓練的測試資料，評估模型準確率

In [None]:
# The model was compiled with metrics=['accuracy'], so evaluate() returns the
# loss followed by the accuracy; index 1 is the accuracy on the test set.
scores = model.evaluate(X_test,y_TestOneHot,verbose=0)

print('Accuracy = ', scores[1])

## 混淆矩陣

In [None]:
# Predicted class = index of the largest network output; the true class is
# recovered from the one-hot test labels in the same way.
y_pred = np.argmax(model.predict(X_test), axis=-1)
y_true = np.argmax(y_TestOneHot, axis=-1)

# Store the result under its own name: assigning it to `confusion_matrix`
# would overwrite the imported sklearn function, and running this cell a
# second time would then fail with a TypeError.
conf_mat = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(10, 7))
sns.heatmap(conf_mat, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()