### CNN - Convolutional Neural Networks

Instead of relying only on fully connected nodes with activation functions (classic Neural Networks), CNNs learn convolution matrices (kernels). Convolution (e.g. edge detection) is a very popular operator on images. A model / network that is adapted to its problem usually gives better results.

In [None]:
# import
import gzip
import numpy as np

import keras.backend as K
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout
from keras.models import Sequential
from keras.utils import to_categorical

In [None]:
# FashionMNIST: https://github.com/zalandoresearch/fashion-mnist
def open_images(filename):
    """Load a gzip-compressed image file into a float32 array.

    The first 16 bytes of the decompressed data are skipped as header; every
    following byte is read as one unsigned 8-bit pixel and the pixels are
    grouped into 28x28 images.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        A float32 array of shape (n_images, 28, 28) holding the raw pixel
        values (no rescaling is applied).
    """
    with gzip.open(filename, "rb") as handle:
        raw = handle.read()

    # Interpret everything after the header as uint8 pixels
    pixels = np.frombuffer(raw, dtype=np.uint8, offset=16)
    images = pixels.reshape(-1, 28, 28)
    return images.astype(np.float32)


def open_labels(filename):
    """Load a gzip-compressed label file into a uint8 array.

    The first 8 bytes of the decompressed data are skipped as header; every
    following byte is returned as one label.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        A one-dimensional uint8 array with one entry per label.
    """
    with gzip.open(filename, "rb") as handle:
        raw = handle.read()

    # Everything after the header is one unsigned byte per label
    return np.frombuffer(raw, dtype=np.uint8, offset=8)

In [None]:
# Prepare data
# Load the test split and the training split from the gzip files
x_test = open_images("../res/fashionmnist/t10k-images-idx3-ubyte.gz")
y_test = open_labels("../res/fashionmnist/t10k-labels-idx1-ubyte.gz")

x_train = open_images("../res/fashionmnist/train-images-idx3-ubyte.gz")
y_train = open_labels("../res/fashionmnist/train-labels-idx1-ubyte.gz")

# Convert the integer labels into one-hot rows so that a single model can be
# trained to detect all classes at once
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

In [None]:
# Initialize model: a single convolutional layer feeding a softmax classifier
model = Sequential([
    # Convolutional layer: 30 learnable 3x3 filters applied to 28x28 images
    # with one channel
    Conv2D(
        filters=30,
        kernel_size=(3, 3),
        activation="relu",
        input_shape=(28, 28, 1),
    ),
    # Flatten the feature maps into one vector per image for the dense layer
    Flatten(),
    # Output layer: 10 softmax units
    Dense(10, activation="softmax"),
])

In [None]:
# Compile model: cross-entropy loss on the one-hot labels, tracked with accuracy
model.compile(
    loss="categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

In [None]:
# Train model
# The images get a trailing channel axis to match the (28, 28, 1) input shape
model.fit(
    x=x_train.reshape(-1, 28, 28, 1),
    y=y_train,
    batch_size=1000,
    epochs=10,
)

In [None]:
# Evaluate using TEST data (images the model has not been trained on)
print(model.metrics_names)
print(model.evaluate(x=x_test.reshape(-1, 28, 28, 1), y=y_test))

### Visualize Learned matrices

In [None]:
# Fetch the learned convolution matrices of the first layer
# get_weights() hands the layer's weights back as NumPy arrays on the CPU
# (the model itself might be stored on the GPU); entry 0 holds the matrices
learned_matrices = model.layers[0].get_weights()[0]

In [None]:
# Visualize learned matrices: draw every filter of the first layer in a grid
import matplotlib.pyplot as plt

# The last axis of the weight tensor indexes the filters
n_filters = learned_matrices.shape[3]
rows = 5
# Round up so the grid always has a cell for every filter
columns = max(1, (n_filters + rows - 1) // rows)

fig = plt.figure(figsize=(2.0*columns, 2.0*rows))

# Filter indices start at 0 while subplot positions start at 1
for i in range(n_filters):
    # The layer has a single input channel, so index 0 selects the 2-D matrix
    img = learned_matrices[:, :, 0, i]
    ax = fig.add_subplot(rows, columns, i + 1)
    ax.imshow(img)
    ax.axis('off')

### Visualize result / operation of a convolutional layer

In [None]:
# Initialize second model
model2 = Sequential()

# Re-create the convolutional layer of the first model in the second model
# NOTE(review): the activation here is "sigmoid" while the first model's
# layer uses "relu" - confirm that this difference is intentional
model2.add(Conv2D(
    filters=30,
    kernel_size=(3, 3),
    activation="sigmoid",
    input_shape=(28, 28, 1)))

# Copy the trained weights over with set_weights() instead of the legacy
# `weights=` constructor keyword
model2.layers[0].set_weights(model.layers[0].get_weights())

In [None]:
# Visualize sample image
# The magic below makes the notebook render figures inline (IPython only)
%matplotlib inline
import matplotlib.pyplot as plt

# Show the first test image with the inverted gray colormap
plt.imshow(x_test[0], cmap = "gray_r")
plt.show()

In [None]:
# Forward sample image through the layer (model)
# The single image is reshaped into a batch of one with a channel axis
result = model2.predict(x=x_test[0].reshape(1, 28, 28, 1))

In [None]:
# Visualize result of all convolutional matrices for the sample image
# The last axis of the prediction indexes the output of each filter
n_maps = result.shape[3]
rows = 5
# Round up so the grid always has a cell for every filter output
columns = max(1, (n_maps + rows - 1) // rows)

fig = plt.figure(figsize=(2.0*columns, 2.0*rows))

# Channel indices start at 0 while subplot positions start at 1
for i in range(n_maps):
    # result[0] is the output for the single image that was predicted
    img = result[0][:, :, i]
    ax = fig.add_subplot(rows, columns, i + 1)
    # Pass cmap="gray_r" to imshow for the same colormap as the sample image
    ax.imshow(img)
    ax.axis('off')

### Max-Pooling Layer
Operation: Take the highest value within each pooling window (e.g. 2x2) of the feature map.
- Makes model more robust / generic
- Smaller output > fewer weights in the following layers, so training is faster

In [None]:
# Initialize model: convolution, max-pooling, then a softmax classifier
model3 = Sequential([
    # Convolutional layer: 10 learnable 3x3 filters applied to 28x28 images
    # with one channel
    Conv2D(
        filters=10,
        kernel_size=(3, 3),
        activation="relu",
        input_shape=(28, 28, 1),
    ),
    # MaxPooling layer: keep only the highest value of every 2x2 window
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the pooled feature maps into one vector per image
    Flatten(),
    # Output layer: 10 softmax units
    Dense(10, activation="softmax"),
])

In [None]:
# Compile model: cross-entropy loss on the one-hot labels, tracked with accuracy
model3.compile(
    loss="categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

In [None]:
# Train model
# The images get a trailing channel axis to match the (28, 28, 1) input shape
model3.fit(
    x=x_train.reshape(-1, 28, 28, 1),
    y=y_train,
    batch_size=1000,
    epochs=10,
)

In [None]:
# Evaluate using TEST data (images the model has not been trained on)
print(model3.metrics_names)
print(model3.evaluate(x=x_test.reshape(-1, 28, 28, 1), y=y_test))

### Visualize CNN
An Interactive Node-Link Visualization of Convolutional Neural Networks
https://www.popsci.com/gaze-inside-mind-artificial-intelligence/

### Dropout
Idea: In each training step, randomly switch off some units / connections in the net (during training only) to make it more generic.
Apply a Dropout layer when your model performance is a lot better on training dataset compared to test dataset (possibly overfitting).

In [None]:
# Initialize model: convolution, max-pooling and dropout, followed by a
# hidden dense layer and a softmax classifier
model4 = Sequential([
    # Convolutional layer: 10 learnable 3x3 filters applied to 28x28 images
    # with one channel
    Conv2D(
        filters=10,
        kernel_size=(3, 3),
        activation="relu",
        input_shape=(28, 28, 1),
    ),
    # MaxPooling layer: keep only the highest value of every 2x2 window
    MaxPooling2D(pool_size=(2, 2)),
    # Dropout layer: randomly zeroes 25% of its inputs while training
    Dropout(0.25),
    # Flatten the feature maps into one vector per image
    Flatten(),
    # Hidden layer: 100 fully connected sigmoid units
    Dense(100, activation="sigmoid"),
    # Output layer: 10 softmax units
    Dense(10, activation="softmax"),
])

In [None]:
# Compile model: cross-entropy loss on the one-hot labels, tracked with accuracy
model4.compile(
    loss="categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

In [None]:
# Train model
# The images get a trailing channel axis to match the (28, 28, 1) input shape
model4.fit(
    x=x_train.reshape(-1, 28, 28, 1),
    y=y_train,
    batch_size=1000,
    epochs=10,
)

In [None]:
# Evaluate using TRAIN data first and TEST data second; a large gap between
# the two results hints at overfitting
print(model4.metrics_names)
print(model4.evaluate(x=x_train.reshape(-1, 28, 28, 1), y=y_train))
print(model4.evaluate(x=x_test.reshape(-1, 28, 28, 1), y=y_test))