# CNN Architectures
- Typical architecture: stack a few conv. layers, each followed by a ReLU layer, then a pooling layer, then a few more conv. layers (each again followed by a ReLU layer), then another pooling layer, and so on
- Image gets smaller and smaller but also gets deeper and deeper (more feature maps) as it progresses down the network
- <img src="images/CNNArchitecture.jpeg" width="500" alt="Typical CNN architecture"/> Typical CNN architecture
- **Don't use large kernel sizes (e.g. 5x5) for conv. layers. Instead, stack two layers with smaller kernels (e.g. 3x3): this uses fewer parameters, requires fewer computations, and usually performs better**
    - Exception: the first conv. layer can use a larger kernel, usually together with a stride of 2 or more, because this reduces the spatial dimensions without losing much information

In [1]:
from tensorflow.keras.datasets import fashion_mnist

# load_data() hands back two (images, labels) pairs: the training data first, the test data second.
train_split, test_split = fashion_mnist.load_data()
X_train_full, y_train_full = train_split
X_test, y_test = test_split

In [2]:
from sklearn.model_selection import train_test_split
import numpy as np

# Hold out 20% of the training images for validation.
# random_state pins the shuffle so the split (and every metric computed from it)
# is the same after a kernel restart instead of changing on each run.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=42
)

# Standardize the inputs: statistics are computed along axis 0 of the training split only
# and then reused, unchanged, for the validation and test splits.
X_mean = np.mean(X_train, axis=0, keepdims=True)
X_std = np.std(X_train, axis=0, keepdims=True) + 1e-7  # small epsilon guards against division by zero
X_train, X_val, X_test = [
    (split - X_mean) / X_std for split in (X_train, X_val, X_test)
]


# Append a trailing axis of length 1 (the single grayscale channel) so every image
# has the [height, width, channel] layout.
X_train = np.expand_dims(X_train, axis=-1)
X_val = np.expand_dims(X_val, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

In [3]:
# Optional: import the mlcompute module to use the set_mlc_device API for device selection with ML Compute.
# from tensorflow.python.compiler.mlcompute import mlcompute

# Optional: select the CPU device. Available options are 'cpu', 'gpu', and 'any'.
# mlcompute.set_mlc_device(device_name='cpu')

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout

# Build the CNN layer by layer.
model = Sequential()

# Stem: one large-kernel (7x7) convolution; the trailing 1 in input_shape is the grayscale channel.
model.add(Conv2D(filters=64, kernel_size=7, activation="relu", padding="same", input_shape=[28, 28, 1]))
model.add(MaxPool2D(pool_size=2))

# Two stages of paired small-kernel (3x3) convolutions, each stage closed by a pooling layer.
for n_filters in (128, 256):
    model.add(Conv2D(filters=n_filters, kernel_size=3, activation="relu", padding="same"))
    model.add(Conv2D(filters=n_filters, kernel_size=3, activation="relu", padding="same"))
    model.add(MaxPool2D(pool_size=2))

# Flatten the feature maps before feeding the outputs to the regular feed-forward part of the network.
model.add(Flatten())
for n_units in (128, 64):
    model.add(Dense(n_units, activation="relu"))
    model.add(Dropout(0.5))

# One softmax output per class (10 classes).
model.add(Dense(10, activation="softmax"))

In [None]:
# Configure training: the "sgd" optimizer, sparse categorical cross-entropy loss,
# and accuracy as the reported metric.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"],
)

# Train for a single epoch, passing the validation split as validation_data.
# NOTE(review): the saved output of this cell reads "Epoch 1/5" and the stored test accuracy
# is ~0.095, which does not look like a completed epochs=1 run of this code; re-run the
# notebook from a fresh kernel to confirm the numbers.
model.fit(x=X_train, y=y_train, validation_data=(X_val, y_val), epochs=1)

Epoch 1/5

In [8]:
# Evaluate on the test split; the model was compiled with metrics=["accuracy"],
# so the result holds the loss followed by the accuracy.
score = model.evaluate(x=X_test, y=y_test)



In [14]:
# Display the [loss, accuracy] pair that model.evaluate returned for the test set.
score

[2.358581304550171, 0.09520000219345093]