In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# The Convolutional Classifier

* Use modern `deep-learning` networks to build an `image classifier` with Keras
* Design your own `custom convnet` with reusable blocks
* Learn the fundamental ideas behind visual `feature extraction`
* Master the art of `transfer learning` to boost your models
* Utilize `data augmentation` to extend your dataset

Goal is to learn how a neural network can understand a natural image well-enough to solve the same kinds of problems the human visual system can solve. 

`Convolutional neural networks` (also called `convnets` or `CNNs`) are the networks best suited to this task. Convolution is the mathematical operation that gives the layers of a convnet their unique structure. Here the `CNN` is applied to `image classification`; by the end, you should be ready to learn advanced applications like `generative adversarial networks` and `image segmentation`.

### The Convolutional Classifier
A convnet used for image classification consists of two parts: a `convolutional base` and a `dense head`.
![](https://i.imgur.com/U0n5xjU.png)
- The base is used to `extract the features` from an image.
- The head is used to `determine the class` of the image. 

Visual feature could be a line, a color, a texture, a shape, a pattern -- or some complicated combination.
![](https://i.imgur.com/UUAafkn.png)

### Training the Classifier
The goal of the network during training is to learn two things:
1. which features to extract from an image (base),
2. which class goes with what features (head).

Convnets are rarely trained from scratch. More often, we `reuse the base of a pretrained model` and attach an `untrained head` to it. 
![](https://imgur.com/E49fsmV.png)
Because the head usually consists of only a few dense layers, very accurate classifiers can be created from relatively little data. Reusing a pretrained model is a technique known as `transfer learning`. 

### Example - Train a Convnet Classifier
Will create classifiers to solve: `is this a picture of a Car or of a Truck?`. 

In [None]:
# Step1: load the data: training split `ds_train` and validation split `ds_valid`
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Reproducibility
def set_seed(seed=31415):
    """Seed NumPy and TensorFlow and export the seed-related environment variables.

    Args:
        seed: Value passed to ``np.random.seed`` and ``tf.random.set_seed``;
            its string form is stored in ``PYTHONHASHSEED``.

    Side effects:
        Sets ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` in ``os.environ``.
    """
    for seeder in (np.random.seed, tf.random.set_seed):
        seeder(seed)
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })
set_seed(31415)

# Matplotlib defaults shared by every figure in this notebook
plt.rc('figure', autolayout=True)
plt.rc(
    'axes',
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=18,
    titlepad=10,
)
plt.rc('image', cmap='magma')
warnings.filterwarnings("ignore")  # keep output cells free of warning noise


# Load the training split (shuffled) and then the validation split (not shuffled)
# with otherwise identical settings: binary labels inferred from the directory
# names, 128x128 images, batches of 64.
ds_train_, ds_valid_ = (
    image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='binary',
        image_size=[128, 128],
        interpolation='nearest',
        batch_size=64,
        shuffle=shuffle_split,
    )
    for directory, shuffle_split in (
        ('../input/car-or-truck/train', True),
        ('../input/car-or-truck/valid', False),
    )
)

# Data Pipeline
def convert_to_float(image, label):
    """Return ``(image, label)`` with the image converted to ``tf.float32``.

    The conversion is delegated to ``tf.image.convert_image_dtype``; the label
    is passed through untouched so the function can be used with
    ``Dataset.map`` on ``(image, label)`` pairs.
    """
    return tf.image.convert_image_dtype(image, dtype=tf.float32), label

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Both splits go through the same stages: float conversion -> cache -> prefetch.
ds_train = ds_train_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)
ds_valid = ds_valid_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)

The most commonly used dataset for pretraining is [ImageNet](http://image-net.org/about-overview), a large dataset of many kinds of natural images. Keras includes a variety of models pretrained on ImageNet in its `applications` [module](https://www.tensorflow.org/api_docs/python/tf/keras/applications). The pretrained model used here is called `VGG16`, which is loaded from the [CV_Course_Models](https://www.kaggle.com/ryanholbrook/cv-course-models) dataset.

We set `pretrained_base.trainable = False` because, in transfer learning, the loaded `pretrained_base` is already trained and does not need to be trained again.

In [None]:
# Step2: Define Pretrained Base
# Load the saved VGG16 base, then mark it as non-trainable.
pretrained_base = tf.keras.models.load_model('../input/cv-course-models/cv-course-models/vgg16-pretrained-base')
pretrained_base.trainable = False

In [None]:
# Step3: Attach Classifier Head
from tensorflow import keras
from tensorflow.keras import layers

# Stack the frozen base and a small dense head into one model.
# `Flatten` reshapes the outputs of the base into the one-dimensional
# inputs needed by the dense layers of the head.
model = keras.Sequential([
    pretrained_base,                        # feature extractor loaded above
    layers.Flatten(),                       # feature maps -> 1-D vector
    layers.Dense(6, activation='relu'),     # small hidden layer of the head
    layers.Dense(1, activation='sigmoid'),  # single sigmoid unit for the binary label
])

In [None]:
# Step4: Train the model
# use the binary versions of crossentropy and accuracy
# Binary loss/metric match the single sigmoid output and `label_mode='binary'`.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

# `history` keeps the per-epoch training and validation metrics for plotting below.
history = model.fit(
    ds_train,
    validation_data=ds_valid,
    epochs=30,
)

When training a neural network, it's always a good idea to examine the `loss` and `metric` plots. The `history` object contains this information in a dictionary `history.history`.

In [None]:
import pandas as pd

# One row per epoch; chart the loss curves and the accuracy curves in separate figures.
history_frame = pd.DataFrame(history.history)
history_frame[['loss', 'val_loss']].plot()
history_frame[['binary_accuracy', 'val_binary_accuracy']].plot();

Learned about the structure of a convnet classifier: a `head` to act as a classifier atop of a `base` which performs the feature extraction. The head is an ordinary classifier. It uses those features extracted by the base. This is the basic idea behind convolutional classifiers: that we can attach a unit that performs feature engineering to the classifier itself.



### Exercise: The Convolutional Classifier
Used [car-or-truck](https://www.kaggle.com/ryanholbrook/car-or-truck), [computer-vision-resources](https://www.kaggle.com/ryanholbrook/computer-vision-resources) and [cv-courses-models](https://www.kaggle.com/ryanholbrook/cv-course-models) datasets.

In [None]:
from learntools.core import binder
binder.bind(globals())
from learntools.computer_vision.ex1 import *

# Imports
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Reproducibility
def set_seed(seed=31415):
    """Seed NumPy and TensorFlow and export the seed-related environment variables.

    Args:
        seed: Value passed to ``np.random.seed`` and ``tf.random.set_seed``;
            its string form is stored in ``PYTHONHASHSEED``.

    Side effects:
        Sets ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` in ``os.environ``.
    """
    for seeder in (np.random.seed, tf.random.set_seed):
        seeder(seed)
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })
set_seed()

# Matplotlib defaults shared by every figure in this notebook
plt.rc('figure', autolayout=True)
plt.rc(
    'axes',
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=18,
    titlepad=10,
)
plt.rc('image', cmap='magma')
warnings.filterwarnings("ignore")  # keep output cells free of warning noise


# Load the training split (shuffled) and then the validation split (not shuffled)
# with otherwise identical settings: binary labels inferred from the directory
# names, 128x128 images, batches of 64.
ds_train_, ds_valid_ = (
    image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='binary',
        image_size=[128, 128],
        interpolation='nearest',
        batch_size=64,
        shuffle=shuffle_split,
    )
    for directory, shuffle_split in (
        ('../input/car-or-truck/train', True),
        ('../input/car-or-truck/valid', False),
    )
)

# Data Pipeline
def convert_to_float(image, label):
    """Return ``(image, label)`` with the image converted to ``tf.float32``.

    The conversion is delegated to ``tf.image.convert_image_dtype``; the label
    is passed through untouched so the function can be used with
    ``Dataset.map`` on ``(image, label)`` pairs.
    """
    return tf.image.convert_image_dtype(image, dtype=tf.float32), label

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Both splits go through the same stages: float conversion -> cache -> prefetch.
ds_train = ds_train_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)
ds_valid = ds_valid_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)

# Load InceptionV1 model pretrained on ImageNet
import tensorflow_hub as hub

# Load the saved InceptionV1 base, then mark it as non-trainable.
pretrained_base = tf.keras.models.load_model('../input/cv-course-models/cv-course-models/inceptionv1')
pretrained_base.trainable = False

from tensorflow import keras
from tensorflow.keras import layers

# Same head as in the VGG16 example, attached to the InceptionV1 base.
model = keras.Sequential([
    pretrained_base,                       # feature extractor loaded above
    layers.Flatten(),                      # feature maps -> 1-D vector
    layers.Dense(6, activation='relu'),    # small hidden layer of the head
    layers.Dense(1, activation='sigmoid')  # single sigmoid unit for the binary label
])
# NOTE(review): Adam is created with an explicit epsilon of 0.01 rather than the
# library default -- confirm this value is intentional.
optimizer = tf.keras.optimizers.Adam(epsilon=0.01)
# Binary loss/metric match the single sigmoid output and `label_mode='binary'`.
model.compile(
    optimizer=optimizer,
    loss = 'binary_crossentropy',
    metrics=['binary_accuracy'],
)
# `history` keeps the per-epoch training and validation metrics for plotting below.
history = model.fit(
    ds_train,
    validation_data=ds_valid,
    epochs=30,
)

import pandas as pd
# One row per epoch; chart the loss curves and the accuracy curves in separate figures.
history_frame = pd.DataFrame(history.history)
history_frame[['loss', 'val_loss']].plot()
history_frame[['binary_accuracy', 'val_binary_accuracy']].plot();

---

# Convolution and ReLU


In [None]:
import numpy as np
from itertools import product

def show_kernel(kernel, label=True, digits=None, text_size=28):
    """Draw a two-dimensional kernel as a heat map on the current Matplotlib axes.

    Args:
        kernel: Array-like accepted by ``np.array``. It must be two-dimensional,
            because its shape is unpacked into a row count and a column count.
        label: When true, write each cell's value at the centre of the cell.
        digits: If not ``None``, round the kernel to this many decimals first.
        text_size: Font size used for the value labels.
    """
    values = np.array(kernel)
    if digits is not None:
        values = values.round(digits)

    palette = plt.get_cmap('Blues_r')
    plt.imshow(values, cmap=palette)
    n_rows, n_cols = values.shape
    midpoint = (values.max() + values.min()) / 2

    if label:
        for row in range(n_rows):
            for col in range(n_cols):
                cell = values[row, col]
                # Cells above the midpoint get colormap entry 0 for their text,
                # all others get entry 255.
                if cell > midpoint:
                    text_color = palette(0)
                else:
                    text_color = palette(255)
                plt.text(col, row, cell,
                         color=text_color, size=text_size,
                         horizontalalignment='center', verticalalignment='center')

    # Hide both tick axes; only the grid of cells is of interest.
    plt.xticks([])
    plt.yticks([])

A convolutional classifier has two parts: a convolutional `base` and a `head` of dense layers. The job of the base is to extract visual features from an image, which the head then uses to classify the image.

This section covers the two most important types of layers found in the base of a convolutional image classifier: the `convolutional layer` with `ReLU activation` and the `maximum pooling layer`.

### Feature Extraction
The `feature extraction` performed by the base consists of three basic operations:
- `Filter` an image for a particular feature (convolution)
- `Detect` that feature within the filtered image (ReLU)
- `Condense` the image to enhance the features (maximum pooling)

![](https://i.imgur.com/IYO9lqp.png)
The figure shows how these three operations are able to isolate some particular characteristic of the original image.
- Network will perform several extractions in parallel on a single image.
- Not uncommon for the final layer in the base to be producing over 1000 unique visual features.

### Filter with Convolution
A convolutional layer carries out the `filtering` step. A convolutional layer in Keras can be defined as:
```
layers.Conv2D(filters=64, kernel_size=3)
```


#### Weights
The `weights`, a convnet learns during training are primarily contained in its convolutional layers. These weights are called `kernels`. ![](https://i.imgur.com/uJfD9r9.png)

A kernel operates by scanning over an image and producing a weighted sum of pixel values. Kernel will act sort of like a polarized lens, emphasizing or deemphasizing certain patterns of information.
<figure>
<img src="https://i.imgur.com/j3lk26U.png" width="300">
</figure>

- Kernels define how a convolutional layer is connected to the layer that follows. In the figure above:
    - the kernel will connect each neuron in the output to nine neurons in the input.
    - By setting the dimensions of the kernels with `kernel_size`, telling the convnet how to form these connections.
    - Most often, a kernel will have odd-numbered dimensions --> `kernel_size=(3, 3)` or `(5, 5)`

The kernels in a convolutional layer determine what kinds of features it creates. During training, a convnet tries to learn what features it needs to solve the classification problem. 

#### Activations
The `activations` in the network are called `feature maps`. They are what results when we apply a filter to an image; they contain the visual features the kernel extracts.
<figure>
<img src="https://i.imgur.com/JxBwchH.png" width="650">
</figure>

What a convolution accentuates in its inputs will match the shape of the positive numbers in the kernel.
- left and middle kernels will filter for horizontal shapes.
- With the `filters` parameter, can tell the convolutional layer how many feature maps want it to create as output.

### Detect with ReLU
After filtering, the feature maps pass through the activation function. The `rectifier function` has a graph like this:

<figure>
    <img src="https://i.imgur.com/DxGJuTH.png" width=450>
</figure>

A neuron with a rectifier attached is called a `rectified linear unit`. The `ReLU activation` can be defined in its own `Activation layer`, but most often can include the activation function in `Conv2D`.
```
layers.Conv2D(filters=64, kernel_size=3, activation='relu')
```

You can think of the `activation function` as scoring pixel values according to some measure of importance. The `ReLU activation` maps negative values to `0`.
<figure>
    <img src="https://i.imgur.com/dKtwzPY.png" width=650>
</figure>

Like other activation functions, `ReLU` function is nonlinear. Total effect of all the layers in the network becomes different than by just adding effects together. The nonlinearity ensures features will combine in interesting ways as they move deeper into the network.

### Example - Apply Convolution and ReLU
Will do extraction to understand better what convolutional networks are `behind the scenes`. This image will be used.

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
# Matplotlib defaults for the figures in this section.
plt.rc('figure', autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=18, titlepad=10)
plt.rc('image', cmap='magma')

# Read the raw JPEG bytes from disk and decode them into an image tensor.
image_path = '../input/computer-vision-resources/car_feature.jpg'
image = tf.io.read_file(image_path)
image = tf.io.decode_jpeg(image)

# Show the input image; `tf.squeeze` drops size-1 axes so `imshow` gets a 2-D array
# (assumes the file decodes to a single channel -- TODO confirm).
plt.figure(figsize=(6, 6))
plt.imshow(tf.squeeze(image), cmap='gray')
plt.axis('off')
plt.show();

In [None]:
'''
For the filtering step, define a kernel and apply convolution.
- kernel is for `edge detection` and define with `tf.constant`
- try to keep the sum of the numbers between 0 and 1
'''

import tensorflow as tf
import learntools.computer_vision.visiontools as visiontools

# 3x3 edge-detection kernel: centre weight 8 surrounded by -1, so the entries sum to 0.
kernel = tf.constant([
    [-1, -1, -1],
    [-1, 8, -1],
    [-1, -1, -1],
])

# plt.figure(figsize=(3, 3))
# Render the kernel values as a labelled grid.
visiontools.show_kernel(kernel)

TensorFlow includes many common operations performed by neural networks in its [tf.nn module](https://www.tensorflow.org/api_docs/python/tf/nn). `conv2d` and `relu` will be used. 

In [None]:
# Reformat for batch compatibility.
# NOTE: this cell rebinds `image` and `kernel` in place, so running it twice without
# re-running the cells that created them adds the extra axes a second time.
image = tf.image.convert_image_dtype(image, dtype=tf.float32)  # image as float32
image = tf.expand_dims(image, axis=0)                          # prepend a batch axis
kernel = tf.reshape(kernel, [*kernel.shape, 1, 1])             # append two size-1 axes to the kernel
kernel = tf.cast(kernel, dtype=tf.float32)                     # match the image dtype

In [None]:
# conv2d: filter the image with the kernel (the "Filter" step).
image_filter = tf.nn.conv2d(
    input=image,
    filters=kernel,

    strides=1,
    padding='SAME',
)

# Display the filtered feature map.
plt.figure(figsize=(6, 6))
plt.imshow(tf.squeeze(image_filter))
plt.axis('off')
plt.show();

In [None]:
# ReLU after conv2d (the "Detect" step)
image_detect = tf.nn.relu(image_filter)
f1 = plt.figure(1)
plt.imshow(tf.squeeze(image_detect))
plt.axis('off')
plt.title("Figure for ReLU after conv2d")

# ----------------extra task--------------- 
# ReLU applied directly to the original photo, for comparison.
# NOTE: this overwrites `image_detect` from the step above.
image_detect = tf.nn.relu(image)
f2= plt.figure(2)
plt.imshow(tf.squeeze(image_detect))
plt.axis('off')
plt.title("Figure for ReLU without conv2d")
plt.show();

Images like these are what the head uses to solve its classification problem. 

Feature extraction: filter with `Conv2D` layers and detect with `relu` activation.

### Exercise: Convolution and ReLU


In [None]:
# Read the illustration, decode it as a single-channel image and resize it to 400x400.
image_path = '../input/computer-vision-resources/car_illus.jpg'
image = tf.io.read_file(image_path)
image = tf.io.decode_jpeg(image, channels=1)
image = tf.image.resize(image, size=[400, 400])

# Drop the size-1 channel axis and convert to NumPy for plotting.
img = tf.squeeze(image).numpy()
plt.figure(figsize=(6, 6))
plt.imshow(img, cmap='gray')
plt.axis('off')
plt.show();


import learntools.computer_vision.visiontools as visiontools
from learntools.computer_vision.visiontools import edge, bottom_sobel, emboss, sharpen

# Show the four predefined kernels side by side, one subplot each.
kernels = [edge, bottom_sobel, emboss, sharpen]
names = ["Edge Detect", "Bottom Sobel", "Emboss", "Sharpen"]

plt.figure(figsize=(12, 12))
for i, (kernel, name) in enumerate(zip(kernels, names)):
    plt.subplot(1, 4, i+1)
    visiontools.show_kernel(kernel)
    plt.title(name)
plt.tight_layout()
# Finish this figure here; otherwise the plots that follow in this cell are
# drawn on top of the last ("Sharpen") subplot.
plt.show()


# Custom kernel for the exercise, drawn in its own figure.
kernel = tf.constant([
    [-0.5, -1.5, -0.5],
    [1.5, 2, 1.5],
    [-0.5, -1.5, -0.5]
])
plt.figure()
visiontools.show_kernel(kernel)
plt.show()


# Reformat for batch compatibility: float32 image with a leading batch axis,
# float32 kernel with two trailing size-1 axes.
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
image = tf.expand_dims(image, axis=0)
kernel = tf.reshape(kernel, [*kernel.shape, 1, 1])
kernel = tf.cast(kernel, dtype=tf.float32)

# -------------------------------------
# `conv_fn` and `relu_fn` are the exercise's placeholders for the TensorFlow
# convolution and rectifier functions. They are not defined anywhere else in the
# notebook, so bind them here to avoid a NameError on a fresh kernel.
conv_fn = tf.nn.conv2d
relu_fn = tf.nn.relu

# Filter step: convolve the image with the kernel.
image_filter = conv_fn(
    input=image,
    filters=kernel,
    strides=1, # or (1, 1)
    padding='SAME',
)
plt.imshow(
    # Reformat for plotting
    tf.squeeze(image_filter)
)
plt.axis('off')
plt.show();

# -------------------------------------
# Detect step: apply the rectifier to the filtered image.
image_detect = relu_fn(image_filter)
plt.imshow(
    # Reformat for plotting
    tf.squeeze(image_detect)
)
plt.axis('off')
plt.show();

#### Observe Convolution on a Numerical Matrix

In [None]:
# Sympy is a python library for symbolic mathematics. It has a nice
# pretty printer for matrices, which is all we'll use it for.
import sympy
sympy.init_printing()
from IPython.display import display

# Tiny 6x6 "image": a vertical line in column 1 and a short horizontal segment in row 4.
image = np.array([
    [0, 1, 0, 0, 0, 0],
    [0, 1, 0, 0, 0, 0],
    [0, 1, 0, 0, 0, 0],
    [0, 1, 0, 0, 0, 0],
    [0, 1, 0, 1, 1, 1],
    [0, 1, 0, 0, 0, 0],
])

# 2x2 kernel: +1 in the left column, -1 in the right column.
kernel = np.array([
    [1, -1],
    [1, -1],
])

# Pretty-print the raw inputs.
display(sympy.Matrix(image))
display(sympy.Matrix(kernel))
# Reformat for Tensorflow
image = tf.cast(image, dtype=tf.float32)
image = tf.reshape(image, [1, *image.shape, 1])     # add leading and trailing size-1 axes
kernel = tf.reshape(kernel, [*kernel.shape, 1, 1])  # add two trailing size-1 axes
kernel = tf.cast(kernel, dtype=tf.float32)

# Filter with 'VALID' padding, then detect with ReLU.
image_filter = tf.nn.conv2d(
    input=image,
    filters=kernel,
    strides=1,
    padding='VALID',
)
image_detect = tf.nn.relu(image_filter)

# The first matrix is the image after convolution, and the second is
# the image after ReLU.
display(sympy.Matrix(tf.squeeze(image_filter).numpy()))
display(sympy.Matrix(tf.squeeze(image_detect).numpy()))

---

# Maximum Pooling

- How `base` in convnet performs feature extraction, 
- How `conv2d` layer and `relu` activation layer process,
- How `MaxPool2D` layer `condense` with `maximum pooling`.

### Condense with Maximum Pooling
```
layers.Conv2D(filters=64, kernel_size=3), # activation is None
layers.MaxPool2D(pool_size=2)
```
A `MaxPool2D` layer is much like a `Conv2D` layer, except that it uses a simple maximum function instead of a kernel, with the `pool_size` parameter analogous to kernel_size. A `MaxPool2D` layer doesn't have any trainable weights like a `convolutional` layer. 

MaxPool2D is the `Condense` step.
<figure>
    <img src='https://i.imgur.com/IYO9lqp.png' width=400>
</figure>

After applying the `ReLU` function (*Detect*) the feature map ends up with a lot of `dead space`, large areas containing only 0's (the black areas in the image). 
- carrying `0` activations through the entire network would increase the size of the model without adding much useful information.
- `condense` the feature map to retain only the most useful part 

`Maximum pooling` takes a patch of activations in the original feature map and replaces them with the maximum activation in that patch.
<figure>
    <img src='https://imgur.com/hK5U2cd.png' width=500>
</figure>

- When applied after `ReLU` activation, it has the effect of `intensifying` features. 
- The pooling step increases the proportion of `active pixels` to `zero pixels`.

### Example - Apply Maximum Pooling


In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import warnings

# Matplotlib defaults for the figures in this section.
plt.rc('figure', autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=18, titlepad=10)
plt.rc('image', cmap='magma')
warnings.filterwarnings("ignore") # to clean up output cells

# Read image
image_path = '../input/computer-vision-resources/car_feature.jpg'
image = tf.io.read_file(image_path)
image = tf.io.decode_jpeg(image)

# Define kernel (3x3 edge detector, created directly as float32)
kernel = tf.constant([
    [-1, -1, -1],
    [-1,  8, -1],
    [-1, -1, -1],
], dtype=tf.float32)

# Reformat for batch compatibility.
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
image = tf.expand_dims(image, axis=0)               # prepend a batch axis
kernel = tf.reshape(kernel, [*kernel.shape, 1, 1])  # append two size-1 axes

# Filter step
image_filter = tf.nn.conv2d(
    input=image,
    filters=kernel,
    # we'll talk about these two in the next lesson!
    strides=1,
    padding='SAME'
)

# Detect step
image_detect = tf.nn.relu(image_filter)

# Show what we have so far: input, filtered and rectified images in one row.
plt.figure(figsize=(12, 6))
plt.subplot(131)
plt.imshow(tf.squeeze(image), cmap='gray')
plt.axis('off')
plt.title('Input')
plt.subplot(132)
plt.imshow(tf.squeeze(image_filter))
plt.axis('off')
plt.title('Filter')
plt.subplot(133)
plt.imshow(tf.squeeze(image_detect))
plt.axis('off')
plt.title('Detect')
plt.show();

For pooling step, use `tf.nn.pool` function.

In [None]:
import tensorflow as tf

# Condense step: 2x2 maximum pooling with a stride of 2 in each direction.
image_condense = tf.nn.pool(
    input=image_detect, # image in the Detect step above
    window_shape=(2, 2),
    pooling_type='MAX',
    strides=(2, 2),
    padding='SAME',
)

# Display the pooled feature map.
plt.figure(figsize=(6, 6))
plt.imshow(tf.squeeze(image_condense))
plt.axis('off')
plt.show();

### Translation Invariance
Earlier the zero-pixels were called `unimportant`, but that does not mean they carry no information at all: zero-pixels carry `positional information`. The blank space still positions the feature within the image. When `MaxPool2D` removes some of these pixels, it removes some of the positional information in the feature map, which gives a convnet its `translation invariance` property. 
- convnet with maximum pooling will tend not to distinguish features by their location in the image
    - `Translation` is for changing the position of something without rotating or changing its shape or size
![](https://i.imgur.com/97j8WA1.png)
- The two dots in the original image became indistinguishable after repeated pooling.
- Pooling destroyed some of their positional information.
- Network can no longer distinguish between them in the feature maps, it can't distinguish them in the original image either: it has become invariant to that difference in position.
- Pooling only creates translation invariance in a network over small distances, as with the two dots in the image.
- Features that begin far apart will remain distinct after pooling; only some of the positional information was lost, but not all of it.
![](https://i.imgur.com/kUMWdcP.png)



### Exercise: Maximum Pooling


In [None]:
# Read the illustration, decode it as a single-channel image and resize it to 400x400.
image_path = '../input/computer-vision-resources/car_illus.jpg'
image = tf.io.read_file(image_path)
image = tf.io.decode_jpeg(image, channels=1)
image = tf.image.resize(image, size=[400, 400])

# Embossing kernel
kernel = tf.constant([
    [-2, -1, 0],
    [-1, 1, 1],
    [0, 1, 2],
])

# Reformat for batch compatibility.
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
image = tf.expand_dims(image, axis=0)               # prepend a batch axis
kernel = tf.reshape(kernel, [*kernel.shape, 1, 1])  # append two size-1 axes
kernel = tf.cast(kernel, dtype=tf.float32)

# Filter step ('VALID' padding, unlike the 'SAME' padding used in the example above)
image_filter = tf.nn.conv2d(
    input=image,
    filters=kernel,
    strides=1,
    padding='VALID',
)

# Detect step
image_detect = tf.nn.relu(image_filter)

# Show what we have so far: input, filtered and rectified images in one row.
plt.figure(figsize=(12, 6))
plt.subplot(131)
plt.imshow(tf.squeeze(image), cmap='gray')
plt.axis('off')
plt.title('Input')
plt.subplot(132)
plt.imshow(tf.squeeze(image_filter))
plt.axis('off')
plt.title('Filter')
plt.subplot(133)
plt.imshow(tf.squeeze(image_detect))
plt.axis('off')
plt.title('Detect')
plt.show();

In [None]:
# Condense step: 2x2 maximum pooling with a stride of 2 in each direction.
image_condense = tf.nn.pool(
    input=image_detect,
    window_shape=(2,2),
    pooling_type='MAX',
    strides=(2,2),
    padding='SAME'
)

# Compare the rectified feature map with its pooled version side by side.
plt.figure(figsize=(8, 6))
plt.subplot(121)
plt.imshow(tf.squeeze(image_detect))
plt.axis('off')
plt.title("Detect (ReLU)")
plt.subplot(122)
plt.imshow(tf.squeeze(image_condense))
plt.axis('off')
plt.title("Condense (MaxPool)")
plt.show();

In [None]:
'''
randomly apply a small shift to a circle and then condense the image several times with maximum pooling
'''


REPEATS = 4      # number of successive pooling steps to show
SIZE = [64, 64]  # size of the starting image

# Create a randomly shifted circle
image = visiontools.circle(SIZE, r_shrink=4, val=1)
image = tf.expand_dims(image, axis=-1)  # add a trailing axis before the transform
image = visiontools.random_transform(image, jitter=3, fill_method='replicate')
image = tf.squeeze(image)               # back to 2-D for plotting

# First panel: the original image; the remaining REPEATS panels show each pooling step.
plt.figure(figsize=(16, 4))
plt.subplot(1, REPEATS+1, 1)
plt.imshow(image, vmin=0, vmax=1)
plt.title("Original\nShape: {}x{}".format(image.shape[0], image.shape[1]))
plt.axis('off')

# Now condense with maximum pooling several times
for i in range(REPEATS):
    ax = plt.subplot(1, REPEATS+1, i+2)  # `ax` is not used again in this cell
    image = tf.reshape(image, [1, *image.shape, 1])  # add leading and trailing size-1 axes for pooling
    image = tf.nn.pool(image, window_shape=(2,2), strides=(2, 2), padding='SAME', pooling_type='MAX')
    image = tf.squeeze(image)                        # back to 2-D for plotting
    plt.imshow(image, vmin=0, vmax=1)
    plt.title("MaxPool {}\nShape: {}x{}".format(i+1, image.shape[0], image.shape[1]))
    plt.axis('off')

#### Global Average Pooling
Average pooling has largely been superseded by maximum pooling within the convolutional base. *Global average pooling*, implemented by the `GlobalAvgPool2D` layer, is used as an alternative to some or all of the hidden `Dense` layers in the head of the network.
```
pretrained_base,
layers.GlobalAvgPool2D(),
layers.Dense(1, activation='sigmoid'),
```
- There is no `Flatten` layer here, which usually comes after the base to transform the 2D feature data into the 1D data needed by the classifier. 
- The `GlobalAvgPool2D` layer serves this function instead. But rather than *unstacking* the features as `Flatten` does, it replaces each entire feature map with its average value.

In [None]:
# Generate eight random 5x5 feature maps.
feature_maps = [visiontools.random_map([5, 5], scale=0.1, decay_power=4) for _ in range(8)]

# Show the eight maps in a single row.
gs = gridspec.GridSpec(1, 8, wspace=0.01, hspace=0.01)
plt.figure(figsize=(18, 2))
for i, feature_map in enumerate(feature_maps):
    plt.subplot(gs[i])
    plt.imshow(feature_map, vmin=0, vmax=1)
    plt.axis('off')
plt.suptitle('Feature Maps', size=18, weight='bold', y=1.1)
plt.show()

# reformat for TensorFlow (add leading and trailing size-1 axes to each map)
feature_maps_tf = [tf.reshape(feature_map, [1, *feature_map.shape, 1])
                   for feature_map in feature_maps]

# Apply global average pooling to every map, then arrange the pooled values
# as a single row for display.
global_avg_pool = tf.keras.layers.GlobalAvgPool2D()
pooled_maps = [global_avg_pool(feature_map) for feature_map in feature_maps_tf]
img = np.array(pooled_maps)[:,:,0].T

plt.imshow(img, vmin=0, vmax=1)
plt.axis('off')
plt.title('Pooled Feature Maps')
plt.show();

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Load VGG16
# Load the saved VGG16 base; it is only called for inference below, never fitted.
pretrained_base = tf.keras.models.load_model(
    '../input/cv-course-models/cv-course-models/vgg16-pretrained-base',
)

model = keras.Sequential([
    pretrained_base,
    # Attach a global average pooling layer after the base
    layers.GlobalAvgPool2D(),
])

# Load dataset (one image per batch so a single example can be pulled at a time)
ds = image_dataset_from_directory(
    '../input/car-or-truck/train',
    labels='inferred',
    label_mode='binary',
    image_size=[128, 128],
    interpolation='nearest',
    batch_size=1,
    shuffle=True,
)
ds_iter = iter(ds)


# Take one (image, label) batch and run the image through base + global pooling.
car = next(ds_iter)
car_tf = tf.image.resize(car[0], size=[128, 128])
car_features = model(car_tf)
# Arrange the pooled features as a 16x32 grid for display
# (requires the model to output exactly 512 values for the single image).
car_features = tf.reshape(car_features, shape=(16, 32))
label = int(tf.squeeze(car[1]).numpy())

# Left: the input image titled with its class; right: the pooled features.
plt.figure(figsize=(8, 4))
plt.subplot(121)
plt.imshow(tf.squeeze(car[0]))
plt.axis('off')
plt.title(["Car", "Truck"][label])
plt.subplot(122)
plt.imshow(car_features)
plt.title('Pooled Feature Maps')
plt.axis('off')
plt.show();

---

# The Sliding Window

Learned three operations that carry out feature extraction from an image:
1. filter with a convolution layer
2. detect with ReLU activation
3. condense with a maximum pooling layer

Convolution and pooling operations share a common feature: both performed over a `sliding window`. With convolution, `window` is given by dimension of kernel, `kernel_size`. Pooling window is given by `pool_size`.
<figure>
    <img src='https://i.imgur.com/LueNK6b.gif' width=350>
</figure>

There are two additional parameters affecting both convolution and pooling layers: 
- `strides` of the window: how far the window should move at each step,
- `padding` to use at image edges:  describes how to handle the pixels at the edges of the input.
```
layers.Conv2D(filters=64,
              kernel_size=3,
              strides=1,
              padding='same',
              activation='relu'),
layers.MaxPool2D(pool_size=2,
                 strides=1,
                 padding='same')
```

### Stride
The distance the window moves at each step is called `stride`. Need to specify stride in `both dimensions` of the image: one for moving `left to right` and one for moving `top to bottom`. Below animation is for `strides=(2,2)`.
<figure>
    <img src='https://i.imgur.com/Tlptsvt.gif' width=350>
</figure>
Whenever the stride in either direction is greater than `1`, the sliding window will skip over some of the pixels in the input at each step. 
- Since high-quality features will be needed for `classification`, convolutional layers will most often have `strides=(1, 1)`. 
    - Increasing `strides` means the window can miss out on potentially valuable information.
- Maximum pooling layers will have `stride` value greater than `1` [`(2,2)` or `(3,3)`], but not larger than window itself.

When the value of `strides` is the same number in both directions, it can be set with a single integer: `strides=(2, 2)` == `strides=2`.

### Padding
What the convolution does with these boundary values is determined by its `padding` parameter. In Tensorflow, parameter can be `padding='same'` or `padding='valid'`. 
- `valid`: the convolution window will stay entirely inside the input. 
    - output shrinks (loses pixels)
    - shrinks more for larger kernels
    - will limit the number of layers the network can contain, especially when inputs are small in size.
- `same`: to pad the input with `0's` around its borders,
    - to make the size of the output the same as the size of the input
    - effect of diluting the influence of pixels at the borders.
    
The following animation is for `same`.
<figure>
    <img src='https://i.imgur.com/RvGM2xb.gif' width=350>
</figure>
Most modern convnets will use some combination of the two.

### Example - Exploring Sliding Windows
To better understand the effect of the sliding window parameters, we will perform a feature extraction on a low-resolution image.

In [None]:
import numpy as np
from itertools import product
from skimage import draw, transform

def circle(size, val=None, r_shrink=0):
    """Rasterise a circle outline and return it resized to ``size``.

    The outline is drawn on a canvas one pixel larger than ``size`` in each
    direction, centred at ``(size[0]//2, size[1]//2)`` with radius
    ``size[0]//2 - r_shrink``, and then resized to ``size`` with ``order=0``.

    Args:
        size: Two-element sequence giving the output shape.
        val: Value written on the outline pixels. When ``None``, each outline
            pixel gets a value drawn from ``np.random.uniform``.
        r_shrink: Amount subtracted from the radius.

    Returns:
        The array produced by ``transform.resize``.
    """
    height, width = size[0], size[1]
    canvas = np.zeros([height + 1, width + 1])
    rows, cols = draw.circle_perimeter(
        height // 2, width // 2,
        radius=height // 2 - r_shrink,
        shape=[height + 1, width + 1],
    )
    if val is not None:
        canvas[rows, cols] = val
    else:
        # Draw a full random canvas and keep only the values on the outline.
        canvas[rows, cols] = np.random.uniform(size=canvas.shape)[rows, cols]
    return transform.resize(canvas, size, order=0)

def show_kernel(kernel, label=True, digits=None, text_size=28):
    """Render a 2D kernel as an image on the current Matplotlib axes.

    Args:
        kernel: Array-like converted with ``np.array``; it must be
            two-dimensional because its shape is unpacked into rows and columns.
        label: When true, write each cell's value at the centre of the cell.
        digits: If not ``None``, round the values to this many decimals
            before plotting.
        text_size: Font size of the value labels.
    """
    # Format kernel
    values = np.array(kernel)
    if digits is not None:
        values = values.round(digits)

    # Plot kernel
    palette = plt.get_cmap('Blues_r')
    plt.imshow(values, cmap=palette)
    n_rows, n_cols = values.shape
    midpoint = (values.max() + values.min()) / 2

    # Optionally, add value labels
    if label:
        # Cells above the midpoint are labelled with the colour at the low end
        # of the colormap, the remaining cells with the colour at the high end.
        above_color, below_color = palette(0), palette(255)
        for row in range(n_rows):
            for col in range(n_cols):
                cell = values[row, col]
                plt.text(
                    col, row, cell,
                    color=above_color if cell > midpoint else below_color,
                    size=text_size,
                    horizontalalignment='center',
                    verticalalignment='center',
                )

    # Hide the axis ticks
    plt.xticks([])
    plt.yticks([])

def show_extraction(image,
                    kernel,
                    conv_stride=1,
                    conv_padding='valid',
                    activation='relu',
                    pool_size=2,
                    pool_stride=2,
                    pool_padding='same',
                    figsize=(10, 10),
                    subplot_shape=(2, 2),
                    ops=('Input', 'Filter', 'Detect', 'Condense'),
                    gamma=1.0):
    """Plot the stages of one filter / detect / condense feature extraction.

    A single-filter ``Conv2D`` layer (weights taken from ``kernel``, no bias),
    an ``Activation`` layer and a ``MaxPool2D`` layer are applied to ``image``
    in turn, and the requested stages are drawn side by side in a new figure.

    Args:
        image: Input image. Its shape is passed as the ``Conv2D``
            ``input_shape``, so presumably (height, width, channels) -- TODO confirm.
        kernel: Tensor with a ``.shape``; it is reshaped to
            ``[*kernel.shape, 1, 1]`` and installed as the convolution weights.
        conv_stride: ``strides`` of the convolution.
        conv_padding: ``padding`` of the convolution.
        activation: Activation applied after the convolution.
        pool_size: ``pool_size`` of the max-pooling layer.
        pool_stride: ``strides`` of the max-pooling layer.
        pool_padding: ``padding`` of the max-pooling layer.
        figsize: Size of the Matplotlib figure.
        subplot_shape: ``(rows, cols)`` of the subplot grid.
        ops: Stages to plot, in order. Each entry must be one of ``'Input'``,
            ``'Filter'``, ``'Detect'`` or ``'Condense'``; any other name
            raises ``KeyError`` when it is looked up for plotting.
        gamma: Gamma applied to the ``'Detect'`` and ``'Condense'`` stages
            before display (the other stages use 1.0).
    """
    # Create Layers
    model = tf.keras.Sequential([
                    tf.keras.layers.Conv2D(
                        filters=1,
                        kernel_size=kernel.shape,
                        strides=conv_stride,
                        padding=conv_padding,
                        use_bias=False,
                        input_shape=image.shape,
                    ),
                    tf.keras.layers.Activation(activation),
                    tf.keras.layers.MaxPool2D(
                        pool_size=pool_size,
                        strides=pool_stride,
                        padding=pool_padding,
                    ),
                   ])

    layer_filter, layer_detect, layer_condense = model.layers
    # Append the two trailing axes the Conv2D weight tensor needs.
    kernel = tf.reshape(kernel, [*kernel.shape, 1, 1])
    layer_filter.set_weights([kernel])

    # Format for TF: add a leading batch axis and convert to float32
    batch = tf.expand_dims(image, axis=0)
    batch = tf.image.convert_image_dtype(batch, dtype=tf.float32)

    # Extract Feature
    image_filter = layer_filter(batch)
    image_detect = layer_detect(image_filter)
    image_condense = layer_condense(image_detect)

    # Stage name -> (tensor to show, gamma to apply when showing it)
    stages = {
        'Input': (batch, 1.0),
        'Filter': (image_filter, 1.0),
        'Detect': (image_detect, gamma),
        'Condense': (image_condense, gamma),
    }

    # Plot
    plt.figure(figsize=figsize)
    for i, title in enumerate(ops):
        # Distinct names so the `image` and `gamma` arguments are not clobbered.
        stage_image, stage_gamma = stages[title]
        plt.subplot(*subplot_shape, i+1)
        plt.imshow(tf.image.adjust_gamma(tf.squeeze(stage_image), stage_gamma))
        plt.axis('off')
        plt.title(title)

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import learntools.computer_vision.visiontools as visiontools

# Matplotlib defaults for the figures in this example
plt.rc('figure', autolayout=True)
plt.rc(
    'axes',
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=18,
    titlepad=10,
)
plt.rc('image', cmap='magma')

# Circle outline on a 64x64 grid, reshaped to add a trailing axis of size 1
image = circle([64, 64], val=1.0, r_shrink=3)
image = tf.reshape(image, (*image.shape, 1))

# Bottom sobel
kernel = tf.constant([
    [-1, -2, -1],
    [0, 0, 0],
    [1, 2, 1],
])

visiontools.show_kernel(kernel)

In [None]:
# Baseline run: convolution stride 1, pooling window 2 with stride 2
show_extraction(
    image,
    kernel,
    figsize=(14, 6),
    subplot_shape=(1, 4),
    # Window parameters
    conv_stride=1,
    pool_size=2,
    pool_stride=2,
)

In [None]:
# changed the strides of the convolution to 3; everything else as in the previous cell
show_extraction(
    image,
    kernel,
    figsize=(14, 6),
    subplot_shape=(1, 4),
    # Window parameters
    conv_stride=3,
    pool_size=2,
    pool_stride=2,
)

This seems to reduce the quality of the feature extracted. A convolution with strides of 3 is too coarse to produce a good feature map from this image.

Sometimes, a model will use a convolution with a larger stride in its initial layer.
- `ResNet50` model uses `7 x 7` kernels with strides of `2` in its first layer. 
- to accelerate production of large-scale features without the sacrifice of too much information from the input.

### Exercise: The Sliding Window
- explore the operations a couple of popular convnet architectures use for feature extraction, 
- learn about how convnets can capture large-scale visual features through stacking layers, and 
- see how convolution can be used on one-dimensional data, in `time series`.

In [None]:
import tensorflow as tf
import learntools.computer_vision.visiontools as visiontools
from learntools.computer_vision.visiontools import edge, blur, bottom_sobel, emboss, sharpen, circle
import matplotlib.pyplot as plt

# Matplotlib defaults for the figures in this exercise
plt.rc('figure', autolayout=True)
plt.rc(
    'axes',
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=18,
    titlepad=10,
)
plt.rc('image', cmap='magma')

# Example images: a generated circle plus two images loaded with visiontools.read_image
image_dir = '../input/computer-vision-resources/'
circle_64 = tf.expand_dims(circle([64, 64], val=1.0, r_shrink=4), axis=-1)
kaggle_k = visiontools.read_image(image_dir + 'k.jpg', channels=1)
car = tf.image.resize(
    visiontools.read_image(image_dir + 'car_illus.jpg', channels=1),
    size=[200, 200],
)
images = [
    (circle_64, "circle_64"),
    (kaggle_k, "kaggle_k"),
    (car, "car"),
]

# Show the images side by side
plt.figure(figsize=(14, 4))
for i, (img, title) in enumerate(images):
    plt.subplot(1, len(images), i+1)
    plt.imshow(tf.squeeze(img))
    plt.axis('off')
    plt.title(title)
plt.show();

# Show the kernels side by side
kernels = [
    (edge, "edge"),
    (blur, "blur"),
    (bottom_sobel, "bottom_sobel"),
    (emboss, "emboss"),
    (sharpen, "sharpen"),
]
plt.figure(figsize=(14, 4))
for i, (krn, title) in enumerate(kernels):
    plt.subplot(1, len(kernels), i+1)
    visiontools.show_kernel(krn, digits=2, text_size=20)
    plt.title(title)
plt.show()

In [None]:
# The kernels to try, in order: edge, blur, bottom_sobel, emboss, sharpen
kernel = [
    edge,
    blur,
    bottom_sobel,
    emboss,
    sharpen,
]

In [None]:
image = circle_64

# One extraction figure per kernel
for krn in kernel:
    visiontools.show_extraction(
        image, krn,

        # YOUR CODE HERE: set parameters
        conv_stride=1, conv_padding='valid',
        pool_size=2, pool_stride=2, pool_padding='same',

        subplot_shape=(1, 4),
        figsize=(14, 6),
    )

In [None]:
image = kaggle_k

# One extraction figure per kernel
for krn in kernel:
    visiontools.show_extraction(
        image, krn,

        # YOUR CODE HERE: set parameters
        conv_stride=1, conv_padding='valid',
        pool_size=2, pool_stride=2, pool_padding='same',

        subplot_shape=(1, 4),
        figsize=(14, 6),
    )

In [None]:
image = car

# One extraction figure per kernel
for krn in kernel:
    visiontools.show_extraction(
        image, krn,

        # YOUR CODE HERE: set parameters
        conv_stride=1, conv_padding='valid',
        pool_size=2, pool_stride=2, pool_padding='same',

        subplot_shape=(1, 4),
        figsize=(14, 6),
    )

#### Receptive Field
All of the input pixels a neuron is connected to is that neuron's receptive field. The receptive field just tells which parts of the input image a neuron receives information from.

If first layer is convolution with `3x3` kernels, each neuron in that layer gets input from a  3×3  patch of pixels. 
<figure>
<img src="https://i.imgur.com/HmwQm2S.png" width=250>
</figure>

Each neuron in the `3×3` patch in the middle layer is connected to a `3×3` input patch, but they overlap in a `5×5` patch. So that neuron at top has a `5×5` receptive field.

#### One-Dimensional Convolution
Convolutional networks turn out to be useful not only on (two-dimensional) images, but also on things like time-series (one-dimensional) and video (three-dimensional).

We will use a time series from [Google Trends](https://trends.google.com/trends/). It measures the popularity of the search term `machine learning` for weeks from January 25, 2015 to January 15, 2020.
- Images are two-dimensional and so kernels were 2D arrays.
- A time-series is one-dimensional, so kernels were 1D arrays.
```
detrend = tf.constant([-1, 1], dtype=tf.float32)
average = tf.constant([0.2, 0.2, 0.2, 0.2, 0.2], dtype=tf.float32)
spencer = tf.constant([-3, -6, -5, 3, 21, 46, 67, 74, 67, 46, 21, 3, -5, -6, -3], dtype=tf.float32) / 320
```

Convolution on a sequence (`time-series`) works just like convolution on an image. The difference is just that a sliding window on a sequence only has one direction, `left to right`.

In [None]:
import pandas as pd

# Load the time series as a Pandas dataframe
machinelearning = pd.read_csv(
    '../input/computer-vision-resources/machinelearning.csv',
    parse_dates=['Week'],
    index_col='Week',
)

machinelearning.plot();

# some 1D kernels sometimes used on time-series data
detrend = tf.constant([-1, 1], dtype=tf.float32)
average = tf.constant([0.2, 0.2, 0.2, 0.2, 0.2], dtype=tf.float32)
# Spencer's 15-point moving average. The integer weights are symmetric about
# the centre value (74) and sum to 320, so dividing by 320 normalises them.
# (The eleventh weight was previously mistyped as 32 instead of 21.)
spencer = tf.constant([-3, -6, -5, 3, 21, 46, 67, 74, 67, 46, 21, 3, -5, -6, -3], dtype=tf.float32) / 320

# -------- choose kernel type -----------
# kernel = detrend
# kernel = average
kernel = spencer

# Reformat for TensorFlow
ts_data = machinelearning.to_numpy()
ts_data = tf.expand_dims(ts_data, axis=0)  # add a leading batch axis
ts_data = tf.cast(ts_data, dtype=tf.float32)
# conv1d filters need two trailing axes of size 1 after the kernel width
kern = tf.reshape(kernel, shape=(*kernel.shape, 1, 1))

ts_filter = tf.nn.conv1d(
    input=ts_data,
    filters=kern,
    stride=1,
    padding='VALID',
)

# Format as Pandas Series
machinelearning_filtered = pd.Series(tf.squeeze(ts_filter).numpy())

machinelearning_filtered.plot();

`detrend` kernel filters for *changes* in the series, while `average` and `spencer` are both *smoothers* that filter for low-frequency components in the series.

----

# Custom Convnets

### Simple to Refined
We learned how convolutional networks perform `feature extraction` through three operations: `filter, detect, and condense`. A single round of feature extraction can only extract relatively simple features from an image. Convnets repeat this extraction over and over again, so that the features become more complex and refined as they travel deeper into the network.
![](https://i.imgur.com/VqmC1rm.png)

### Convolutional Blocks
This is done by passing the image through long chains of `convolutional blocks`, which perform the extraction.
<figure>
    <img src="https://i.imgur.com/pr8VwCZ.png" width=700>
</figure>
These convolutional blocks are stacks of `Conv2D` and `MaxPool2D` layers, whose role in feature extraction was covered in the previous sections.
<figure>
    <img src="https://i.imgur.com/8D6IhEw.png" width=600>
</figure>
Each block represents a round of extraction, and by composing these blocks the convnet can combine and recombine the features produced, growing them and shaping them to better fit the problem at hand. The deep structure of modern convnets is what allows this sophisticated feature engineering and has been largely responsible for their superior performance.

### Example - Design a Convnet
We will create a Keras `Sequential` model and then train it on the Cars dataset.

In [None]:
'''
-----------------------------------------------------------
---------------------Step1 - Load Data---------------------
-----------------------------------------------------------
'''
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Reproducibility
def set_seed(seed=31415):
    """Seed NumPy and TensorFlow with ``seed`` and set the related env vars.

    ``PYTHONHASHSEED`` is set to ``str(seed)`` and ``TF_DETERMINISTIC_OPS``
    to ``'1'`` in ``os.environ``.
    """
    for name, value in (('PYTHONHASHSEED', str(seed)),
                        ('TF_DETERMINISTIC_OPS', '1')):
        os.environ[name] = value
    np.random.seed(seed)
    tf.random.set_seed(seed)
set_seed()

# Set Matplotlib defaults
plt.rc('figure', autolayout=True)
plt.rc(
    'axes',
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=18,
    titlepad=10,
)
plt.rc('image', cmap='magma')
warnings.filterwarnings("ignore") # to clean up output cells


# Load training and validation sets.
# Both use the same settings; only the training set is shuffled.
ds_train_, ds_valid_ = [
    image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='binary',
        image_size=[128, 128],
        interpolation='nearest',
        batch_size=64,
        shuffle=shuffle,
    )
    for directory, shuffle in [
        ('../input/car-or-truck/train', True),
        ('../input/car-or-truck/valid', False),
    ]
]

# Data Pipeline
def convert_to_float(image, label):
    """Return ``image`` converted to float32 via ``tf.image.convert_image_dtype``; ``label`` is passed through unchanged."""
    return tf.image.convert_image_dtype(image, dtype=tf.float32), label

AUTOTUNE = tf.data.experimental.AUTOTUNE
# Each split is converted, cached, then prefetched.
ds_train = ds_train_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)
ds_valid = ds_valid_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)

![](https://i.imgur.com/U1VdoDJ.png)
Define the model with three blocks of `Conv2D` and `MaxPool2D` (the base) followed by a head of `Dense` layers. The number of filters doubles block-by-block: `32, 64, 128`, which is a common pattern. Since each `MaxPool2D` layer reduces the size of the feature maps, we can afford to increase the number of feature maps we create.

In [None]:
'''
-----------------------------------------------------------
------------------Step2 - Define Model---------------------
-----------------------------------------------------------
'''
from tensorflow import keras
from tensorflow.keras import layers

# Three Conv2D + MaxPool2D blocks (the base) followed by a dense classifier head.
model = keras.Sequential([

    # First Convolutional Block
    layers.Conv2D(
        filters=32,
        kernel_size=5,
        padding='same',
        activation="relu",
        input_shape=[128, 128, 3],  # [height, width, color channels(RGB)]
    ),
    layers.MaxPool2D(),

    # Second Convolutional Block
    layers.Conv2D(
        filters=64,
        kernel_size=3,
        padding='same',
        activation="relu",
    ),
    layers.MaxPool2D(),

    # Third Convolutional Block
    layers.Conv2D(
        filters=128,
        kernel_size=3,
        padding='same',
        activation="relu",
    ),
    layers.MaxPool2D(),

    # Classifier Head
    layers.Flatten(),
    layers.Dense(6, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
])
model.summary()

In [None]:
'''
-----------------------------------------------------------
----------------------Step3 - Train------------------------
-----------------------------------------------------------
'''
# Compile with Adam (epsilon=0.01), then train for 40 epochs with validation.
model.compile(
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
    optimizer=tf.keras.optimizers.Adam(epsilon=0.01),
)

history = model.fit(ds_train, validation_data=ds_valid, epochs=40)

In [None]:
import pandas as pd

# Learning curves: one figure for the losses, one for the accuracies
history_frame = pd.DataFrame(history.history)
history_frame[['loss', 'val_loss']].plot()
history_frame[['binary_accuracy', 'val_binary_accuracy']].plot();

### Exercise: Custom Convnets


In [None]:
from learntools.core import binder
binder.bind(globals())
from learntools.computer_vision.ex5 import *

# Imports
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Reproducibility
def set_seed(seed=31415):
    """Seed NumPy and TensorFlow with ``seed`` and set the related env vars.

    ``PYTHONHASHSEED`` is set to ``str(seed)`` and ``TF_DETERMINISTIC_OPS``
    to ``'1'`` in ``os.environ``.
    """
    for name, value in (('PYTHONHASHSEED', str(seed)),
                        ('TF_DETERMINISTIC_OPS', '1')):
        os.environ[name] = value
    np.random.seed(seed)
    tf.random.set_seed(seed)
set_seed()

# Set Matplotlib defaults
plt.rc('figure', autolayout=True)
plt.rc(
    'axes',
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=18,
    titlepad=10,
)
plt.rc('image', cmap='magma')
warnings.filterwarnings("ignore") # to clean up output cells


# Load training and validation sets.
# Both use the same settings; only the training set is shuffled.
ds_train_, ds_valid_ = [
    image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='binary',
        image_size=[128, 128],
        interpolation='nearest',
        batch_size=64,
        shuffle=shuffle,
    )
    for directory, shuffle in [
        ('../input/car-or-truck/train', True),
        ('../input/car-or-truck/valid', False),
    ]
]

# Data Pipeline
def convert_to_float(image, label):
    """Return ``image`` converted to float32 via ``tf.image.convert_image_dtype``; ``label`` is passed through unchanged."""
    return tf.image.convert_image_dtype(image, dtype=tf.float32), label

AUTOTUNE = tf.data.experimental.AUTOTUNE
# Each split is converted, cached, then prefetched.
ds_train = ds_train_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)
ds_valid = ds_valid_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)

#### Design a Convnet
![](https://i.imgur.com/Vko6nCK.png)


In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Conv2D arguments below are (filters, kernel_size); Dense/Dropout take units/rate.
model = keras.Sequential([
    # Block One
    layers.Conv2D(32, 3, padding='same', activation='relu',
                  input_shape=[128, 128, 3]),
    layers.MaxPool2D(),

    # Block Two
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPool2D(),

    # Block Three
    # YOUR CODE HERE
    layers.Conv2D(128, 3, padding='same', activation='relu'),
    layers.Conv2D(128, 3, padding='same', activation='relu'),
    layers.MaxPool2D(),

    # Head
    layers.Flatten(),
    layers.Dense(6, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid'),
])

# Compile with Adam (epsilon=0.01), then train for 50 epochs with validation.
model.compile(
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
    optimizer=tf.keras.optimizers.Adam(epsilon=0.01),
)

history = model.fit(ds_train, validation_data=ds_valid, epochs=50)

import pandas as pd
# Learning curves: one figure for the losses, one for the accuracies
history_frame = pd.DataFrame(history.history)
history_frame[['loss', 'val_loss']].plot()
history_frame[['binary_accuracy', 'val_binary_accuracy']].plot();

---

# Data Augmentation

We will learn a trick to boost image classifiers called `data augmentation`.

### The Usefulness of Fake Data
Best way to improve ML performance is to train it on more data. The more examples the model has to learn from, the better it will be able to recognize which differences in images matter and which do not. More data helps the model to generalize better.

One easy way of getting more data is to use the data you already have. If the images in our dataset can be transformed in ways that preserve the class, the classifier can be taught to ignore those kinds of transformations.
- whether a car is facing left or right in a photo doesn't change the fact that it is a Car and not a Truck.
- if we `augment` our training data with flipped images, our classifier will learn that `left or right` is a difference it should ignore.

The idea behind `data augmentation`: add in some extra fake data that looks reasonably like the real data and classifier will improve.

### Using Data Augmentation
Many kinds of transformation are used when augmenting a dataset. These include `rotating images`, `adjusting color or contrast`, `warping images`, or many other things, usually applied in combination. The following picture shows some of the different ways an image can be transformed.
<figure>
    <img src='https://i.imgur.com/UaOm0ms.png' width=450>
</figure>

Data augmentation is usually done `online`: as the images are being fed into the network for training. Training is usually done on mini-batches of data. This is what a `batch of 16 images` might look like when data augmentation is used.
<figure>
    <img src='https://i.imgur.com/MFviYoE.png' width=450>
</figure>

Each time an image is used during training, a new random transformation is applied. Model is always seeing something a little different than what it's seen before. This extra variance in the training data is what helps the model on new data.

- Not every transformation will be useful on a given problem.
- Whatever transformations used should not mix up the classes.
    - if training [digit recognizer](https://www.kaggle.com/c/digit-recognizer), rotating images would mix up '9's and '6's.
- The best approach for finding good augmentations is the same as with most ML problems: try it and see!

### Example - Training with Data Augmentation
Keras lets you augment the data in two ways: include it in the data pipeline with a function like `ImageDataGenerator`, or include it in the model definition by using Keras's `preprocessing layers`.

This example uses the preprocessing layers. Their primary advantage is that the image transformations will be computed on the `GPU` instead of the `CPU`, potentially speeding up training.

In [None]:
# Imports
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Reproducibility
def set_seed(seed=31415):
    """Seed NumPy and TensorFlow with ``seed`` and set the related env vars.

    ``PYTHONHASHSEED`` is set to ``str(seed)`` and ``TF_DETERMINISTIC_OPS``
    to ``'1'`` in ``os.environ``.
    """
    for name, value in (('PYTHONHASHSEED', str(seed)),
                        ('TF_DETERMINISTIC_OPS', '1')):
        os.environ[name] = value
    np.random.seed(seed)
    tf.random.set_seed(seed)
set_seed()

# Set Matplotlib defaults
plt.rc('figure', autolayout=True)
plt.rc(
    'axes',
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=18,
    titlepad=10,
)
plt.rc('image', cmap='magma')
warnings.filterwarnings("ignore") # to clean up output cells


# Load training and validation sets.
# Both use the same settings; only the training set is shuffled.
ds_train_, ds_valid_ = [
    image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='binary',
        image_size=[128, 128],
        interpolation='nearest',
        batch_size=64,
        shuffle=shuffle,
    )
    for directory, shuffle in [
        ('../input/car-or-truck/train', True),
        ('../input/car-or-truck/valid', False),
    ]
]

# Data Pipeline
def convert_to_float(image, label):
    """Return ``image`` converted to float32 via ``tf.image.convert_image_dtype``; ``label`` is passed through unchanged."""
    return tf.image.convert_image_dtype(image, dtype=tf.float32), label

AUTOTUNE = tf.data.experimental.AUTOTUNE
# Each split is converted, cached, then prefetched.
ds_train = ds_train_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)
ds_valid = ds_valid_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
# Define Model

from tensorflow import keras
from tensorflow.keras import layers
# these are a new feature in TF 2.2
from tensorflow.keras.layers.experimental import preprocessing


# Load the saved base model and freeze it (trainable = False)
pretrained_base = tf.keras.models.load_model(
    '../input/cv-course-models/cv-course-models/vgg16-pretrained-base',
)
pretrained_base.trainable = False

# add a couple of simple transformations in the model
model = keras.Sequential([
    # Preprocessing
    preprocessing.RandomFlip('horizontal'),  # flip left-to-right
    preprocessing.RandomContrast(0.5),       # contrast change by up to 50%

    # Base
    pretrained_base,

    # Head
    layers.Flatten(),
    layers.Dense(6, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Train and Evaluate

# Compile with the default 'adam' optimizer, then train for 15 epochs with validation.
model.compile(
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
    optimizer='adam',
)

history = model.fit(ds_train, validation_data=ds_valid, epochs=15)

In [None]:
import pandas as pd

# Learning curves: one figure for the losses, one for the accuracies
history_frame = pd.DataFrame(history.history)
history_frame[['loss', 'val_loss']].plot()
history_frame[['binary_accuracy', 'val_binary_accuracy']].plot();

### Exercise: Data Augmentation

In [None]:
# Setup feedback system
from learntools.core import binder
binder.bind(globals())
from learntools.computer_vision.ex6 import *

import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import tensorflow.keras.layers.experimental.preprocessing as preprocessing

# Imports
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Reproducibility
def set_seed(seed=31415):
    """Seed NumPy and TensorFlow with ``seed`` and set the related env vars.

    ``PYTHONHASHSEED`` is set to ``str(seed)`` and ``TF_DETERMINISTIC_OPS``
    to ``'1'`` in ``os.environ``.
    """
    for name, value in (('PYTHONHASHSEED', str(seed)),
                        ('TF_DETERMINISTIC_OPS', '1')):
        os.environ[name] = value
    np.random.seed(seed)
    tf.random.set_seed(seed)
set_seed()

# Set Matplotlib defaults
plt.rc('figure', autolayout=True)
plt.rc(
    'axes',
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=18,
    titlepad=10,
)
plt.rc('image', cmap='magma')
warnings.filterwarnings("ignore") # to clean up output cells


# Load training and validation sets.
# Both use the same settings; only the training set is shuffled.
ds_train_, ds_valid_ = [
    image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='binary',
        image_size=[128, 128],
        interpolation='nearest',
        batch_size=64,
        shuffle=shuffle,
    )
    for directory, shuffle in [
        ('../input/car-or-truck/train', True),
        ('../input/car-or-truck/valid', False),
    ]
]

# Data Pipeline
def convert_to_float(image, label):
    """Return ``image`` converted to float32 via ``tf.image.convert_image_dtype``; ``label`` is passed through unchanged."""
    return tf.image.convert_image_dtype(image, dtype=tf.float32), label

AUTOTUNE = tf.data.experimental.AUTOTUNE
# Each split is converted, cached, then prefetched.
ds_train = ds_train_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)
ds_valid = ds_valid_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)

In [None]:
# Uncomment any of the layers below to see a different kind of preprocessing.
# All of the "factor" parameters indicate a percent-change.
augment = keras.Sequential([
    # preprocessing.RandomContrast(factor=0.5),
    preprocessing.RandomFlip(mode='horizontal'), # meaning, left-to-right
    # preprocessing.RandomFlip(mode='vertical'), # meaning, top-to-bottom
    # preprocessing.RandomWidth(factor=0.15), # horizontal stretch
    # preprocessing.RandomRotation(factor=0.20),
    # preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
])

# Take the first training image (labels dropped) as a batch of one
ex = next(iter(
    ds_train
    .unbatch()
    .map(lambda img, lbl: img)
    .batch(1)
))

# Apply the augmentation 16 times to the same image and show the results in a 4x4 grid
plt.figure(figsize=(10,10))
for i in range(16):
    plt.subplot(4, 4, i+1)
    image = augment(ex, training=True)
    plt.imshow(tf.squeeze(image))
    plt.axis('off')
plt.show()

[EuroSAT](https://www.kaggle.com/ryanholbrook/eurosat) dataset consists of satellite images of the Earth classified by geographic feature.
![](https://i.imgur.com/LxARYZe.png)

It seems to this author that flips and rotations would be worth trying first since there's no concept of orientation for pictures taken straight overhead. None of the transformations seem likely to confuse classes, however.

The [TensorFlow Flowers](https://www.kaggle.com/ryanholbrook/tensorflow-flowers) dataset consists of photographs of flowers of several species.
![](https://i.imgur.com/Mt7PR2x.png)

It seems to this author that horizontal flips and moderate rotations would be worth trying first. Some augmentation libraries include transformations of hue (like red to blue). Since the color of a flower seems distinctive of its class, a change of hue might be less successful. On the other hand, there is surprising variety in cultivated flowers like roses, so, depending on the dataset, this might be an improvement after all!

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Conv2D arguments below are (filters, kernel_size).
model = keras.Sequential([
    layers.InputLayer(input_shape=[128, 128, 3]),

    # Data Augmentation
    preprocessing.RandomContrast(factor=0.10),
    preprocessing.RandomFlip(mode='horizontal'),
    preprocessing.RandomRotation(factor=0.10),

    # Block One
    layers.BatchNormalization(renorm=True),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPool2D(),

    # Block Two
    layers.BatchNormalization(renorm=True),
    layers.Conv2D(128, 3, padding='same', activation='relu'),
    layers.MaxPool2D(),

    # Block Three
    layers.BatchNormalization(renorm=True),
    layers.Conv2D(256, 3, padding='same', activation='relu'),
    layers.Conv2D(256, 3, padding='same', activation='relu'),
    layers.MaxPool2D(),

    # Head
    layers.BatchNormalization(renorm=True),
    layers.Flatten(),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Compile with Adam (epsilon=0.01), then train for 50 epochs with validation.
optimizer = tf.keras.optimizers.Adam(epsilon=0.01)
model.compile(
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
    optimizer=optimizer,
)

history = model.fit(ds_train, validation_data=ds_valid, epochs=50)

# Plot learning curves
import pandas as pd
# One figure for the losses, one for the accuracies
history_frame = pd.DataFrame(history.history)
history_frame[['loss', 'val_loss']].plot()
history_frame[['binary_accuracy', 'val_binary_accuracy']].plot();

The learning curves in this model stayed close together for much longer than in previous models. This suggests that the augmentation helped prevent overfitting, allowing the model to continue improving.

And notice that this model achieved the highest accuracy of all the models in the course! This won't always be the case, but it shows that a well-designed custom convnet can sometimes perform as well or better than a much larger pretrained model. Depending on your application, having a smaller model (which requires fewer resources) could be a big advantage.

---

[Petals to the Metal](https://www.kaggle.com/c/tpu-getting-started) competition [submission](https://www.kaggle.com/minyannaing/create-your-first-submission/edit) tutorial.

[Getting Started: TPUs + Cassava Leaf Disease](https://www.kaggle.com/minyannaing/getting-started-tpus-cassava-leaf-disease/edit).