##### Copyright 2020 The TensorFlow Authors.

In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Data augmentation

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/tutorials/images/data_augmentation"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/images/data_augmentation.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/docs/blob/master/site/en/tutorials/images/data_augmentation.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/docs/site/en/tutorials/images/data_augmentation.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

## Overview

This tutorial demonstrates data augmentation using the `tf.image` API. Data augmentation can help you train a model when only a small dataset is available.

## Setup

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

In [0]:
!pip install tensorflow_addons

In [0]:
try:
  %tensorflow_version 2.x
except:
  pass

import urllib

import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers
AUTOTUNE = tf.data.experimental.AUTOTUNE

import tensorflow_addons as tfa
import tensorflow_datasets as tfds

import PIL.Image

import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (12, 5)

import numpy as np

Let's try the data augmentation features on a single image first, and then augment a whole dataset to train a model.

Download [this image](https://commons.wikimedia.org/wiki/File:Felis_catus-cat_on_snow.jpg), by Von.grzanka, for augmentation.

In [0]:
# Download the sample photo and keep the local path that get_file returns.
image_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg"
image_path = tf.keras.utils.get_file(fname="cat.jpg", origin=image_url)

# Leaving the opened image as the last expression displays it in the cell output.
PIL.Image.open(image_path)

Read and decode the image to tensor format.

In [0]:
# Read the raw file contents, then decode them as a 3-channel JPEG.
image_string = tf.io.read_file(filename=image_path)
image = tf.image.decode_jpeg(contents=image_string, channels=3)

A function to visualize and compare the original and augmented image side by side.

In [0]:
def visualize(original, augmented):
  """Plot the original and the augmented image side by side in a new figure.

  Args:
    original: Image shown in the left panel, titled 'Original image'.
    augmented: Image shown in the right panel, titled 'Augmented image'.
  """
  # Start a new figure for every call. The figure handle is not needed
  # afterwards, so it is not bound to a name.
  plt.figure()

  plt.subplot(1, 2, 1)
  plt.title('Original image')
  plt.imshow(original)

  # The augmented image is drawn last through the pyplot interface, so
  # follow-up pyplot calls made by the caller (e.g. `plt.colorbar()`) act on it.
  plt.subplot(1, 2, 2)
  plt.title('Augmented image')
  plt.imshow(augmented)

## Augment a single image

### Flipping the image
Flip the image either vertically or horizontally. The example below flips it horizontally (left to right).

In [0]:
# Mirror the image left-to-right and compare it with the original.
flipped = tf.image.flip_left_right(image=image)
visualize(original=image, augmented=flipped)

### Grayscale the image
Grayscale an image.

In [0]:
grayscaled = tf.image.rgb_to_grayscale(image)

# Remove size-1 dimensions from the grayscale result before plotting it.
grayscaled_squeezed = tf.squeeze(grayscaled)
visualize(original=image, augmented=grayscaled_squeezed)

# Add a colorbar for the image that was plotted last.
plt.colorbar()

### Saturate the image
Saturate an image by providing a saturation factor.

In [0]:
# Adjust the saturation with a factor of 3.
saturated = tf.image.adjust_saturation(image, saturation_factor=3)
visualize(original=image, augmented=saturated)

### Change image brightness
Change the brightness of the image by providing a brightness delta (the amount to add to the pixel values).

In [0]:
# Adjust the brightness with a delta of 0.4.
bright = tf.image.adjust_brightness(image, delta=0.4)
visualize(original=image, augmented=bright)

### Rotate the image
Rotate the image by 90 degrees with `tf.image.rot90`:

In [0]:
# k=1 is rot90's default: a single 90-degree rotation.
rotated = tf.image.rot90(image, k=1)
visualize(original=image, augmented=rotated)

Or rotate by any angle using `tfa.image.rotate`:

In [0]:
# tfa.image.rotate is given the angle in radians, so convert from degrees first.
rotate_deg = 20
rotate_rads = (np.pi / 180) * rotate_deg

rotated = tfa.image.rotate(image, rotate_rads)
visualize(original=image, augmented=rotated)

### Center crop the image
Crop the image from the center, keeping the fraction of the image that you want.

In [0]:
# Crop around the center with central_fraction=0.5.
cropped = tf.image.central_crop(image=image, central_fraction=0.5)
visualize(original=image, augmented=cropped)

See the `tf.image` reference for the full list of available augmentation options.

## Augment a dataset and train a model with it

Train a model on the MNIST dataset.

In [0]:
# Load MNIST together with its metadata object (with_info=True).
dataset, info = tfds.load('mnist', with_info=True, as_supervised=True)

# Pull out the two splits individually.
train_dataset = dataset['train']
test_dataset = dataset['test']

# Size of the training split, taken from the dataset metadata.
num_train_examples = info.splits['train'].num_examples

Write an `augment` function that normalizes and augments a single image. Map it over the dataset to get the augmented dataset.

In [0]:
def augment(image,label):
  """Normalize one example and apply random augmentations to its image.

  Args:
    image: Image tensor for one example. Assumed to be single-channel with
      values in [0, 255] (the crop size is [28, 28, 1] and pixels are
      divided by 255) -- TODO confirm against the dataset.
    label: Label for the example; passed through untouched.

  Returns:
    An `(image, label)` tuple; `image` is the augmented 28x28x1 image.
  """
  image = tf.image.resize(image, (28, 28)) / 255.0  # resize, then divide pixel values by 255
  # Pad 3 zero pixels on every side (28 -> 34) so the 28x28 random crop below
  # can land at different offsets. Cropping 28x28 directly out of a 28x28
  # image always returns the full image, i.e. no augmentation at all.
  image = tf.image.resize_with_crop_or_pad(image, 34, 34)
  image = tf.image.random_crop(image, size=[28, 28, 1])  # random 28x28 window
  image = tf.image.random_brightness(image, max_delta=0.5)  # random brightness shift
  # NOTE(review): mirrored digits may not be valid MNIST samples -- confirm the flip is wanted.
  image = tf.image.random_flip_left_right(image)  # random horizontal flip

  return image,label

BATCH_SIZE = 64

# Build a batched dataset that can be passed directly to the model for
# training: shuffle, augment each example, batch, then prefetch.
shuffle_buffer_size = num_train_examples // 4

train_batches = train_dataset.shuffle(shuffle_buffer_size)
train_batches = train_batches.map(augment, num_parallel_calls=AUTOTUNE)
train_batches = train_batches.batch(BATCH_SIZE)
train_batches = train_batches.prefetch(AUTOTUNE)

Create and compile the model. The model is a fully connected neural network with two hidden layers and no convolutional layers.

In [0]:
# Build the classifier layer by layer: flatten the 28x28x1 input, pass it
# through two ReLU Dense layers, and finish with a 10-unit Dense layer that
# has no activation.
model = tf.keras.Sequential()
model.add(layers.Flatten(input_shape=(28, 28, 1)))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(10))

In [0]:
# The model's last Dense layer has no activation, so the loss is configured
# with from_logits=True.
sparse_ce_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    optimizer='adam',
    loss=sparse_ce_loss,
    metrics=['accuracy'],
)

Train the model:

In [0]:
# Run 5 passes over the augmented, batched training data.
model.fit(x=train_batches, epochs=5)

## Conclusion:
This model reaches ~95% accuracy on the training set, which is slightly higher than the same model trained without data augmentation. Augmentation does not make a significant difference for this model because the dataset already has a large number of samples. On a small dataset, however, you could see a much larger improvement.