<a href="https://colab.research.google.com/github/proteus21/DATA-SCIENCE-STUDY/blob/main/Computer%20Vision/Computer_vision_exercises_part_10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Computer Vision - OpenCV - 10th Task



@author Tomasz Skrzypczyk

Solved by Bogusław Konefał

# Use GPU for this exercise!!!

In [None]:
import collections
import os
import random
import tarfile

import cv2
import numpy as np
import tensorflow as tf
from six.moves import urllib

# Fix the random seeds up front so that repeated runs of the notebook are comparable.
# NOTE(review): per the Keras documentation set_random_seed also seeds Python's
# `random` module; the following line then re-seeds it with a different value --
# confirm that using two different seeds is intended.
tf.keras.utils.set_random_seed(1234)
random.seed(10)


## The flowers dataset

The flowers dataset consists of images of flowers with 5 possible class labels.

When training a machine learning model, we split our data into training and test datasets. We will train the model on our training data and then evaluate how well the model performs on data it has never seen - the test set.

Let's download the images (it may take a while) and split them into training, validation and test sets.

Run the following three cells:

# Importing the data

In [None]:
# just run this cell
FLOWERS_DIR = './flower_photos'  # directory the archive unpacks into
TRAIN_FRACTION = 0.8             # share of every class used for training
RANDOM_SEED = 2018               # seed of the shuffler that drives the split


def download_images():
  """If the images aren't already downloaded, save them to FLOWERS_DIR.

  Downloads the flowers archive into the current working directory and
  unpacks it there. Nothing is downloaded when FLOWERS_DIR already exists.
  The archive is unpacked with the standard `tarfile` module rather than a
  `!tar` shell escape, so the function is plain Python and does not depend
  on a `tar` executable being installed.
  """
  if not os.path.exists(FLOWERS_DIR):
    DOWNLOAD_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'
    ARCHIVE_PATH = 'flower_photos.tgz'
    print('Downloading flower images from %s...' % DOWNLOAD_URL)
    urllib.request.urlretrieve(DOWNLOAD_URL, ARCHIVE_PATH)
    with tarfile.open(ARCHIVE_PATH, 'r:gz') as archive:
      # Use the safe 'data' extraction filter where the running Python
      # provides it; older interpreters do not accept the `filter` argument.
      if hasattr(tarfile, 'data_filter'):
        archive.extractall(filter='data')
      else:
        archive.extractall()
  print('Flower photos are located in %s' % FLOWERS_DIR)


def make_train_and_test_sets(flowers_dir=None, train_fraction=None, random_seed=None):
  """Split the images into train, validation and test sets and get the label classes.

  Every sub directory of the image directory is one class. Within each class
  the file names are sorted, shuffled with a seeded generator, and then cut
  into a training part (`train_fraction` of the files), a test part (half of
  the remainder, rounded down) and a validation part (whatever is left).

  Args:
    flowers_dir: directory holding one sub directory per class.
      Defaults to FLOWERS_DIR.
    train_fraction: fraction of each class assigned to the training set.
      Defaults to TRAIN_FRACTION.
    random_seed: seed for the shuffler. Defaults to RANDOM_SEED.

  Returns:
    A tuple `(train_examples, val_examples, test_examples, classes)`. The
    first three are lists of `(file_path, class_index)` pairs; `classes` is
    an OrderedDict mapping class index to class (directory) name.

  Raises:
    FileNotFoundError: if the image directory does not exist.
  """
  flowers_dir = FLOWERS_DIR if flowers_dir is None else flowers_dir
  train_fraction = TRAIN_FRACTION if train_fraction is None else train_fraction
  random_seed = RANDOM_SEED if random_seed is None else random_seed
  if not os.path.isdir(flowers_dir):
    raise FileNotFoundError('Image directory %s does not exist' % flowers_dir)

  train_examples, val_examples, test_examples = [], [], []
  shuffler = random.Random(random_seed)
  classes = collections.OrderedDict()
  label_to_class = {}
  is_root = True
  for (dirname, subdirs, filenames) in os.walk(flowers_dir):
    # The root directory gives us the classes
    if is_root:
      # Sort IN PLACE: os.walk descends into the sub directories in the order
      # of this very list. The shared shuffler is consumed once per class, so
      # a filesystem-dependent visiting order would change the split.
      subdirs.sort()
      classes = collections.OrderedDict(enumerate(subdirs))
      label_to_class = {name: index for index, name in classes.items()}
      is_root = False
    # The sub directories give us the image files for training.
    else:
      filenames.sort()
      shuffler.shuffle(filenames)
      # basename instead of split('/') so Windows path separators work too.
      label_class = label_to_class[os.path.basename(dirname)]
      # An example is the image file and its label class.
      examples = [(os.path.join(dirname, f), label_class) for f in filenames]
      num_train = int(len(filenames) * train_fraction)
      num_valid = int((len(filenames) - num_train)/2)
      train_examples.extend(examples[:num_train])
      test_examples.extend(examples[num_train:num_train + num_valid])
      val_examples.extend(examples[num_train + num_valid:])

  shuffler.shuffle(train_examples)
  shuffler.shuffle(test_examples)
  return train_examples, val_examples, test_examples, classes

In [None]:
# Fetch the dataset if necessary, then split it into train, validation and test sets.
download_images()
TRAIN_EXAMPLES, VAL_EXAMPLES, TEST_EXAMPLES, CLASSES = make_train_and_test_sets()
NUM_CLASSES = len(CLASSES)

# Report the class names followed by the size of every split, one line each.
print('\nThe dataset has %d label classes: %s' % (NUM_CLASSES, CLASSES.values()))
print('\n'.join([
    'There are %d training images' % len(TRAIN_EXAMPLES),
    'there are %d validation images' % len(VAL_EXAMPLES),
    'there are %d test images' % len(TEST_EXAMPLES),
]))

Downloading flower images from http://download.tensorflow.org/example_images/flower_photos.tgz...
Flower photos are located in ./flower_photos

The dataset has 5 label classes: odict_values(['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'])
There are 2934 training images
there are 369 validation images
there are 367 test images


In [None]:
IMAGE_SIZE = (224, 224)   # (width, height) every image is resized to
BATCH_SIZE = 64
SHUFFLE_BUFFER = 100      # size of the shuffle buffer used for the training set


def load_examples(examples):
  """Read, resize and scale the images referenced by `examples`.

  Args:
    examples: iterable of `(file_path, label)` pairs.

  Returns:
    A list of `(pixels, label)` pairs where `pixels` is a float32 array with
    values scaled to [0, 1]. Storing float32 instead of the float64 that
    `array / 255` produces halves the memory needed to hold the images; the
    pipeline below emits float32 either way.

  Raises:
    ValueError: if an image file cannot be decoded.
  """
  loaded = []
  for path, label in examples:
    image = cv2.imread(path)
    # cv2.imread reports an unreadable file by returning None instead of raising.
    if image is None:
      raise ValueError('Could not read image file: %s' % path)
    pixels = (cv2.resize(image, IMAGE_SIZE) / 255).astype(np.float32)
    loaded.append((pixels, label))
  return loaded


def get_generator(ds):
  """Return a zero-argument generator function that yields the items of `ds`.

  Each `(image, label)` pair of `ds` is yielded as `(image, (label,))`, i.e.
  the label is wrapped in a 1-tuple to match the `(1,)` label shape declared
  for the datasets.
  """
  def data_generator():
    for image, label in ds:
      yield image, (label,)
  return data_generator


def make_dataset(examples, shuffle=False):
  """Build a batched tf.data pipeline from `(file_path, label)` pairs.

  Args:
    examples: iterable of `(file_path, label)` pairs.
    shuffle: whether to shuffle the elements (with a buffer of
      SHUFFLE_BUFFER items) before batching.

  Returns:
    A `tf.data.Dataset` yielding batches of up to BATCH_SIZE
    `(image, label)` elements.
  """
  dataset = tf.data.Dataset.from_generator(
      get_generator(load_examples(examples)),
      output_types=(tf.float32, tf.uint8),
      # Decoded images are indexed (height, width, channels).
      output_shapes=((IMAGE_SIZE[1], IMAGE_SIZE[0], 3), (1,)))
  if shuffle:
    dataset = dataset.shuffle(SHUFFLE_BUFFER)
  return dataset.batch(BATCH_SIZE)


# Only the training data is shuffled; validation and test order does not matter.
train_ds = make_dataset(TRAIN_EXAMPLES, shuffle=True)
val_ds = make_dataset(VAL_EXAMPLES)
test_ds = make_dataset(TEST_EXAMPLES)

# Simple model

Here we have a simple CNN with no normalization layers. Train it and note the test accuracy, so you can compare it with the models below.

In [None]:
# Baseline network: convolution / pooling feature extractor, one Dropout layer,
# then a small dense classifier with one output per flower class.
model = tf.keras.models.Sequential([
    # Feature extractor
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                           input_shape=(224, 224, 3)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(2, 2), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Dropout(rate=0.3),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(2, 2), activation='relu'),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(2, 2), activation='relu'),
    # Classifier head
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=100, activation='relu'),
    tf.keras.layers.Dense(units=5, activation='softmax'),
])

In [None]:
# The labels are integer class indices (not one-hot vectors), hence the
# "sparse" variant of the categorical cross-entropy loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Early stopping watches the validation accuracy with a patience of 3 epochs and
# restores the best weights seen, so training may end before all 15 epochs ran.
es = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=3, restore_best_weights=True)
model.fit(train_ds, validation_data=val_ds, epochs = 15, callbacks=[es])

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15


<keras.callbacks.History at 0x7fa60025a160>

In [None]:
# Score the trained baseline on the held-out test set; the result is indexed
# below, where element 1 is read as the accuracy.
evaluation_results = model.evaluate(test_ds)



In [None]:
# Element 1 of the evaluation result is reported as the accuracy, scaled to percent.
print(f"Your accuracy: {evaluation_results[1]*100}%")

Your accuracy: 61.30790114402771%


# Batch Norm

Now we will check whether adding **BatchNormalization** and/or **Dropout** improves the accuracy.

In [None]:
# Task 1:
# Add a BatchNormalization layer after each Convolution layer. Each layer that you may need in the future you may find in tf.keras.layers module.
#
# Same architecture as the simple model above, with a BatchNormalization layer
# following EVERY Conv2D layer (including the last one, as the task requires).
# BatchNormalization() is used with its default arguments, which are exactly the
# values that were previously spelled out (axis=-1, momentum=0.99, epsilon=0.001,
# center=True, scale=True, zeros/ones initializers).
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=[224, 224, 3]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, (2, 2), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(),
    # NOTE(review): the simple model uses Dropout(0.3) at this position; with 0.5
    # here the comparison measures more than the effect of batch normalization
    # alone -- confirm the higher rate is intended.
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Conv2D(64, (2, 2), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64, (2, 2), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(5, activation='softmax')
])

In [None]:
# Same loss, optimizer and metric as for the simple model, so that the two
# test accuracies can be compared directly.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# A fresh EarlyStopping callback for this model: it watches the validation accuracy
# with a patience of 3 epochs and restores the best weights seen.
es = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=3, restore_best_weights=True)
model.fit(train_ds, validation_data=val_ds, epochs = 15, callbacks=[es])

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15


<keras.callbacks.History at 0x7fa6188afb80>

In [None]:
# Score the batch-normalized model on the test set and report element 1 of the
# result (read as the accuracy) in percent, for comparison with the simple model.
evaluation_results = model.evaluate(test_ds)
print(f"Your accuracy: {evaluation_results[1]*100}%")

Your accuracy: 26.430517435073853%


Batch normalization should have increased the model's test accuracy by 1-3 percentage points.

# Dropout