In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf

## 1. Import dataset

In [2]:
# Location of the zipped, filtered cats-vs-dogs image set.
dataset_url = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'

# Download the archive under the name 'cats_and_dogs.zip' and extract it (extract=True).
path_to_zip = tf.keras.utils.get_file(
    fname='cats_and_dogs.zip',
    origin=dataset_url,
    extract=True,
)

# Dataset root: the 'cats_and_dogs_filtered' folder in the same directory as the
# path returned by get_file.
# NOTE(review): this assumes get_file returns the archive's file path; confirm
# that PATH exists for the installed Keras version.
PATH = os.path.join(
    os.path.dirname(path_to_zip),
    'cats_and_dogs_filtered',
)

## 2. Preprocessing data


In [3]:
# Batch size and target image size passed to the dataset loaders.
IMG_SIZE = (160, 160)
BATCH_SIZE = 32

# Sub-folders of the dataset root that hold the training and validation images.
validation_dir = os.path.join(PATH, 'validation')
train_dir = os.path.join(PATH, 'train')

In [4]:
# Build the training dataset from the class sub-folders of train_dir:
# shuffled, batched by BATCH_SIZE, loaded with image_size=IMG_SIZE.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
)


Found 2000 files belonging to 2 classes.


In [5]:
# Build the validation dataset from validation_dir with the same settings as
# the training loader (shuffled, BATCH_SIZE batches, image_size=IMG_SIZE).
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    validation_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

Found 1000 files belonging to 2 classes.


## 3. Data visualisation: show the first 9 images of the training set

### Splitting data into training, validation and test sets:

In [6]:
# Number of batches in the validation set before any are set aside for testing.
val_batches = tf.data.experimental.cardinality(validation_dataset)
print('Number of validation batches: %d' % val_batches)

# Set aside the first fifth of the validation batches as a test set and keep
# the remainder for validation.
# NOTE(review): this cell rebinds validation_dataset from itself, so running it
# a second time splits the already-reduced validation set again.
# NOTE(review): validation_dataset was created with shuffle=True; if the order
# changes between iterations, take/skip may not yield fixed, disjoint
# validation and test subsets — confirm.
num_test_batches = val_batches // 5
test_dataset = validation_dataset.take(num_test_batches)
validation_dataset = validation_dataset.skip(num_test_batches)

# Report the resulting sizes of both splits.
print('Number of validation batches: %d' % tf.data.experimental.cardinality(validation_dataset))
print('Number of test batches: %d' % tf.data.experimental.cardinality(test_dataset))


Number of validation batches: 32
Number of validation batches: 26
Number of test batches: 6


In [7]:
# Passing AUTOTUNE as the prefetch buffer_size lets TensorFlow choose the
# buffer size automatically instead of hand-tuning it for this system/dataset.
AUTOTUNE = tf.data.AUTOTUNE

# Apply the same prefetching to all three splits.
train_dataset, validation_dataset, test_dataset = (
    split.prefetch(buffer_size=AUTOTUNE)
    for split in (train_dataset, validation_dataset, test_dataset)
)

### Data augmentation:


In [8]:
# Augmentation pipeline: a random horizontal flip followed by a random
# rotation with factor 0.2.
data_augmentation = tf.keras.Sequential(
    layers=[
        tf.keras.layers.RandomFlip(mode='horizontal'),
        tf.keras.layers.RandomRotation(factor=0.2),
    ]
)

In [9]:
# Alias MobileNetV2's own input-preprocessing function so it can be applied to
# image batches later. It rescales pixel values.
# NOTE(review): presumably maps [0, 255] inputs to the [-1, 1] range MobileNetV2
# expects — confirm against the Keras documentation for the installed version.
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

In [10]:
# Model input shape: the (height, width) in IMG_SIZE with a trailing
# dimension of 3 appended.
IMG_SHAPE = (*IMG_SIZE, 3)

# Base model: MobileNetV2 loaded with the 'imagenet' weights and
# include_top=False.
base_model = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=IMG_SHAPE,
)

print(IMG_SHAPE)

(160, 160, 3)


In [11]:
# Sanity check: pull one (images, labels) batch from the training set, run the
# images through the base model, and print the shape of the resulting features.
image_batch, label_batch = next(iter(train_dataset.take(1)))
feature_batch = base_model(image_batch)
print(feature_batch.shape)

(32, 5, 5, 1280)
