In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import vgg16
from keras.applications.vgg16 import VGG16
from keras import Sequential
from keras.layers import Flatten , Dense
import random
import shutil


## 1. Import dataset to get train + validation sets:

In [None]:
# Resolve PATH, the root directory of the extracted cats-and-dogs dataset.
# 'data_exist.txt' acts as a small cache holding the dataset path written by a
# previous run, so the archive is not downloaded again on every execution.
file_contents = ''
if os.path.exists('data_exist.txt'):
    with open('data_exist.txt', 'r') as file:
        # Read the cached dataset path (surrounding whitespace removed)
        file_contents = file.read().strip()
    if not file_contents:
        # An empty marker file carries no usable path: report it and fall
        # through to the download branch so PATH is always defined afterwards.
        print("File is empty.")

if file_contents:
    PATH = file_contents
    print(PATH)
else:
    dataset_url = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
    # Download and extract the dataset to the specified directory
    path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=dataset_url, extract=True)
    PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')

    # Write the PATH to the file so later runs can reuse it
    with open('data_exist.txt', 'w') as file:
        file.write(PATH)

    print('Dataset downloaded and PATH written to data_exist.txt')



## 2. Adding test set :


In [None]:
# Locations of the test split and its per-class sub-folders.
PATH_to_testSet = os.path.join(PATH, 'test')
PATH_to_testSet_cat = os.path.join(PATH_to_testSet, 'cats')
PATH_to_testSet_dog = os.path.join(PATH_to_testSet, 'dogs')

# Create test/cats and test/dogs (os.makedirs also creates the parent test/
# directory). exist_ok=True keeps this cell idempotent on re-runs and repairs a
# partially created tree, e.g. when test/ exists but a class folder is missing.
for class_dir in (PATH_to_testSet_cat, PATH_to_testSet_dog):
    os.makedirs(class_dir, exist_ok=True)



### Function to divide the validation data into 50% test and 50% validation

In [None]:
def move_half_random_elements(source_directory, destination_directory):
    """Move a random half of the entries in one directory into another.

    The destination is created when it does not exist yet. Nothing is moved
    when the source is missing or when the destination already contains
    entries, so calling the function a second time does not split the data
    again.

    Args:
        source_directory: Directory whose entries (files or sub-directories)
            are sampled.
        destination_directory: Directory that receives the sampled entries.

    Returns:
        A human-readable status string: either a description of why nothing
        was moved, or a summary of how many entries were moved.
    """
    # Check if the source directory exists
    if not os.path.exists(source_directory):
        return f"Source directory '{source_directory}' does not exist."

    # Create the destination *before* listing it; listing a missing directory
    # would raise FileNotFoundError instead of creating it.
    os.makedirs(destination_directory, exist_ok=True)

    # A non-empty destination means the split was already performed.
    if os.listdir(destination_directory):
        return f"destination directory '{destination_directory}' is not empty."

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # reproducible when the caller seeds the `random` module.
    elements = sorted(os.listdir(source_directory))

    # Randomly select half of the elements (rounded down)
    num_elements_to_move = len(elements) // 2
    elements_to_move = random.sample(elements, num_elements_to_move)

    # Move the selected elements to the destination directory
    for element in elements_to_move:
        source_path = os.path.join(source_directory, element)
        destination_path = os.path.join(destination_directory, element)
        shutil.move(source_path, destination_path)

    return f"Moved {num_elements_to_move} random elements from '{source_directory}' to '{destination_directory}'."


### calling the function :

In [None]:

# Per-class validation folders that donate half of their images to the test set.
PATH_to_validation_cats = os.path.join(PATH, 'validation/cats')
PATH_to_validation_dogs = os.path.join(PATH, 'validation/dogs')

# Split each class in turn (cats first, then dogs) and print the status message.
for validation_dir, test_dir in (
    (PATH_to_validation_cats, PATH_to_testSet_cat),
    (PATH_to_validation_dogs, PATH_to_testSet_dog),
):
    print(move_half_random_elements(validation_dir, test_dir))



## 3. Image preprocessing:

In [None]:
# Directory roots for the validation and training splits.
PATH_to_validation = os.path.dirname(PATH_to_validation_cats)
PATH_to_trainSet = os.path.join(PATH, 'train')

# Target image geometry: height, width, channels.
image_h = 224
image_w = 224
image_c = 3
image_shape = (image_h, image_w, image_c)

# Options shared by all three directory iterators.
flow_options = dict(
    target_size=(image_h, image_w),
    classes=['cats', 'dogs'],
    batch_size=10,
)

# Each split gets its own generator; every image is passed through
# vgg16.preprocess_input before being yielded.
train_batches = ImageDataGenerator(
    preprocessing_function=vgg16.preprocess_input
).flow_from_directory(directory=PATH_to_trainSet, **flow_options)

validation_batches = ImageDataGenerator(
    preprocessing_function=vgg16.preprocess_input
).flow_from_directory(directory=PATH_to_validation, **flow_options)

# shuffle=False keeps the test images in a fixed order.
test_batches = ImageDataGenerator(
    preprocessing_function=vgg16.preprocess_input
).flow_from_directory(directory=PATH_to_testSet, shuffle=False, **flow_options)


##  4. Visualize the Data

In [None]:
# Pull one batch of images and their labels from the training iterator.
images, lable = next(train_batches)
rows = 2
cols = 2
img_count = 0


def plotImg(img_arr):
    """Show up to ten images side by side in a single row, without axes.

    Args:
        img_arr: Iterable of images accepted by ``Axes.imshow``; images beyond
            the tenth are ignored, and unused panels stay empty.

    NOTE(review): the batches are produced with ``vgg16.preprocess_input``, so
    the displayed colours presumably differ from the raw photos - confirm.
    """
    figure, panel_grid = plt.subplots(1, 10, figsize=(20, 20))
    panels = panel_grid.flatten()
    for picture, panel in zip(img_arr, panels):
        panel.imshow(picture)
        panel.axis('off')
    plt.tight_layout()
    plt.show()


plotImg(images)
print(lable)

## Using transfer learning:

### Build fine-tuned vgg16 model :

In [None]:
# Load the complete pre-trained VGG16 (ImageNet weights), including its
# classifier head; the final layer is left out when the layers are copied below.
base_model = VGG16(weights='imagenet')

# Output shape (without the batch dimension) of VGG16's last layer
input_shape = base_model.layers[-1].output_shape[1:]

# Build a Sequential model from every VGG16 layer except the last one
model = Sequential(base_model.layers[:-1])

# Freeze all layers taken from VGG16 so that only the new output layer trains
for vgg_layer in model.layers:
    vgg_layer.trainable = False

# New two-unit output layer (one unit per class: cats, dogs).
# NOTE(review): activation is 'linear', so the outputs are raw scores - confirm
# the loss used at compile time expects logits.
model.add(Dense(units=2, activation='linear', name='outputLayer'))


In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()