<a href="https://colab.research.google.com/github/tinsaeabreham45/cat_vs_dog_inceptionv3/blob/main/Transfer_lerning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Basics of transfer learning with a real dataset

### The dataset is Cats vs. Dogs

In [None]:
import urllib.request
import os
import zipfile
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.optimizers import RMSprop
from shutil import copyfile

In [None]:
# Dataset archive to download.
data_url = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip"

# Local file name the archive is saved under (relative to the working directory).
data_file_name = "cats-and-dogs"
# Directory the archive is extracted into.
download_dir = '/tmp'

# Download only when no readable archive is present yet, so re-running the
# cell does not fetch it again. `zipfile.is_zipfile` is False both for a
# missing file and for a file that is not a readable ZIP (for example an
# interrupted download), so either case triggers a fresh download.
if not zipfile.is_zipfile(data_file_name):
    urllib.request.urlretrieve(data_url, data_file_name)

# Extract into the download directory. The context manager closes the archive
# even if extraction fails part-way.
with zipfile.ZipFile(data_file_name, 'r') as zip_ref:
    zip_ref.extractall(download_dir)



In [None]:
# Sanity check: list what the archive extraction produced under PetImages.
pet_images_dir = '/tmp/PetImages/'
list_dir = os.listdir(pet_images_dir)
print(list_dir)

['Dog', 'Cat']


In [None]:
# Number of directory entries per class folder (every entry is counted,
# whether or not it is a usable image).
for label, folder in (("cat", '/tmp/PetImages/Cat/'), ("dog", '/tmp/PetImages/Dog/')):
    entry_count = len(os.listdir(folder))
    print(f"number of {label} images: {entry_count}")

number of cat images: 12501
number of dog images: 12501


In [None]:
# Create the training/testing folder tree with one sub-folder per class.
#
# `os.makedirs(..., exist_ok=True)` creates any missing parent folders and is
# a no-op for folders that already exist, so this cell can be re-run safely.
# A chain of `os.mkdir` calls inside one try/except would stop at the first
# folder that already exists and silently skip the remaining ones; it would
# also hide genuine failures such as missing permissions.
for split_name in ('training', 'testing'):
    for class_name in ('cats', 'dogs'):
        os.makedirs(os.path.join('/tmp/cats-v-dogs', split_name, class_name), exist_ok=True)

## Split the dataset

In [None]:
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE, seed=None):
  """Randomly copy the files in SOURCE into TRAINING and TESTING folders.

  Zero-length files are reported and left out. The remaining files are
  shuffled; the first ``int(n * SPLIT_SIZE)`` of them are copied to TRAINING
  and all the others are copied to TESTING, so the two sets never overlap.

  Args:
    SOURCE: Directory containing the files to split.
    TRAINING: Existing directory that receives the training files.
    TESTING: Existing directory that receives the testing files.
    SPLIT_SIZE: Fraction of the files assigned to training, from 0 to 1.
    seed: Optional seed for a reproducible split. When None (the default),
      the module-level ``random`` generator is used.

  Raises:
    ValueError: If SPLIT_SIZE is outside the range [0, 1].
  """
  if not 0 <= SPLIT_SIZE <= 1:
    raise ValueError(f"SPLIT_SIZE must be between 0 and 1, got {SPLIT_SIZE}")

  files = []
  for filename in os.listdir(SOURCE):
    # os.path.join works whether or not SOURCE ends with a path separator.
    if os.path.getsize(os.path.join(SOURCE, filename)) > 0:
      files.append(filename)
    else:
      print(filename + " is zero length, so ignoring")

  if seed is None:
    rng = random
  else:
    # os.listdir order is arbitrary, so sort first to make a seeded split
    # depend only on the file names and the seed.
    files.sort()
    rng = random.Random(seed)

  training_length = int(len(files) * SPLIT_SIZE)
  # shuffle the files order
  shuffled_set = rng.sample(files, len(files))

  # Slice from training_length onward instead of using a negative index:
  # with an empty testing share, [-0:] would select every file.
  training_set = shuffled_set[:training_length]
  testing_set = shuffled_set[training_length:]

  for filename in training_set:
    copyfile(os.path.join(SOURCE, filename), os.path.join(TRAINING, filename))

  for filename in testing_set:
    copyfile(os.path.join(SOURCE, filename), os.path.join(TESTING, filename))

# Where the extracted images live.
CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"

# Where each class is copied to for the training and testing splits.
TRAINING_CATS_DIR = "/tmp/cats-v-dogs/training/cats/"
TESTING_CATS_DIR = "/tmp/cats-v-dogs/testing/cats/"
TRAINING_DOGS_DIR = "/tmp/cats-v-dogs/training/dogs/"
TESTING_DOGS_DIR = "/tmp/cats-v-dogs/testing/dogs/"

# Fraction of each class that goes to the training split.
split_size = .9

# Split cats first, then dogs.
for source_dir, training_dir, testing_dir in (
    (CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR),
    (DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR),
):
  split_data(source_dir, training_dir, testing_dir, split_size)


666.jpg is zero length, so ignoring
11702.jpg is zero length, so ignoring


In [None]:
# Report how many files ended up in each split/class folder.
split_folders = (
    ("Number of training cat images", '/tmp/cats-v-dogs/training/cats/'),
    ("Number of training dog images", '/tmp/cats-v-dogs/training/dogs/'),
    ("Number of testing cat images", '/tmp/cats-v-dogs/testing/cats/'),
    ("Number of testing dog images", '/tmp/cats-v-dogs/testing/dogs/'),
)
for label, folder in split_folders:
    print(label, len(os.listdir(folder)))

Number of training cat images 11250
Number of training dog images 11250
Number of testing cat images 1250
Number of testing dog images 1250


## Data augmentation process

In [None]:
TRAIN_DIR = '/tmp/cats-v-dogs/training/'
VALIDATION_DIR = "/tmp/cats-v-dogs/testing/"

# Training generator: rescale pixel values by 1/255 and apply random
# rotations, shifts, shear, zoom and horizontal flips as augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=35,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
)

# Validation generator: only rescale, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=100,
)

Found 22498 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.


# I will use the InceptionV3 model

In [None]:
weights_url = "https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5"
weights_file = "inception_v3.h5"

# Fetch the pre-trained weights only once. The download is written to a
# temporary name and renamed on success, so an interrupted transfer does not
# leave a truncated file under `weights_file` that later runs would reuse.
if not os.path.exists(weights_file):
    partial_weights_file = weights_file + ".part"
    urllib.request.urlretrieve(weights_url, partial_weights_file)
    os.replace(partial_weights_file, weights_file)

# Build InceptionV3 without its classification top and without built-in
# weights; the weights come from the file downloaded above.
pre_trained_model = InceptionV3(
    include_top=False,
    input_shape=(150, 150, 3),
    weights = None
)

# load pretrained model weight
pre_trained_model.load_weights(weights_file)

# Freeze every pre-trained layer so training does not update its weights.
for layer in pre_trained_model.layers:
  layer.trainable = False

# The new layers are attached to the output of the 'mixed7' layer.
last_layer = pre_trained_model.get_layer('mixed7')

print('last layer output shape: ', last_layer.output.shape)
last_output = last_layer.output

last layer output shape:  (None, 7, 7, 768)


## Add new layers on top of the pre-trained model

In [None]:
# Classification head on top of the selected InceptionV3 layer:
# flatten -> 1024-unit ReLU layer -> single sigmoid unit.
flattened = layers.Flatten()(last_output)
hidden = layers.Dense(1024, activation='relu')(flattened)
x = layers.Dense(1, activation='sigmoid')(hidden)

# Full model: pre-trained input through to the new sigmoid output.
model = Model(inputs=pre_trained_model.input, outputs=x)

In [None]:
# Configure training (this cell only compiles; fitting happens in the next one):
# RMSprop optimizer, binary cross-entropy loss, accuracy reported as 'acc'.
optimizer = RMSprop(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc'])

In [None]:
# Train for two epochs on the augmented training generator, using the
# validation generator as validation data.
history = model.fit(
    train_generator,
    epochs=2,
    validation_data=validation_generator,
    verbose=1,
)

  self._warn_if_super_not_called()


Epoch 1/2
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step - acc: 0.9004 - loss: 0.2444



[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 152ms/step - acc: 0.9004 - loss: 0.2444 - val_acc: 0.9664 - val_loss: 0.0802
Epoch 2/2
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 139ms/step - acc: 0.9437 - loss: 0.1432 - val_acc: 0.9616 - val_loss: 0.0880


In [None]:
import numpy as np
from google.colab import files
from tensorflow.keras.utils import load_img, img_to_array

# Ask the user to upload one or more images; the result maps each saved
# file name to its contents.
uploaded = files.upload()

for fn in uploaded.keys():

  # Resolve the uploaded file against the current working directory rather
  # than assuming the notebook runs from /content.
  path = os.path.abspath(fn)
  img = load_img(path, target_size=(150, 150))

  # Same preprocessing as the data generators: 150x150 input, pixels scaled by 1/255.
  image_array = img_to_array(img)
  image_array /= 255
  # Add the batch dimension expected by model.predict.
  image_tensor = np.expand_dims(image_array, axis=0)

  classes = model.predict(image_tensor)
  print(classes[0])
  # Compare the single sigmoid output as a scalar. With class_mode='binary',
  # flow_from_directory assigns labels by sorted folder name (cats=0, dogs=1),
  # so values above 0.5 are treated as "dog".
  if classes[0][0] > 0.5:
    print(fn + " is a dog")
  else:
    print(fn + " is a cat")

Saving Cat_August_2010-4.jpg to Cat_August_2010-4.jpg


NameError: name 'model' is not defined

In [None]:
# Save the trained model and download it from Colab.

import os
import tempfile

from google.colab import files

# Save under a fixed, descriptive name in the system temp directory. Re-running
# the cell overwrites the same file instead of leaving a new randomly named
# copy behind each time, and no open file handle points at the path while
# Keras writes to it.
model_path = os.path.join(tempfile.gettempdir(), 'cats_vs_dogs_inception_v3.h5')
model.save(model_path)

# Trigger a browser download of the saved model.
files.download(model_path)






<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>