In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
import random

# Single seed shared by every random number generator seeded below.
SEED = 12

# Exported through the environment; presumably this only matters for child
# processes, since hashing of the running interpreter is already set up -- TODO confirm.
os.environ["PYTHONHASHSEED"] = str(SEED)

# Seed TensorFlow, NumPy and the stdlib RNG (the three calls are independent).
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [None]:
import os
from zipfile import ZipFile
WORKING_DIR = "/kaggle/working"

# extracting zip files
# Each archive is opened in a `with` block so its file handle is closed as soon
# as extraction finishes, and is extracted explicitly into WORKING_DIR (the
# directory TRAIN_PATH / TEST_PATH are built from) instead of whatever the
# current working directory happens to be.
for zip_file in ['/kaggle/input/dogs-vs-cats/train.zip', '/kaggle/input/dogs-vs-cats/test1.zip']:
    with ZipFile(zip_file, mode="r") as archive:
        archive.extractall(WORKING_DIR)

# Show what ended up in the working directory.
os.listdir(WORKING_DIR)

In [None]:
import PIL

# Maximum number of files per sub-directory whose path and size are printed.
PRINT_FILE_X_DIR = 5


# exploring situation on extracted files : directories with images and their sizes
def explore_image_on_directory(data_dir):
    """Print a per-sub-directory file count and a sample of image sizes.

    Every immediate sub-directory of ``data_dir`` is scanned (non-recursively)
    for regular, non-empty files. For each sub-directory the function prints
    its name and file count, then the path and ``image.size`` of up to
    ``PRINT_FILE_X_DIR`` of those files, and finally the grand total.

    Args:
        data_dir: directory whose immediate sub-directories are inspected.

    Returns:
        None. All results are printed.
    """
    directories = {}
    for entry in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, entry)
        if not os.path.isdir(class_dir):
            continue
        candidates = (os.path.join(class_dir, f) for f in os.listdir(class_dir))
        # Keep regular files only and skip zero-byte files.
        directories[entry] = [
            path for path in candidates
            if os.path.isfile(path) and os.path.getsize(path)
        ]

    # print analysis directories
    tot = 0
    for c, paths in directories.items():
        print(c, len(paths))
        tot += len(paths)
        for f in paths[:PRINT_FILE_X_DIR]:
            # Open inside a context manager so the underlying file handle is
            # released right after the size has been read.
            with PIL.Image.open(f) as image:
                print(c, f, image.size)
    print(f"TOT:{tot}")
    
# Inspect the extraction directory through the WORKING_DIR constant (the same
# one TRAIN_PATH / TEST_PATH are derived from) rather than relying on the
# kernel's current working directory.
explore_image_on_directory(WORKING_DIR)

In [None]:
# Folders (inside WORKING_DIR) holding the training and test images.
TRAIN_PATH, TEST_PATH = (
    os.path.join(WORKING_DIR, folder) for folder in ('train', 'test1')
)

In [None]:
# creating Dataframe for training: category is on filename (text before the first '.')
# os.listdir gives no ordering guarantee, so sort first to get a stable baseline.
train_filenames = sorted(os.listdir(TRAIN_PATH))
train_categories = [f.split('.')[0] for f in train_filenames]

# Shuffle once with the notebook seed. flow_from_dataframe(validation_split=...)
# carves the validation subset out of the rows by position, so the row order
# decides which images are validated: a seeded shuffle makes that split both
# reproducible and mixed across classes (a purely sorted frame would put one
# class first).
train_df = (
    pd.DataFrame({'image': train_filenames, 'class': train_categories})
    .sample(frac=1, random_state=SEED)
    .reset_index(drop=True)
)

train_df.head()

In [None]:
# show the first images of cats and dogs
def show_images(df, images_path, rows=2, cols=15):
    """Show the first ``rows * cols`` images of ``df`` in a grid with class titles.

    Args:
        df: DataFrame with an 'image' column (file names relative to
            ``images_path``) and a 'class' column used as the subplot title.
        images_path: directory containing the image files.
        rows: number of grid rows (default 2, the original layout).
        cols: number of grid columns (default 15, the original layout).

    Cells beyond ``len(df)`` are left blank; every axis frame is hidden.
    """
    # squeeze=False keeps `axs` two-dimensional even for a single row/column.
    fig, axs = plt.subplots(rows, cols, figsize=(2 * cols, 2 * rows), squeeze=False)
    for i, ax in enumerate(axs.flat):  # row-major, same order as (i // cols, i % cols)
        ax.set_axis_off()
        if i < len(df):
            image_file = os.path.join(images_path, df['image'].iloc[i])
            # Draw while the file is open, then let the context manager
            # release the handle instead of leaking one per image.
            with PIL.Image.open(image_file) as image:
                ax.set_title(df['class'].iloc[i])
                ax.imshow(image)
        
# Split the training frame by class, then preview each class in turn.
cats_df = train_df[train_df['class'].eq('cat')]
dogs_df = train_df[train_df['class'].eq('dog')]
for class_df in (cats_df, dogs_df):
    show_images(class_df, TRAIN_PATH)


In [None]:
# Number of images per batch produced by the data generators.
BATCH_SIZE = 150
# Square target size the images are resized to before entering the network.
IMG_WIDTH = IMG_HEIGHT = 160

In [None]:
# Stand-alone rescaling layer; it is not referenced again in the visible
# notebook, where scaling is done by MobileNetV2's preprocess_input in the model.
rescale = tf.keras.layers.experimental.preprocessing.Rescaling(1./127.5, offset= -1)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Arguments common to the training and validation flows: both read the same
# dataframe/directory and differ only in the generator and the subset.
_flow_kwargs = dict(
    directory=TRAIN_PATH,
    x_col="image",
    y_col="class",
    batch_size=BATCH_SIZE,
    class_mode='binary',
    target_size=(IMG_WIDTH, IMG_HEIGHT),
)

# Training generator: 80/20 split plus data augmentation.
train_datagen = ImageDataGenerator(
    validation_split=0.2,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation generator: same split, but no augmentation on validation data.
test_datagen = ImageDataGenerator(validation_split=0.2)

# Flow training images in batches of BATCH_SIZE using train_datagen generator
train_generator = train_datagen.flow_from_dataframe(
    train_df, subset="training", **_flow_kwargs
)

# Flow validation images in batches of BATCH_SIZE using test_datagen generator
validation_generator = test_datagen.flow_from_dataframe(
    train_df, subset="validation", **_flow_kwargs
)

In [None]:
# Create the base model from the pre-trained model MobileNet V2 (feature extraction without TOP layers)
# Pre-trained MobileNet V2 used as a feature extractor: ImageNet weights,
# classification head (TOP layers) left out.
base_model = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
)

# First transfer-learning pass: the whole base model is frozen.
base_model.trainable = False
base_model.summary()

In [None]:
# Our model definition with our Dropout and Dense TOP layers
# Full model: preprocessing -> frozen base -> pooling -> dropout -> single output unit.
inputs = tf.keras.Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))
preprocessed = tf.keras.applications.mobilenet_v2.preprocess_input(inputs)
# training=False: the base model is always called in inference mode.
features = base_model(preprocessed, training=False)
pooled = tf.keras.layers.GlobalAveragePooling2D()(features)
regularized = tf.keras.layers.Dropout(0.2)(pooled)
# No activation here: the model emits a raw logit (the loss uses from_logits=True).
outputs = tf.keras.layers.Dense(1)(regularized)
model = tf.keras.Model(inputs, outputs)

In [None]:
# Learning rate for the first (frozen-base) training pass; the fine-tuning
# pass later divides it by 10.
base_learning_rate = 0.0001
# `learning_rate` is the supported keyword (`lr` is a deprecated alias) and
# matches the spelling used by the fine-tuning compile call.
# from_logits=True because the final Dense(1) layer has no activation.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()
# Last expression of the cell, so its value is displayed: the number of
# trainable variables of the model at this point.
len(model.trainable_variables)

In [None]:
# only for test tf version
# A notebook echoes only the last expression of a cell, so the GPU device name
# is printed explicitly instead of being computed and discarded.
print("GPU device:", tf.test.gpu_device_name() or "none found")
tf.__version__

In [None]:
# training
initial_epochs = 10
# No `batch_size` here: train_generator already yields batches of BATCH_SIZE,
# and Keras documents that batch_size must not be given for generator input.
history = model.fit(train_generator,
                    epochs=initial_epochs,
                    validation_data=validation_generator)

In [None]:
# show loss and accuracy
def show_loss_accuracy(history):
    """Plot training vs. validation accuracy (left) and loss (right).

    Args:
        history: object whose ``history`` attribute is a dict holding the
            per-epoch sequences 'accuracy', 'val_accuracy', 'loss' and
            'val_loss' (e.g. the value returned by ``model.fit``).
    """
    # Read all four curves up front so a missing key fails before any plotting.
    acc, val_acc, loss, val_loss = (
        history.history[key]
        for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    )
    epochs_range = range(len(acc))

    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(20, 5))

    acc_ax.plot(epochs_range, acc, label='Training Accuracy')
    acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
    acc_ax.legend(loc='lower right')
    acc_ax.set_title('Training and Validation Accuracy')

    loss_ax.plot(epochs_range, loss, label='Training Loss')
    loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
    loss_ax.legend(loc='upper right')
    loss_ax.set_title('Training and Validation Loss')

    plt.show()

show_loss_accuracy(history)

In [None]:
# fine tuning
# Index of the first base-model layer that stays trainable during fine tuning.
fine_tune_at = 100

# Unfreeze the base model as a whole...
base_model.trainable = True
print("Number of layers in the base model: ", len(base_model.layers))

# ...then freeze again every layer that comes before `fine_tune_at`.
for frozen_layer in base_model.layers[:fine_tune_at]:
    frozen_layer.trainable = False


In [None]:
# fine tuning with low learning_rate 1/10 from base
# Recompile for fine tuning with a learning rate ten times smaller than the base one.
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate / 10),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
# Displayed as the cell output: trainable variable count after unfreezing.
len(model.trainable_variables)

In [None]:
# running fine tuning
# running fine tuning
FINE_TUNING = True
if FINE_TUNING:
    fine_tune_epochs = 10
    total_epochs = initial_epochs + fine_tune_epochs

    # fine tuning training
    # `history.epoch` lists the zero-based indices of the epochs already run,
    # so its length is the index of the next epoch. Using `history.epoch[-1]`
    # would restart from the last completed index and train one epoch more
    # than `fine_tune_epochs`.
    # No `batch_size`: the generator already yields batches of BATCH_SIZE.
    history_fine = model.fit(train_generator,
                             epochs=total_epochs,
                             initial_epoch=len(history.epoch),
                             validation_data=validation_generator)

In [None]:
if FINE_TUNING:
    # show complete accuracy and loss
    # Build the combined curves in a fresh History object rather than extending
    # `history.history` in place, so re-running this cell does not append the
    # fine-tuning epochs a second time.
    full_history = tf.keras.callbacks.History()
    full_history.history = {
        metric: history.history[metric] + history_fine.history[metric]
        for metric in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    }

    show_loss_accuracy(full_history)

In [None]:
# building test dataset
test_files = os.listdir(TEST_PATH)
test_df = pd.DataFrame({'image':test_files})

test_datagen = ImageDataGenerator()
# NB. shuffle disabled on test set in order to join predictions with test_df
test_dataset = test_datagen.flow_from_dataframe(
    test_df, 
    directory=TEST_PATH, 
    x_col='image',
    y_col=None,
    class_mode=None,
    batch_size = BATCH_SIZE,
    target_size = (IMG_WIDTH, IMG_HEIGHT),
    shuffle=False
)

test_df.head()

In [None]:
# class map => { logit : class name }
class_map = { v: k for k, v in train_generator.class_indices.items() }
# predict test images set
predictions = model.predict(test_dataset)
# apply a sigmoid because our model returns logits
predictions = tf.nn.sigmoid(predictions)
predictions = tf.where(predictions < 0.5, 0, 1)
test_df['label'] = predictions.numpy()
test_df['class'] = test_df['label'].map(lambda x: class_map[x])

In [None]:
# show random test images predictions
# Shuffle every row, keep the first 30, and display them with their predicted class.
show_images(test_df.sample(frac=1).iloc[:30], TEST_PATH)

In [None]:
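# model evaluation on test_dataset
# (left disabled: test_dataset is built with y_col=None / class_mode=None, so it
# yields images only and there are no labels to evaluate against)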
#test_loss, test_acc = model.evaluate(test_dataset)
#print('Test Loss:', test_loss)
#print('Test Accuracy:', test_acc)

In [None]:
# submission csv creation
submission_df = test_df.copy()
submission_df['id'] = submission_df['image'].str.split('.').str[0]
submission_df['label'] = submission_df['class']
submission_df.drop(['image', 'class'], axis=1, inplace=True)
submission_df.to_csv('submission.csv', index=False)