In [1]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"koushalsanjaymodi","key":"08f98bce9fb8eb0c3626fc71db0aa6f7"}'}

In [2]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
!kaggle competitions download -c dogs-vs-cats

Downloading dogs-vs-cats.zip to /content
 96% 779M/812M [00:08<00:00, 204MB/s]
100% 812M/812M [00:08<00:00, 95.3MB/s]


In [4]:
!unzip -qq dogs-vs-cats.zip

In [5]:
!unzip -qq train.zip

In [6]:
"""
cats_vs_dogs_small:
train: cats (1000) and dogs (1000)
val: cats (500) and dogs (500)
test: cats (1000) and dogs (1000)
"""

'\ncats_vs_dogs_small:\ntrain: cats (1000) and dogs (1000)\nval: cats (500) and dogs (500)\ntest: cats (1000) and dogs (1000)\n'

In [7]:
import os, shutil, pathlib

original_dir = pathlib.Path("train")
new_base_dir = pathlib.Path("cats_vs_dogs_small")

def make_subset(subset_name, start_index, end_index):
    for category in ("cat", "dog"):
        dir = new_base_dir / subset_name / category
        os.makedirs(dir)
        fnames = [f"{category}.{i}.jpg" for i in range(start_index, end_index)]
        for fname in fnames:
            shutil.copyfile(src=original_dir / fname,
                            dst=dir / fname)

make_subset("train", start_index=0, end_index=1000)
make_subset("validation", start_index=1000, end_index=1500)
make_subset("test", start_index=1500, end_index=2500)

In [8]:
from tensorflow.keras import layers
from tensorflow import keras

In [9]:
from tensorflow.keras.utils import image_dataset_from_directory

train_dataset = image_dataset_from_directory(
    new_base_dir / "train",
    image_size = (180, 180),
    batch_size = 32
)

validation_dataset = image_dataset_from_directory(
    new_base_dir / "validation",
    image_size = (180, 180),
    batch_size = 32
)

test_dataset = image_dataset_from_directory(
    new_base_dir / "test",
    image_size = (180, 180),
    batch_size = 32
)

Found 2000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.


In [10]:
conv_base = keras.applications.vgg16.VGG16(
    weights = "imagenet",
    include_top = False
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [11]:
conv_base.trainable = True
print("This is the number of trainable weights before freezing the conv base:", len(conv_base.trainable_weights))

conv_base.trainable = False
print("This is the number of trainable weights after freezing the conv base:", len(conv_base.trainable_weights))

This is the number of trainable weights before freezing the conv base: 26
This is the number of trainable weights after freezing the conv base: 0


In [12]:
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.2),
    ]
)

In [13]:
import matplotlib.pyplot as plt

In [14]:
inputs = keras.Input(shape = (180, 180, 3))
x = data_augmentation(inputs)
x = keras.applications.vgg16.preprocess_input(x)
x = conv_base(x)
x = layers.Flatten()(x)
x = layers.Dense(256)(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation= "sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(
    optimizer = "rmsprop",
    loss = "binary_crossentropy",
    metrics = ["accuracy"]
)

In [20]:
# Fine-tuning a pretrained model

# Add our custom network on top of an already-trained base network
# Freeze the base network
# Train the part we added
# Unfreeze some layers in the base network (upto block4_pool)
# Jointly train both these layers and the part we added


In [21]:
conv_base.trainable = True

for layer in conv_base.layers[:-4]:
  layer.trainable = False


model.compile(optimizer = keras.optimizers.RMSprop(learning_rate = 1e-5),
              loss = "binary_crossentropy",
              metrics = ["accuracy"])


callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath = "fine_tuning.keras",
        save_best_only = True,
        monitor = "val_loss"
    )
]

history = model.fit(
    train_dataset,
    epochs = 30,
    validation_data = validation_dataset,
    callbacks = callbacks
)

Epoch 1/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 217ms/step - accuracy: 0.9940 - loss: 0.2498 - val_accuracy: 0.9770 - val_loss: 1.8225
Epoch 2/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 173ms/step - accuracy: 0.9911 - loss: 0.4166 - val_accuracy: 0.9760 - val_loss: 2.1068
Epoch 3/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 181ms/step - accuracy: 0.9939 - loss: 0.1936 - val_accuracy: 0.9810 - val_loss: 1.6517
Epoch 4/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 174ms/step - accuracy: 0.9891 - loss: 0.2529 - val_accuracy: 0.9780 - val_loss: 2.5161
Epoch 5/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 204ms/step - accuracy: 0.9913 - loss: 0.2397 - val_accuracy: 0.9810 - val_loss: 1.9186
Epoch 6/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 169ms/step - accuracy: 0.9911 - loss: 0.4055 - val_accuracy: 0.9820 - val_loss: 1.9004
Epoch 7/30
[1m63/63[

In [23]:
model = keras.models.load_model(
    "fine_tuning.keras"
)
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 92ms/step - accuracy: 0.9786 - loss: 1.2568
Test accuracy: 0.975
