### features extraction

In [7]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [8]:
!kaggle competitions download -c dogs-vs-cats

Downloading dogs-vs-cats.zip to /content
 99% 802M/812M [00:09<00:00, 65.5MB/s]
100% 812M/812M [00:09<00:00, 90.7MB/s]


In [9]:
!unzip -qq dogs-vs-cats.zip

In [10]:
!unzip -qq train.zip

In [11]:
import os, shutil, pathlib

original_dir = pathlib.Path("train")
new_base_dir = pathlib.Path("cats_vs_dogs_small")

def make_subset(subset_name, start_index, end_index):
    for category in ("cat", "dog"):
        dir = new_base_dir / subset_name / category
        os.makedirs(dir, exist_ok=True)
        fnames = [f"{category}.{i}.jpg" for i in range(start_index, end_index)]
        for fname in fnames:
            shutil.copyfile(src=original_dir / fname,
                            dst=dir / fname)

make_subset("train", start_index=0, end_index=1000)
make_subset("validation", start_index=1000, end_index=1500)
make_subset("test", start_index=1500, end_index=2500)

In [20]:
from tensorflow import keras
from keras.utils import image_dataset_from_directory

train_dataset = image_dataset_from_directory("/content/cats_vs_dogs_small/train", image_size=(180,180), batch_size=32)
validation_dataset = image_dataset_from_directory("/content/cats_vs_dogs_small/validation", image_size=(180,180), batch_size=32)
test_dataset = image_dataset_from_directory("/content/cats_vs_dogs_small/test", image_size=(180,180), batch_size=32)

Found 2000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.


In [21]:
conv_base = keras.applications.vgg16.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(180,180,3)
)

In [12]:
conv_base.summary()

In [13]:
import numpy as np

def extractfeaturesandlabels(dataset):
  allfeatures=[]
  alllabels=[]
  for images, labels in dataset:
    preprocessed_images = keras.applications.vgg16.preprocess_input(images)
    features = conv_base.predict(preprocessed_images)
    allfeatures.append(features)
    alllabels.append(labels)
  return np.concatenate(allfeatures), np.concatenate(alllabels)

train_features, train_labels = extractfeaturesandlabels(train_dataset)
val_features, val_labels = extractfeaturesandlabels(validation_dataset)
test_features, test_labels = extractfeaturesandlabels(test_dataset)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1

In [14]:
print(train_features.shape, train_labels.shape, val_features.shape, val_labels.shape, test_features.shape, test_labels.shape)

(2000, 5, 5, 512) (2000,) (1000, 5, 5, 512) (1000,) (2000, 5, 5, 512) (2000,)


In [17]:
from keras import layers
inputs = keras.Input(shape=(5,5,512))
x = layers.Flatten()(inputs)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs, outputs)
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
callbacks = [keras.callbacks.ModelCheckpoint(filepath='featureextractionwithpretrained.keras', save_best_only=True, monitor='val_loss')]
history = model.fit(train_features, train_labels, epochs=30, validation_data=(val_features, val_labels), callbacks=callbacks)

Epoch 1/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 53ms/step - accuracy: 0.8696 - loss: 12.4228 - val_accuracy: 0.9680 - val_loss: 0.9709
Epoch 2/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 61ms/step - accuracy: 0.9658 - loss: 1.3333 - val_accuracy: 0.9780 - val_loss: 0.4405
Epoch 3/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - accuracy: 0.9872 - loss: 0.4308 - val_accuracy: 0.9720 - val_loss: 0.7177
Epoch 4/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - accuracy: 0.9915 - loss: 0.1606 - val_accuracy: 0.9750 - val_loss: 0.6673
Epoch 5/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - accuracy: 0.9953 - loss: 0.1011 - val_accuracy: 0.9750 - val_loss: 0.7827
Epoch 6/30
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - accuracy: 0.9944 - loss: 0.1728 - val_accuracy: 0.9770 - val_loss: 0.8961
Epoch 7/30
[1m63/63[0m [32m━━━

In [18]:
model.evaluate(test_features, test_labels)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9693 - loss: 1.2604


[1.0073158740997314, 0.9714999794960022]

### features extraction together with data augmentation

In [22]:
conv_base.trainable = False
len(conv_base.trainable_weights)

0

In [26]:
from keras import layers

inputs = keras.Input(shape=(180,180,3))

data_augmentation = keras.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.2)
])

x = data_augmentation(inputs)
x = keras.applications.vgg16.preprocess_input(x)
x = conv_base(x)
x = layers.Flatten()(x)
x = layers.Dense(216, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs, outputs)

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

callbacks = [keras.callbacks.ModelCheckpoint(filepath='featureextractionwithpretrained.keras', save_best_only=True, monitor='val_loss')]

history = model.fit(train_dataset, epochs=50, validation_data=validation_dataset, callbacks=callbacks)

Epoch 1/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 213ms/step - accuracy: 0.8183 - loss: 10.2116 - val_accuracy: 0.9720 - val_loss: 0.4010
Epoch 2/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 204ms/step - accuracy: 0.9426 - loss: 0.6472 - val_accuracy: 0.9740 - val_loss: 0.1579
Epoch 3/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 197ms/step - accuracy: 0.9505 - loss: 0.3152 - val_accuracy: 0.9760 - val_loss: 0.2320
Epoch 4/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 172ms/step - accuracy: 0.9626 - loss: 0.2864 - val_accuracy: 0.9770 - val_loss: 0.1914
Epoch 5/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 171ms/step - accuracy: 0.9530 - loss: 0.3786 - val_accuracy: 0.9790 - val_loss: 0.1859
Epoch 6/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 184ms/step - accuracy: 0.9663 - loss: 0.2436 - val_accuracy: 0.9800 - val_loss: 0.1360
Epoch 7/50
[1m63/63

In [27]:
model.evaluate(test_dataset)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 107ms/step - accuracy: 0.9813 - loss: 0.2152


[0.27688685059547424, 0.9794999957084656]