This script loads the Caltech101 dataset (stored as JPEG images in subfolders), and then uses a pre-trained VGG16 network to obtain neural code features (from the fc2 layer) for each images (see fc2_VGG16.ipynb for more explanation). Those features are stored in pickle files, along with the corresponding class.

The dataset consists of images divided into 101 classes, as well as an extra background/clutter class with random images. We will save one pickle file for all 101 classes, and another for the background set.

In [0]:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model
import os
import glob
import pickle
import numpy as np

In [0]:
# mount the data needed to drive folder so we can use them in colab, see the data download link in Practical 4a.1
from google.colab import drive
!mkdir drive
drive.mount('drive')

mkdir: cannot create directory ‘drive’: File exists
Drive already mounted at drive; to attempt to forcibly remount, call drive.mount("drive", force_remount=True).


In [0]:
#  list all the data in your drive folder to see if mount successfully.
!ls "drive/My Drive/"

data_DL_practical  DL_practical  models


In [0]:
# load pre-trained VGG16 from Keras
base_model = VGG16(weights="imagenet")
fc2_model = Model(inputs=base_model.input, outputs=base_model.get_layer("fc2").output)
fold_path=os.path.join("drive","My Drive","data_DL_practical", "caltech101")

# obtain Caltech101 filepaths, classes, and labels. Convert images to fc2 neural codes
path = os.path.join(fold_path, "classes")
data_paths = [filepath for filepath in glob.glob(os.path.join(path, "**"), recursive=True) if filepath.lower().endswith(".jpg")]
feature_size = int(fc2_model.output.shape[1])  # = 4096 for the fc2-layer of VGG16
data_classes = os.listdir(path)
n_examples = len(data_paths)
data_fc2 = np.empty((n_examples, feature_size))
data_labels = np.empty(n_examples, dtype=int)
i = 0
for y, c in enumerate(data_classes):
    print("loading class {}: {}".format(y, c))
    class_path = os.path.join(path, c)
    for filename in os.listdir(class_path):
        image_path = os.path.join(class_path, filename)
        img = image.load_img(image_path, target_size=(224, 224))
        array = image.img_to_array(img)
        x = np.expand_dims(array, axis=0)
        x = preprocess_input(x)
        fc2 = fc2_model.predict(x)
        data_fc2[i] = fc2
        data_labels[i] = y
        i += 1

# obtain the Caltech101 "BACKGROUND_Google" filepaths and fc2 neural codes.
bg_path = os.path.join(fold_path, "BACKGROUND_Google")
bg_data_paths = [os.path.join(bg_path, filepath) for filepath in os.listdir(bg_path)]
n_bg_examples = len(bg_data_paths)
bg_data_fc2 = np.empty((n_bg_examples, feature_size))
for i, image_path in enumerate(bg_data_paths):
    img = image.load_img(image_path, target_size=(224, 224))
    array = image.img_to_array(img)
    x = np.expand_dims(array, axis=0)
    x = preprocess_input(x)
    fc2 = fc2_model.predict(x)
    bg_data_fc2[i] = fc2

# save neural codes, labels, paths, and list of classes to pickle file
with open(os.path.join(fold_path,"caltech101_VGG16_fc2.p"), "wb") as f:
    pickle.dump((data_fc2, data_labels, data_paths, data_classes), f)

# save neural codes & paths of BACKGROUND_Google class to pickle file
with open(os.path.join(fold_path,"caltech101_VGG16_fc2_bg.p"), "wb") as f:
    pickle.dump((bg_data_fc2, bg_data_paths), f)

loading class 0: stapler
loading class 1: binocular
loading class 2: Faces
loading class 3: strawberry
loading class 4: pizza
loading class 5: crayfish
loading class 6: mayfly
loading class 7: ewer
loading class 8: trilobite
loading class 9: Motorbikes
loading class 10: electric_guitar
loading class 11: emu
loading class 12: sunflower
loading class 13: brontosaurus
loading class 14: watch
loading class 15: cup
loading class 16: rhino
loading class 17: tick
loading class 18: chair
loading class 19: barrel
loading class 20: headphone
loading class 21: wheelchair
loading class 22: kangaroo
loading class 23: hedgehog
loading class 24: dalmatian
loading class 25: nautilus
loading class 26: metronome
loading class 27: dolphin
loading class 28: Leopards
loading class 29: laptop
loading class 30: beaver
loading class 31: mandolin
loading class 32: ferry
loading class 33: elephant
loading class 34: wrench
loading class 35: octopus
loading class 36: anchor
loading class 37: rooster
loading class