This script loads the Caltech101 dataset (stored as JPEG images in subfolders), and then uses a pre-trained VGG16 network to obtain neural code features (from the fc2 layer) for each images (see fc2_VGG16.ipynb for more explanation). Those features are stored in pickle files, along with the corresponding class.

The dataset consists of images divided into 101 classes, as well as an extra background/clutter class with random images. We will save one pickle file for all 101 classes, and another for the background set.

In [1]:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model
import os
import glob
import pickle
import numpy as np

In [2]:
# load pre-trained VGG16 from Keras
base_model = VGG16(weights="imagenet")
fc2_model = Model(inputs=base_model.input, outputs=base_model.get_layer("fc2").output)
fold_path = os.path.join("data", "caltech101")

# obtain Caltech101 filepaths, classes, and labels. Convert images to fc2 neural codes
path = os.path.join(fold_path, "classes")
data_paths = [filepath for filepath in glob.glob(os.path.join(path, "**"), recursive=True) if filepath.lower().endswith(".jpg")]
feature_size = int(fc2_model.output.shape[1])  # = 4096 for the fc2-layer of VGG16
data_classes = os.listdir(path)
n_examples = len(data_paths)
data_fc2 = np.empty((n_examples, feature_size))
data_labels = np.empty(n_examples, dtype=int)
i = 0
for y, c in enumerate(data_classes):
    print("loading class {}: {}".format(y, c))
    class_path = os.path.join(path, c)
    for filename in os.listdir(class_path):
        image_path = os.path.join(class_path, filename)
        img = image.load_img(image_path, target_size=(224, 224))
        array = image.img_to_array(img)
        x = np.expand_dims(array, axis=0)
        x = preprocess_input(x)
        fc2 = fc2_model.predict(x)
        data_fc2[i] = fc2
        data_labels[i] = y
        i += 1

# obtain the Caltech101 "BACKGROUND_Google" filepaths and fc2 neural codes.
bg_path = os.path.join(fold_path, "BACKGROUND_Google")
bg_data_paths = [os.path.join(bg_path, filepath) for filepath in os.listdir(bg_path)]
n_bg_examples = len(bg_data_paths)
bg_data_fc2 = np.empty((n_bg_examples, feature_size))
for i, image_path in enumerate(bg_data_paths):
    img = image.load_img(image_path, target_size=(224, 224))
    array = image.img_to_array(img)
    x = np.expand_dims(array, axis=0)
    x = preprocess_input(x)
    fc2 = fc2_model.predict(x)
    bg_data_fc2[i] = fc2

# save neural codes, labels, paths, and list of classes to pickle file
with open(os.path.join(fold_path,"caltech101_VGG16_fc2.p"), "wb") as f:
    pickle.dump((data_fc2, data_labels, data_paths, data_classes), f)

# save neural codes & paths of BACKGROUND_Google class to pickle file
with open(os.path.join(fold_path,"caltech101_VGG16_fc2_bg.p"), "wb") as f:
    pickle.dump((bg_data_fc2, bg_data_paths), f)

loading class 0: stapler
loading class 1: dalmatian
loading class 2: rooster
loading class 3: anchor
loading class 4: starfish
loading class 5: lamp
loading class 6: soccer_ball
loading class 7: joshua_tree
loading class 8: kangaroo
loading class 9: stop_sign
loading class 10: nautilus
loading class 11: crocodile_head
loading class 12: barrel
loading class 13: ibis
loading class 14: mandolin
loading class 15: crab
loading class 16: airplanes
loading class 17: hedgehog
loading class 18: octopus
loading class 19: camera
loading class 20: wheelchair
loading class 21: elephant
loading class 22: platypus
loading class 23: euphonium
loading class 24: cougar_body
loading class 25: menorah
loading class 26: ant
loading class 27: wrench
loading class 28: umbrella
loading class 29: dolphin
loading class 30: Faces_easy
loading class 31: Faces
loading class 32: minaret
loading class 33: hawksbill
loading class 34: sea_horse
loading class 35: panda
loading class 36: Motorbikes
loading class 37: may