In [2]:
import kagglehub

# Fetch the MobileNetV2 feature-vector model through kagglehub.
# The handle carries no version suffix, so the latest published version is
# resolved (version 2 in the recorded run).
model_handle = "google/mobilenet-v2/tensorFlow2/100-224-feature-vector"
path = kagglehub.model_download(model_handle)

# Show where the model files ended up on disk.
print("Path to model files:", path)

Downloading 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading from https://www.kaggle.com/api/v1/models/google/mobilenet-v2/tensorFlow2/100-224-feature-vector/2/download/variables/variables.index...


100%|██████████| 5.13k/5.13k [00:00<00:00, 1.58MB/s]

Downloading from https://www.kaggle.com/api/v1/models/google/mobilenet-v2/tensorFlow2/100-224-feature-vector/2/download/variables/variables.data-00000-of-00001...





Downloading from https://www.kaggle.com/api/v1/models/google/mobilenet-v2/tensorFlow2/100-224-feature-vector/2/download/saved_model.pb...



[A
100%|██████████| 1.24M/1.24M [00:00<00:00, 1.60MB/s]
100%|██████████| 8.65M/8.65M [00:01<00:00, 6.69MB/s]

Path to model files: /home/miguel/.cache/kagglehub/models/google/mobilenet-v2/tensorFlow2/100-224-feature-vector/2





# 1. Data Exploration and feature extraction

According to the model documentation:

> "The input images are expected to have color values in the range [0,1], following the common image input conventions. For this model, the size of the input images is fixed to height x width = 224 x 224 pixels."

The data generators below therefore rescale pixel values by 1/255 and resize every image to 224 x 224.

In [10]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Input contract of the pretrained model: colour values in [0, 1] and a fixed
# 224 x 224 input size. Every tunable lives here instead of being repeated
# in each flow_from_directory call.
SCALE = 1.0 / 255
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
SEED = 42  # fixes the shuffle order of the training batches across runs

# One generator per split; each one only rescales pixel values.
train_generator = ImageDataGenerator(rescale=SCALE)
valid_generator = ImageDataGenerator(rescale=SCALE)
test_generator = ImageDataGenerator(rescale=SCALE)

# Options shared by all splits; resizing is done by `target_size`.
flow_options = dict(target_size=IMG_SIZE, batch_size=BATCH_SIZE)

# Load images from train, validation, and test directories.
# Only the training split is shuffled; validation and test keep file order.
trainset = train_generator.flow_from_directory(
    './train',  # Replace with the path to your training data
    shuffle=True,
    seed=SEED,
    **flow_options,
)

validset = valid_generator.flow_from_directory(
    './valid',  # Replace with the path to your validation data
    shuffle=False,
    **flow_options,
)

testset = test_generator.flow_from_directory(
    './test',  # Replace with the path to your test data
    shuffle=False,
    **flow_options,
)


Found 280 images belonging to 6 classes.
Found 139 images belonging to 6 classes.
Found 50 images belonging to 6 classes.


In [43]:
# Pull the next batch with the builtin next(): it goes through the iterator
# protocol and does not rely on the iterator exposing a `.next()` method.
img, label = next(trainset)

In [None]:
# Collect a few example images for every category from the training batches.
SAMPLES_PER_CATEGORY = 3  # store up to this many samples per category

# Class names ordered by class index, so the argmax of a one-hot label row
# maps straight to its name.
CATEGORIES = sorted(trainset.class_indices, key=trainset.class_indices.get)
samples_per_category = {category: [] for category in CATEGORIES}

# Draw at most one epoch worth of batches and stop early once every
# category has enough samples.
for _ in range(len(trainset)):
    images, labels = next(trainset)
    for image, label in zip(images, labels):
        category = CATEGORIES[int(label.argmax())]
        if len(samples_per_category[category]) < SAMPLES_PER_CATEGORY:
            samples_per_category[category].append(image)
    if all(len(found) >= SAMPLES_PER_CATEGORY
           for found in samples_per_category.values()):
        break

array([0., 0., 1., 0., 0., 0.], dtype=float32)

In [15]:
# Sanity check: the validation and test splits must use the same
# class-name -> index mapping as the training split.
all(split.class_indices == trainset.class_indices for split in (validset, testset))

True

In [31]:
# Integer class index of every training image (one entry per image file).
trainset.labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5], dtype=int32)

In [30]:
# Class names, taken from the keys of the class-name -> index mapping
# in the mapping's own order.
categories = [*validset.class_indices]
categories

['bike', 'car', 'motorcycle', 'other', 'truck', 'van']

In [27]:
# Integer indexing selects a batch, not a single sample: despite the singular
# names, `image` and `label` each hold batch 0 as a whole
# (presumably up to batch_size=32 items; confirm).
image, label = trainset[0]

In [None]:
from matplotlib import pyplot as plt

N_COLS = 3  # number of sample images shown per category

# One row per category; squeeze=False keeps `axes` two-dimensional even when
# there is only a single category.
fig, axes = plt.subplots(len(categories), N_COLS, figsize=(10, 15), squeeze=False)
fig.suptitle("Sample Images from Each Category", fontsize=16)

for i, category in enumerate(categories):
    # The images come from the per-category samples collected from the
    # training batches; `trainset` itself cannot be indexed by category name.
    samples = samples_per_category.get(category, [])
    for j in range(N_COLS):
        axes[i, j].axis('off')
        # Leave the cell blank when fewer than N_COLS samples were collected.
        if j < len(samples):
            axes[i, j].imshow(samples[j])
            axes[i, j].set_title(category)

plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()