In [1]:
#Importing required libraries
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
import numpy as np
import sys

In [2]:
# ---- Configuration ----

# TF Hub feature-vector module used as the frozen backbone.
# MobileNetV3 is used instead of a ViT because of a compatibility issue.
MODEL_URL = "https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5"

# Data settings
IMG_SIZE = 224       # images are resized to IMG_SIZE x IMG_SIZE pixels
NUM_CLASSES = 100    # number of CIFAR-100 classes (one-hot depth / head width)
BATCH_SIZE = 64

# Optimisation settings
# NOTE(review): 0.01 is 10x the Keras default Adam learning rate (0.001) --
# confirm this is intentional.
LEARNING_RATE = 0.01
NUM_EPOCHS = 1

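MobileNetV3-Large (100% width, 224×224 input) is a lightweight convolutional neural network pre-trained on ImageNet. The TF Hub variant used here has no classification head: it outputs a 1280-dimensional feature vector per image, which serves as the frozen backbone for transfer learning. It expects float inputs scaled to [0, 1], which is why the preprocessing step divides pixel values by 255.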

Data Loading & preprocessing

In [3]:
def preprocess_data(image, label):
  """Prepare one (image, label) example for the feature extractor.

  The image is cast to float32, resized to IMG_SIZE x IMG_SIZE and divided by
  255 (mapping 0-255 pixel values into [0, 1]). Only the image is converted to
  float; the label is one-hot encoded over NUM_CLASSES classes.

  Args:
    image: image tensor (e.g. a 32x32 CIFAR-100 image).
    label: integer class index.

  Returns:
    Tuple (image, label) of the processed tensors.
  """
  target_size = (IMG_SIZE, IMG_SIZE)

  # Cast, upscale to the backbone's input size, then normalise.
  scaled = tf.image.resize(tf.cast(image, tf.float32), target_size) / 255.0

  return scaled, tf.one_hot(label, NUM_CLASSES)

In [4]:
def load_cifar100():
  """Load CIFAR-100 and build batched tf.data pipelines for train and test.

  Returns:
    Tuple (train_ds, test_ds). Both yield batches of BATCH_SIZE examples that
    have been passed through preprocess_data; only train_ds is shuffled.
  """
  (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()

  # Flatten the labels to 1D so each dataset element carries a scalar label.
  y_train = y_train.reshape(-1)
  y_test = y_test.reshape(-1)

  # AUTOTUNE lets tf.data pick the map parallelism and prefetch depth itself.
  AUTOTUNE = tf.data.AUTOTUNE

  # Shuffle BEFORE the map: the buffer then holds the small raw images instead
  # of the upscaled float32 ones (for CIFAR-100: 50,000 x 32x32x3 uint8 is
  # ~150 MB, versus ~600 MB for 1,000 x 224x224x3 float32). A buffer covering
  # the whole training set also gives a uniform shuffle each epoch rather than
  # a local one.
  train_ds = (
      tf.data.Dataset.from_tensor_slices((x_train, y_train))
      .shuffle(buffer_size=len(x_train))
      .map(preprocess_data, num_parallel_calls=AUTOTUNE)
      .batch(BATCH_SIZE)
      .prefetch(AUTOTUNE)  # prepare the next batch while the current one trains
  )

  # The evaluation data keeps its original order (no shuffle).
  test_ds = (
      tf.data.Dataset.from_tensor_slices((x_test, y_test))
      .map(preprocess_data, num_parallel_calls=AUTOTUNE)
      .batch(BATCH_SIZE)
      .prefetch(AUTOTUNE)
  )

  return train_ds, test_ds

tf.data.AUTOTUNE is a TensorFlow constant that lets the tf.data pipeline automatically decide how many parallel threads to use for operations like map() and how many batches to prefetch.

Model definition & Transfer learning

In [9]:
def build_transfer_model():
  """Build and compile the CIFAR-100 transfer-learning classifier.

  A frozen TF Hub feature extractor (loaded from MODEL_URL) feeds a
  Dropout(0.3) layer and a softmax Dense head with NUM_CLASSES units. The
  model is compiled with Adam (LEARNING_RATE), categorical cross-entropy and
  an accuracy metric, and its summary is written to stderr.

  The layer and model names still contain "vit" although MODEL_URL points at a
  MobileNetV3 module; they are kept unchanged.

  Returns:
    The compiled tf.keras.Model.
  """
  image_shape = (IMG_SIZE, IMG_SIZE, 3)

  # Pre-trained backbone from TF Hub; trainable=False freezes its weights.
  backbone = hub.KerasLayer(
      MODEL_URL,
      input_shape=image_shape,
      trainable=False,
      name='vit_feature_extractor',
  )

  images = tf.keras.Input(shape=image_shape, name='input_image')

  # The hub layer is called from inside a Lambda as a workaround for an error
  # raised when it is used directly in the functional graph.
  # NOTE(review): the captured summary reports 0 non-trainable params, so the
  # backbone weights appear not to be tracked by the model -- verify before
  # relying on model saving/export.
  features = tf.keras.layers.Lambda(lambda batch: backbone(batch))(images)

  # Classification head: dropout followed by a softmax over the classes.
  regularized = tf.keras.layers.Dropout(0.3)(features)
  probabilities = tf.keras.layers.Dense(
      NUM_CLASSES,
      activation='softmax',
      name='classification_head',
  )(regularized)

  model = tf.keras.Model(
      inputs=images,
      outputs=probabilities,
      name='cifar100_tl_tf_vit',
  )

  # Labels are one-hot encoded, hence CategoricalCrossentropy.
  model.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
      loss=tf.keras.losses.CategoricalCrossentropy(),
      metrics=['accuracy'],
  )

  def _print_to_stderr(text):
    # Route the summary to stderr and flush so it shows up immediately.
    print(text, file=sys.stderr, flush=True)

  model.summary(print_fn=_print_to_stderr)
  return model

Training

In [10]:
# Seed the Python, NumPy and TensorFlow RNGs before the datasets and the model
# are created, so the shuffle order, the Dense initialisation and the dropout
# masks are repeatable each time this cell is run.
# NOTE(review): some GPU kernels may still be non-deterministic.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

train_ds, test_ds = load_cifar100()
model = build_transfer_model()

# NOTE(review): the test split is also used as validation data here, so the
# reported val_* metrics are test metrics; do not tune on them.
history = model.fit(
    train_ds,
    epochs = NUM_EPOCHS,
    validation_data = test_ds,
    verbose = 1
)

Model: "cifar100_tl_tf_vit"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_image (InputLayer)        │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lambda_2 (Lambda)               │ (None, 1280)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ (None, 1280)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ classification_head (Dense)     │ (None, 100)            │       128,100 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 128,100 (500.39 KB)
 Trainable params: 128,100 (500.39 KB)
 Non-trainable params: 0 (0.00 B)



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1800s[0m 2s/step - accuracy: 0.5225 - loss: 2.5734 - val_accuracy: 0.6398 - val_loss: 2.3893


In [11]:
# Final evaluation on the test dataset. The model was compiled with a single
# 'accuracy' metric, so evaluate() yields the loss followed by the accuracy.
loss, accuracy = model.evaluate(test_ds)
# accuracy is a fraction; it is multiplied by 100 to print a percentage.
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy*100:.2f}%")

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m280s[0m 2s/step - accuracy: 0.6359 - loss: 2.4254
Test Loss: 2.3893, Test Accuracy: 63.98%
