In [5]:
from google.colab import drive
import os
import tarfile
import numpy as np
import tensorflow as tf
from sklearn.metrics import average_precision_score

# 1. Mount Google Drive and extract dataset

try:
  drive.flush_and_unmount()
  print('Drive unmounted.')
except ValueError:
  pass

drive.mount('/content/drive', force_remount=True)

dataset_tar_path = '/content/drive/MyDrive/VOC2008/VOCtrainval_14-Jul-2008.tar'
extract_path = '/content/'

if not os.path.exists(os.path.join(extract_path, 'VOCdevkit')):
  print("Extracting dataset...")
  with tarfile.open(dataset_tar_path, 'r') as tar:
    tar.extractall(path=extract_path)
  print("Extraction complete.")
else:
  print("Dataset already extracted.")

# 2. Prepare annotations using VOC development kit guidelines

classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
num_classes = len(classes)

voc_dir = os.path.join(extract_path, 'VOCdevkit', 'VOC2008')
images_dir = os.path.join(voc_dir, 'JPEGImages')
imagesets_dir = os.path.join(voc_dir, 'ImageSets', 'Main')

def get_ids(file_path):
  ids = []
  with open(file_path, 'r') as f:
    for line in f:
      parts = line.strip().split()
      if parts:
        ids.append(parts[0])
  return set(ids)

train_ids = get_ids(os.path.join(imagesets_dir, 'aeroplane_train.txt'))
val_ids   = get_ids(os.path.join(imagesets_dir, 'aeroplane_val.txt'))

annotations = {}
for cls_idx, cls in enumerate(classes):
  for split in ['train', 'val']:
    file_path = os.path.join(imagesets_dir, f'{cls}_{split}.txt')
    with open(file_path, 'r') as f:
      for line in f:
        parts = line.strip().split()
        if len(parts) < 2:
          continue
        image_id, label = parts[0], int(parts[1])
        label = 1 if label == 1 else 0
        if image_id not in annotations:
          annotations[image_id] = [0] * num_classes
        annotations[image_id][cls_idx] = label

train_samples = []
val_samples = []
for image_id, label_vec in annotations.items():
  img_path = os.path.join(images_dir, image_id + '.jpg')
  if image_id in train_ids:
    train_samples.append((img_path, np.array(label_vec, dtype=np.float32)))
  elif image_id in val_ids:
    val_samples.append((img_path, np.array(label_vec, dtype=np.float32)))

print(f"Number of training samples: {len(train_samples)}")
print(f"Number of validation samples: {len(val_samples)}")

# 3. Create Dataset pipelines

def load_and_preprocess_image(path, label, target_size):
  image = tf.io.read_file(path)
  image = tf.image.decode_jpeg(image, channels=3)
  image = tf.image.resize(image, target_size)
  image = tf.cast(image, tf.float32)
  return image, label

def create_dataset(samples, target_size, batch_size=32, shuffle=True):
  paths = [s[0] for s in samples]
  labels = [s[1] for s in samples]
  ds = tf.data.Dataset.from_tensor_slices((paths, labels))
  ds = ds.map(lambda p, l: load_and_preprocess_image(p, l, target_size), num_parallel_calls=tf.data.AUTOTUNE)
  if shuffle:
    ds = ds.shuffle(buffer_size=len(paths))
  ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
  return ds

# 4. Build a transfer learning model function

def build_transfer_model(base_model_fn, input_shape, preprocess_input_fn, num_classes, fine_tune=False):
  inputs = tf.keras.Input(shape=input_shape)
  x = preprocess_input_fn(inputs)
  base_model = base_model_fn(weights='imagenet', include_top=False, input_shape=input_shape)
  base_model.trainable = False
  x = base_model(x, training=False)
  x = tf.keras.layers.GlobalAveragePooling2D()(x)
  x = tf.keras.layers.Dense(1024, activation='relu')(x)
  outputs = tf.keras.layers.Dense(num_classes, activation='sigmoid')(x)
  model = tf.keras.Model(inputs, outputs)

  if fine_tune:
    base_model.trainable = True
    for layer in base_model.layers[:-20]:
      layer.trainable = False

  return model

# 5. Define model configurations for three architectures (using input sizes per model)

model_configs = {
  'VGG16': {'base_model_fn': tf.keras.applications.VGG16, 'input_size': (224, 224), 'preprocess_fn': tf.keras.applications.vgg16.preprocess_input},
  'ResNet50': {'base_model_fn': tf.keras.applications.ResNet50, 'input_size': (224, 224), 'preprocess_fn': tf.keras.applications.resnet50.preprocess_input},
  'InceptionV3': {'base_model_fn': tf.keras.applications.InceptionV3, 'input_size': (299, 299), 'preprocess_fn': tf.keras.applications.inception_v3.preprocess_input}
}

# 6. Train each model and evaluate using average precision (VOC-style evaluation)

results = {}
epochs = 3
batch_size = 32

for model_name, config in model_configs.items():
  print(f"\nTraining model: {model_name}")
  input_size = config['input_size'] + (3,)
  preprocess_fn = config['preprocess_fn']
  base_model_fn = config['base_model_fn']

  train_ds = create_dataset(train_samples, target_size=config['input_size'], batch_size=batch_size, shuffle=True)
  val_ds = create_dataset(val_samples, target_size=config['input_size'], batch_size=batch_size, shuffle=False)

  model = build_transfer_model(base_model_fn, input_shape=input_size, preprocess_input_fn=preprocess_fn, num_classes=num_classes, fine_tune=False)

  model.compile(optimizer=tf.keras.optimizers.Adam(), loss='binary_crossentropy', metrics=['accuracy'])

  history = model.fit(train_ds, epochs=epochs, validation_data=val_ds)

  all_gt = []
  all_preds = []
  for images, labels in val_ds:
    preds = model.predict(images)
    all_gt.append(labels.numpy())
    all_preds.append(preds)
  all_gt = np.concatenate(all_gt, axis=0)
  all_preds = np.concatenate(all_preds, axis=0)

  aps = []
  print("\nAverage precision per class:")
  for i in range(num_classes):
    ap = average_precision_score(all_gt[:, i], all_preds[:, i])
    aps.append(ap)
    print(f"{classes[i]}: {ap:.4f}")
  mAP = np.mean(aps)
  print(f"mAP for {model_name}: {mAP:.4f}")

  results[model_name] = {'mAP': mAP}

# 7. Summary Comparison of Models

print("\nFinal mAP Comparison for the three architectures:")
for model_name, metrics in results.items():
  print(f"{model_name}: mAP = {metrics['mAP']:.4f}")

Drive unmounted.
Mounted at /content/drive
Dataset already extracted.
Number of training samples: 2111
Number of validation samples: 2221

Training model: VGG16
Epoch 1/3
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 502ms/step - accuracy: 0.4090 - loss: 0.4192 - val_accuracy: 0.6425 - val_loss: 0.1113
Epoch 2/3
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 367ms/step - accuracy: 0.7103 - loss: 0.0703 - val_accuracy: 0.6385 - val_loss: 0.1044
Epoch 3/3
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 498ms/step - accuracy: 0.7635 - loss: 0.0413 - val_accuracy: 0.6533 - val_loss: 0.1027
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 820ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 220ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m