Design an object detection model using deep neural networks for simple objects.
a.	 Select appropriate dataset and perform data pre-processing 
b.	 Define architecture in terms of layers 
c.	 Evaluate model performance and label the object with appropriate text.


In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Load the Oxford-IIIT Pet dataset through TensorFlow Datasets.
import tensorflow_datasets as tfds

# `with_info=True` additionally returns the dataset metadata object;
# `as_supervised=False` keeps each example as a dictionary of features.
dataset, info = tfds.load(
    name="oxford_iiit_pet",
    with_info=True,
    as_supervised=False,
)

# Pull out the two splits used by the rest of the notebook.
train_data, test_data = dataset["train"], dataset["test"]

# Show the dataset metadata.
print(info)




Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\Shlok Sonkusare\tensorflow_datasets\oxford_iiit_pet\4.0.0...


  from .autonotebook import tqdm as notebook_tqdm
Dl Completed...: 0 url [00:00, ? url/s]
Dl Completed...:   0%|          | 0/1 [00:00<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:00<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:02<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:02<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:13<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:14<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:18<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:24<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:24<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:29<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:29<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:30<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:32<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:33<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:35<?, ? url/s]
Dl Completed...:   0%|          | 0/2 [00:37<?, ? url/s

In [None]:
IMG_SIZE = 224  # side length, in pixels, of the square image fed to the network

def preprocess(sample):
    """Convert one TFDS example into an ``(image, targets)`` pair.

    The image is resized to ``IMG_SIZE x IMG_SIZE`` and divided by 255.
    The bounding box is read as ``[ymin, xmin, ymax, xmax]`` and multiplied by
    ``IMG_SIZE`` so that it is expressed in pixels of the resized image.

    Two example layouts are accepted:

    * detection-style examples with an ``'objects'`` entry holding per-object
      ``'bbox'`` and ``'label'`` sequences (only the first object is used);
    * flat examples with top-level ``'head_bbox'`` and ``'label'`` entries,
      which is the layout of ``oxford_iiit_pet`` 4.0.0.

    Args:
        sample: dictionary of tensors for a single example.

    Returns:
        ``(image, {'bbox': bbox, 'label': label})``; the dictionary keys match
        the names of the model's two output layers.
    """
    image = tf.image.resize(sample['image'], (IMG_SIZE, IMG_SIZE)) / 255.0

    # `sample` is a plain Python dict of tensors, so this membership check is
    # resolved once when the map function is traced, not per example.
    if 'objects' in sample:
        bbox = sample['objects']['bbox'][0]  # Use only the first object per image for simplicity
        label = sample['objects']['label'][0]
    else:
        # NOTE(review): presumably test-split examples without an annotation
        # file carry a placeholder box; confirm before trusting box metrics
        # computed on the test split.
        bbox = sample['head_bbox']
        label = sample['label']

    # Convert relative bbox to absolute (pixels of the resized image).
    bbox = bbox * IMG_SIZE

    return image, {'bbox': bbox, 'label': label}

BATCH_SIZE = 32
SHUFFLE_BUFFER = 256  # number of preprocessed examples held in memory for shuffling

# Build both pipelines from the raw splits in `dataset` rather than from
# `train_data` / `test_data` themselves, so re-running this cell does not apply
# `preprocess` a second time to already-batched data.
train_data = (
    dataset['train']
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(SHUFFLE_BUFFER)  # reshuffled every epoch; only the training stream is shuffled
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_data = (
    dataset['test']
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)


In [None]:
from tensorflow.keras import layers, Model

def build_model(num_classes=None):
    """Build a small CNN with a box-regression head and a classification head.

    Architecture: two Conv2D(3x3, ReLU) + MaxPooling2D stages (32 then 64
    filters), Flatten, Dense(128, ReLU), followed by two parallel outputs:

    * ``'bbox'``  - Dense(4) with no activation specified;
    * ``'label'`` - Dense(num_classes) with softmax.

    Args:
        num_classes: number of units in the classification head. When omitted
            it is read from the dataset metadata in ``info``, using
            ``info.features['objects']['label']`` if an ``'objects'`` feature
            exists and ``info.features['label']`` otherwise.

    Returns:
        A Keras ``Model`` whose outputs are ``[bbox_output, label_output]``.
    """
    if num_classes is None:
        features = info.features
        if 'objects' in features.keys():
            label_feature = features['objects']['label']
        else:
            # oxford_iiit_pet exposes the class as a top-level 'label' feature.
            label_feature = features['label']
        num_classes = label_feature.num_classes

    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    x = layers.Conv2D(32, (3, 3), activation='relu')(inputs)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(64, (3, 3), activation='relu')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation='relu')(x)

    # Output 1: Bounding Box
    bbox_output = layers.Dense(4, name='bbox')(x)

    # Output 2: Label classification
    label_output = layers.Dense(num_classes, activation='softmax', name='label')(x)

    model = Model(inputs=inputs, outputs=[bbox_output, label_output])
    return model

model = build_model()
model.summary()


In [None]:
# Configure training with the Adam optimizer. Losses and metrics are given per
# output, keyed by the output layer names: the 'bbox' output uses MSE loss and
# MAE metric, the 'label' output uses sparse categorical cross-entropy loss and
# accuracy metric.
model.compile(
    optimizer='adam',
    loss=dict(bbox='mse', label='sparse_categorical_crossentropy'),
    metrics=dict(bbox='mae', label='accuracy'),
)


In [None]:
# Train for 10 epochs on the training pipeline, passing the test pipeline as
# validation data; the value returned by `fit` is kept in `history`.
history = model.fit(x=train_data, epochs=10, validation_data=test_data)


In [None]:
# Evaluate on the test pipeline. `return_dict=True` returns the loss and metric
# values as a dictionary keyed by name instead of an unlabelled list.
model.evaluate(test_data, return_dict=True)


In [None]:
# Class-name lookup table taken from the dataset metadata. Detection-style
# datasets nest the label under 'objects'; oxford_iiit_pet exposes it as a
# top-level 'label' feature.
class_names = (
    info.features['objects']['label']
    if 'objects' in info.features.keys()
    else info.features['label']
).names

def draw_bbox(image, bbox, label):
    """Display `image` with one bounding box and its class name drawn in red.

    Args:
        image: image array passed to ``Axes.imshow``.
        bbox: four values ``(ymin, xmin, ymax, xmax)`` in the pixel
            coordinates of `image`.
        label: integer index into ``class_names``.
    """
    ymin, xmin, ymax, xmax = bbox
    fig, ax = plt.subplots(1)
    ax.imshow(image)

    # Draw rectangle: anchored at (xmin, ymin) with the box width and height.
    rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, color='red')
    ax.add_patch(rect)
    # Place the class name 10 pixels above the top-left corner of the box.
    ax.text(xmin, ymin - 10, class_names[label], color='red', fontsize=12)
    ax.axis('off')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# Predict on one batch and draw the first few predictions.
NUM_EXAMPLES_TO_SHOW = 3

for images, targets in test_data.take(1):
    pred_bbox, pred_labels = model.predict(images)

    # Never index past the end of the batch if it holds fewer images than requested.
    num_shown = min(NUM_EXAMPLES_TO_SHOW, len(pred_bbox))
    for i in range(num_shown):
        img = images[i].numpy()
        bbox = pred_bbox[i]
        # The predicted class is the index of the highest softmax score.
        label = int(np.argmax(pred_labels[i]))
        draw_bbox(img, bbox, label)
