In [None]:
%pip install tensorflow tensorflow-hub tensorflow-datasets matplotlib

In [None]:
# Import necessary libraries
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2
from PIL import Image
import requests
from io import BytesIO

# Report the library versions in use so results can be traced to an environment
print(f"TensorFlow version: {tf.__version__}")
print(f"TensorFlow Hub version: {hub.__version__}")

In [None]:
def load_tiny_coco(train_size=50, val_size=20, shuffle_files=False):
    """
    Loads a small slice of the COCO 2017 dataset through TFDS split slicing.

    Slicing only limits how many examples are *read*: on first use TFDS still
    downloads and prepares the complete ``coco/2017`` dataset before the
    slice can be served.

    Args:
        train_size: number of leading examples to take from the train split.
        val_size: number of leading examples to take from the validation split.
        shuffle_files: forwarded to ``tfds.load``. Defaults to False so that
            the example order is stable and every cell that calls
            ``dataset.take(n)`` sees the same images on every run.

    Returns:
        Tuple ``(train_ds, val_ds, ds_info)``.

    Raises:
        ValueError: if either size is negative (a negative bound would be
            read as "all but the last n" and load almost the whole split).
    """
    if train_size < 0 or val_size < 0:
        raise ValueError(
            f"train_size and val_size must be non-negative, got {train_size} and {val_size}"
        )

    # One call serves both slices and the metadata, instead of resolving the
    # dataset builder twice.
    (train_ds, val_ds), ds_info = tfds.load(
        'coco/2017',
        split=[f'train[:{train_size}]', f'validation[:{val_size}]'],
        with_info=True,
        shuffle_files=shuffle_files
    )

    print("Dataset loaded successfully!")
    return train_ds, val_ds, ds_info

# Fetch the tiny train/validation subsets together with the dataset metadata
print("Loading tiny subset of COCO dataset...")
train_dataset, validation_dataset, dataset_info = load_tiny_coco()

# Class names, indexed by the integer object labels stored in the dataset
label_feature = dataset_info.features['objects']['label']
class_names = label_feature.names

In [None]:
def display_sample_images(dataset, num_images=2):
    """
    Plots the first ``num_images`` examples of ``dataset`` with their annotations.

    Every example gets its own figure showing the image, one red rectangle per
    annotated object and the object's class name just above the rectangle.
    Boxes are read as relative [ymin, xmin, ymax, xmax] and scaled to pixels.
    Label text comes from the notebook-level ``class_names`` list.
    """
    samples = dataset.take(num_images)
    for index, sample in enumerate(samples):
        pixels = sample["image"].numpy()
        # Image size is the same for every box of this example
        img_h, img_w = pixels.shape[0:2]

        # One figure per example
        plt.figure(figsize=(10, 10))
        plt.imshow(pixels)
        plt.title(f"Sample image {index+1}")

        annotations = sample["objects"]
        for bbox, class_idx in zip(annotations["bbox"], annotations["label"]):
            top, left, bottom, right = bbox.numpy()

            # Top-left corner of the box in pixel coordinates
            anchor_x = left * img_w
            anchor_y = top * img_h

            outline = patches.Rectangle(
                (anchor_x, anchor_y),
                (right - left) * img_w,
                (bottom - top) * img_h,
                linewidth=2,
                edgecolor='r',
                facecolor='none'
            )
            plt.gca().add_patch(outline)

            # Class name slightly above the box
            plt.text(
                anchor_x,
                anchor_y - 5,
                class_names[class_idx.numpy()],
                color='red',
                bbox=dict(facecolor='white', alpha=0.7)
            )
        plt.show()

# Display some examples to verify our data loading worked
print("\nDisplaying sample images from our tiny subset:")
display_sample_images(train_dataset)

In [None]:
# Load a pre-trained object detection model from TensorFlow Hub.
# Later cells read its 'detection_boxes', 'detection_scores' and
# 'detection_classes' outputs.
DETECTOR_URL = "https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2"
detector = hub.load(DETECTOR_URL)

In [None]:
# Run Detector and Visualize
def _detector_class_name(class_id, names):
    """
    Returns the dataset class name for a detector class id.

    The detector reports sparse COCO category ids (1..90, with some ids never
    used), while ``names`` is the contiguous 80-entry list from the dataset.
    Ids that cannot be mapped yield the generic text ``"class <id>"``.
    """
    # COCO category ids that do not correspond to any of the 80 classes
    unused_ids = (12, 26, 29, 30, 45, 66, 68, 69, 71, 83)
    if class_id >= 1 and class_id not in unused_ids:
        # Shift to 0-based and close the gaps left by unused ids below this one
        index = class_id - 1 - sum(unused < class_id for unused in unused_ids)
        if index < len(names):
            return names[index]
    return f"class {class_id}"


def run_detector_and_visualize(example):
    """
    Runs the detector on one dataset example and plots predictions next to ground truth.

    Ground-truth boxes are drawn in green; detections scoring above 0.5 are
    drawn in red and annotated with "<class name>: <score>". Uses the
    notebook-level ``detector`` and ``class_names``.

    Args:
        example: mapping with an "image" entry and an "objects" entry whose
            "bbox" holds relative [ymin, xmin, ymax, xmax] boxes.
    """
    # Work on plain arrays so the same code handles tensors and numpy inputs
    image = np.asarray(example["image"])
    ground_truth_boxes = np.asarray(example["objects"]["bbox"])
    height, width = image.shape[0], image.shape[1]

    # Preprocess and run detection
    converted_img = tf.image.convert_image_dtype(image, tf.uint8)[tf.newaxis, ...]
    result = detector(converted_img)
    result = {key: value.numpy() for key, value in result.items()}

    # Visualize results (with ground truth for comparison)
    plt.figure(figsize=(10, 7))
    plt.imshow(image)
    ax = plt.gca()

    # Ground truth boxes (relative [ymin, xmin, ymax, xmax])
    for idx, (ymin, xmin, ymax, xmax) in enumerate(ground_truth_boxes):
        rect = patches.Rectangle(
            (xmin * width, ymin * height),
            (xmax - xmin) * width, (ymax - ymin) * height,
            linewidth=1, edgecolor='g', facecolor='none',
            # Only the first box carries the legend label, so the legend
            # shows a single "Ground Truth" entry instead of one per box.
            label='Ground Truth' if idx == 0 else '_nolegend_')
        ax.add_patch(rect)

    # Predicted boxes
    first_prediction = True
    detections = zip(result['detection_scores'][0],
                     result['detection_boxes'][0],
                     result['detection_classes'][0])
    for score, (ymin, xmin, ymax, xmax), raw_class in detections:
        if score <= 0.5:  # Confidence threshold
            continue

        # Always resolves to some text, so the label can never be unbound or
        # left over from a previous detection.
        label = _detector_class_name(int(raw_class), class_names)

        rect = patches.Rectangle(
            (xmin * width, ymin * height),
            (xmax - xmin) * width, (ymax - ymin) * height,
            linewidth=1, edgecolor='r', facecolor='none',
            label='Predicted' if first_prediction else '_nolegend_')
        ax.add_patch(rect)
        first_prediction = False

        plt.text(xmin * width, ymin * height - 5, f'{label}: {score:.2f}', color='white', backgroundcolor='r')

    # Skip the legend when nothing was drawn (avoids an empty-legend warning)
    if ax.get_legend_handles_labels()[0]:
        ax.legend()
    plt.show()





In [None]:
# Compare detections with ground truth on the first two training images
for sample in train_dataset.take(2):
    run_detector_and_visualize(sample)

In [None]:
# COCO category ids that do not correspond to any of the 80 dataset classes.
# The detector reports sparse COCO category ids (1..90) whereas `class_names`
# is a contiguous 0-based list, so these gaps must be skipped when looking up
# a name.
COCO_UNUSED_CATEGORY_IDS = (12, 26, 29, 30, 45, 66, 68, 69, 71, 83)

print("\nProcessing sample images from the dataset:")
for sample_idx, example in enumerate(train_dataset.take(3)):
    print(f"\nSample image {sample_idx+1}")
    image = example['image'].numpy()
    # Pixel size is the same for every detection of this image
    h, w = image.shape[0:2]

    # Convert image to the correct format for the detector
    converted_img = tf.image.convert_image_dtype(image, tf.uint8)[tf.newaxis, ...]

    # Run detector on the image
    detections = detector(converted_img)

    # Convert the detections to numpy for plotting
    result = {key: value.numpy() for key, value in detections.items()}

    # Plot the detections
    plt.figure(figsize=(10, 10))
    plt.imshow(image)

    # Draw boxes for detections with confidence > 0.5
    # (the inner index has its own name so it no longer shadows the image counter)
    for det_idx, score in enumerate(result['detection_scores'][0]):
        if score > 0.5:
            ymin, xmin, ymax, xmax = result['detection_boxes'][0][det_idx]
            class_id = int(result['detection_classes'][0][det_idx])

            # Convert normalized coordinates to pixel coordinates
            box = [xmin * w, ymin * h, (xmax - xmin) * w, (ymax - ymin) * h]

            # Draw rectangle
            rect = patches.Rectangle(
                (box[0], box[1]), box[2], box[3],
                linewidth=2, edgecolor='r', facecolor='none'
            )
            plt.gca().add_patch(rect)

            # Translate the sparse, 1-based category id into an index of
            # `class_names`: shift to 0-based and close the gaps below it.
            name_idx = class_id - 1 - sum(unused < class_id for unused in COCO_UNUSED_CATEGORY_IDS)
            has_name = (
                class_id >= 1
                and class_id not in COCO_UNUSED_CATEGORY_IDS
                and name_idx < len(class_names)
            )

            # Add label
            if has_name:
                plt.text(box[0], box[1]-5,
                        f'{class_names[name_idx]}: {score:.2f}',
                        color='red',
                        bbox=dict(facecolor='white', alpha=0.8))

    plt.axis('off')
    plt.show()

In [None]:
# COCO category ids that do not correspond to any of the 80 dataset classes.
# The detector reports sparse COCO category ids (1..90) whereas the dataset
# labels are contiguous indices (0..79); translating between the two has to
# skip these gaps. Must stay sorted (np.searchsorted relies on it).
_COCO_UNUSED_CATEGORY_IDS = np.array([12, 26, 29, 30, 45, 66, 68, 69, 71, 83])


def _detector_classes_to_dataset_labels(class_ids):
    """Translates detector class ids into dataset label indices (-1 when unmappable)."""
    class_ids = np.asarray(class_ids, dtype=int)
    # Number of unused ids strictly below each class id
    skipped = np.searchsorted(_COCO_UNUSED_CATEGORY_IDS, class_ids, side='left')
    labels = class_ids - 1 - skipped
    unmappable = (
        (class_ids < 1)
        | (class_ids > 90)
        | np.isin(class_ids, _COCO_UNUSED_CATEGORY_IDS)
    )
    labels[unmappable] = -1
    return labels


def evaluate_model_performance(dataset, detector, iou_threshold=0.5):
    """
    Scores a detector against ground-truth boxes with precision, recall and F1.

    For every example the detector is run once, detections below a per-image
    confidence threshold (mean - std of the scores, clamped to [0.3, 0.45])
    are dropped, and the remaining detections are matched greedily, highest
    score first, to the not-yet-matched ground-truth box of the same class
    with the largest IoU. A detection is a true positive when that IoU
    exceeds ``iou_threshold``; otherwise it is a false positive. Ground-truth
    boxes left unmatched count as false negatives. Examples on which the
    detector raises are reported and skipped.

    Args:
        dataset: iterable of examples with an "image" entry and an "objects"
            entry holding "bbox" ([ymin, xmin, ymax, xmax]) and "label";
            each value must expose ``.numpy()``.
        detector: callable that takes a uint8 image batch with a leading
            batch axis of size 1 and returns a mapping with
            'detection_boxes', 'detection_scores' and 'detection_classes',
            each exposing ``.numpy()``.
        iou_threshold: IoU a matched pair must exceed to count as correct.

    Returns:
        Tuple ``(precision, recall, f1, detection_stats)``. ``detection_stats``
        holds the kept confidence scores, the best IoU of each kept detection
        against any unmatched ground-truth box, and the image sizes.
    """
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    # Track performance metrics for analysis
    detection_stats = {
        'confidence_scores': [],
        'iou_values': [],
        'image_sizes': []
    }

    for example in dataset:
        image = example["image"].numpy()
        gt_boxes = example["objects"]["bbox"].numpy()
        gt_labels = example["objects"]["label"].numpy()

        # Store image size for analysis
        detection_stats['image_sizes'].append(image.shape[:2])

        # The detector is fed uint8 pixels: clamp and cast anything else
        if image.dtype != np.uint8:
            image = np.clip(image, 0, 255).astype(np.uint8)

        # Add the batch dimension (no further normalisation is applied)
        image_batch = image[np.newaxis, ...]

        # Run detection; a failing image is reported and skipped
        try:
            result = detector(image_batch)
            result = {key: value.numpy() for key, value in result.items()}
        except Exception as e:
            print(f"Detection failed: {e}")
            continue

        boxes = result['detection_boxes'][0]
        scores = result['detection_scores'][0]
        classes = result['detection_classes'][0].astype(int)

        # Per-image confidence threshold: mean - std of the scores, clamped
        # to the range [0.3, 0.45]
        if len(scores) > 0:
            score_mean = np.mean(scores)
            score_std = np.std(scores)
            dynamic_threshold = min(0.45, max(0.3, score_mean - score_std))
        else:
            dynamic_threshold = 0.45

        # Filter predictions
        mask = scores >= dynamic_threshold
        boxes = boxes[mask]
        scores = scores[mask]
        classes = classes[mask]

        # Store confidence scores for analysis
        detection_stats['confidence_scores'].extend(scores)

        # Most confident detections get first pick of the ground-truth boxes
        sort_idx = np.argsort(scores)[::-1]
        boxes = boxes[sort_idx]
        classes = classes[sort_idx]

        # Bring detector class ids into the dataset's label space so they can
        # be compared with gt_labels directly
        pred_labels = _detector_classes_to_dataset_labels(classes)

        # Track matched ground truth boxes
        matched_gt = set()

        for box, pred_label in zip(boxes, pred_labels):
            best_overlap = 0   # best IoU with any unmatched box (statistics only)
            best_iou = 0       # best IoU with an unmatched box of the same class
            best_gt_idx = -1

            for j, gt_box in enumerate(gt_boxes):
                if j in matched_gt:
                    continue
                iou = calculate_iou(gt_box, box)
                if iou > best_overlap:
                    best_overlap = iou
                # Only a same-class box is a candidate match; otherwise a
                # stronger overlap with another class would hide a valid match.
                if gt_labels[j] == pred_label and iou > best_iou:
                    best_iou = iou
                    best_gt_idx = j

            # Store IoU values for analysis
            detection_stats['iou_values'].append(best_overlap)

            if best_gt_idx >= 0 and best_iou > iou_threshold:
                true_positives += 1
                matched_gt.add(best_gt_idx)
            else:
                false_positives += 1

        false_negatives += len(gt_boxes) - len(matched_gt)

    # Calculate metrics (0 when the denominator is empty)
    precision = true_positives / (true_positives + false_positives) if true_positives + false_positives > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0

    # Print detailed performance analysis
    print("\nDetailed Performance Analysis:")
    print(f"IoU Threshold: {iou_threshold:.2f}")
    print(f"\nDetection Counts:")
    print(f"True Positives: {true_positives} (correctly identified objects)")
    print(f"False Positives: {false_positives} (incorrect detections)")
    print(f"False Negatives: {false_negatives} (missed objects)")

    print(f"\nPerformance Metrics:")
    print(f"Precision: {precision:.2f} (accuracy of positive predictions)")
    print(f"Recall: {recall:.2f} (proportion of actual objects detected)")
    print(f"F1 Score: {f1:.2f} (balanced measure of precision and recall)")

    # Analyze detection statistics if we have data
    if detection_stats['confidence_scores']:
        mean_confidence = np.mean(detection_stats['confidence_scores'])
        mean_iou = np.mean(detection_stats['iou_values'])
        print(f"\nDetection Statistics:")
        print(f"Average Confidence Score: {mean_confidence:.2f}")
        print(f"Average IoU Score: {mean_iou:.2f}")

    return precision, recall, f1, detection_stats

def calculate_iou(box1, box2):
    """
    Intersection over Union of two axis-aligned bounding boxes.

    Both boxes use the layout [ymin, xmin, ymax, xmax]. Returns 0 when the
    union has no positive area.
    """
    top_a, left_a, bottom_a, right_a = box1[0], box1[1], box1[2], box1[3]
    top_b, left_b, bottom_b, right_b = box2[0], box2[1], box2[2], box2[3]

    # Extent of the overlap along each axis, clamped at 0 for disjoint boxes
    overlap_h = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    overlap_w = max(0, min(right_a, right_b) - max(left_a, left_b))
    intersection_area = overlap_h * overlap_w

    area_a = (bottom_a - top_a) * (right_a - left_a)
    area_b = (bottom_b - top_b) * (right_b - left_b)
    union_area = area_a + area_b - intersection_area

    if union_area > 0:
        return intersection_area / union_area
    return 0

# Evaluate model performance on the validation subset.
# The results are bound to names so later cells can reuse them, and so the
# cell does not echo the raw return value (which embeds every collected
# score) as its output.
print("Evaluating model performance...")
val_precision, val_recall, val_f1, val_detection_stats = evaluate_model_performance(
    validation_dataset, detector
)
