**Mount Google Drive in Colab**

In [None]:
# Mount Google Drive at /content/drive so files stored there (such as the
# dataset archive) are reachable from the Colab filesystem.
from google.colab import drive
drive.mount('/content/drive')

**Unzipping the dataset**

In [None]:
!unzip /content/drive/MyDrive/dataset.zip

**Installing necessary libraries**

In [None]:
!apt install tesseract-ocr
!pip install pytesseract

**Importing necessary libraries**

In [None]:
import os
import torch
import torchvision
from torchvision import transforms as T
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import pytesseract
from collections import defaultdict

**Loading the pre-trained model and defining inference helpers**

In [None]:
def get_model():
    """Build a Faster R-CNN (ResNet-50 FPN) detector with pretrained weights.

    Returns:
        The torchvision detection model, switched to evaluation mode.
    """
    # `weights=` replaces the deprecated `pretrained=True` argument
    # (torchvision >= 0.13); "DEFAULT" selects the default pretrained weights.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    model.eval()
    return model

# Preprocessing pipeline: image -> tensor
def get_transform():
    """Return the preprocessing pipeline applied to an image before inference.

    The pipeline consists of a single step, `T.ToTensor()`, wrapped in
    `T.Compose`.
    """
    steps = [T.ToTensor()]
    return T.Compose(steps)

# Run the detector on a single image
def detect_objects(image, model, transform):
    """Run `model` on one image and return the result for that image.

    Args:
        image: Input image, passed to `transform`.
        model: Callable that takes a batched tensor and returns a sequence
            with one entry per image in the batch.
        transform: Callable that converts `image` into a tensor.

    Returns:
        The first (and only) entry of the model output.
    """
    # The model is called on a batch, so add a leading batch dimension of 1.
    batch = torch.unsqueeze(transform(image), 0)
    # Inference only: gradient tracking is disabled for the forward pass.
    with torch.no_grad():
        return model(batch)[0]

**Extracting text from images**

In [None]:
# OCR helper built on Tesseract
def extract_text(image):
    """Return the text Tesseract recognises in `image`, using the English model."""
    recognized = pytesseract.image_to_string(image, lang='eng')
    return recognized

**Adding text back to the images**

In [None]:
def add_text(image, text, position=(10, 10)):
    """Draw `text` in white onto `image` using PIL's default font.

    The image is modified in place and also returned.

    Args:
        image: PIL image to draw on.
        text: String to render.
        position: (x, y) coordinates passed to `ImageDraw.text`.

    Returns:
        The same `image` object, now with the text drawn on it.
    """
    canvas = ImageDraw.Draw(image)
    default_font = ImageFont.load_default()

    def render(content):
        canvas.text(position, content, fill="white", font=default_font)

    try:
        render(text)
    except UnicodeEncodeError:
        # The font could not encode some characters: drop everything that is
        # not ASCII and draw what remains.
        render(text.encode('ascii', 'ignore').decode('ascii'))
    return image

**Comparing detections: detection ratio and IoU**

In [None]:
def compare_detections(original_outputs, captioned_outputs, threshold=0.5):
    """Compare detections on an image before and after text was drawn on it.

    Args:
        original_outputs: Detection dict for the original image; must provide
            'boxes' (rows of x1, y1, x2, y2) and 'scores' tensors.
        captioned_outputs: Detection dict for the captioned image, same layout.
        threshold: Score a detection must exceed to be counted.

    Returns:
        A tuple `(detection_ratio, average_iou)`:
        - detection_ratio: number of captioned detections scoring above
          `threshold` divided by the number of original detections scoring
          above it; 0.0 when the original has none.
        - average_iou: mean, over every original box, of the best IoU against
          any captioned box; 0.0 when the original has no boxes. Boxes are
          not filtered by `threshold` for this metric.
    """
    original_boxes = original_outputs['boxes'].detach().cpu().numpy()
    original_scores = original_outputs['scores'].detach().cpu().numpy()
    captioned_boxes = captioned_outputs['boxes'].detach().cpu().numpy()
    captioned_scores = captioned_outputs['scores'].detach().cpu().numpy()

    # Detection count consistency
    original_count = int((original_scores > threshold).sum())
    captioned_count = int((captioned_scores > threshold).sum())
    detection_ratio = captioned_count / original_count if original_count else 0.0

    # Performing IoU: best overlap for each original box. `default=0.0` covers
    # a captioned image with no boxes, where a bare max() would raise.
    ious = [
        max((iou(ob, cb) for cb in captioned_boxes), default=0.0)
        for ob in original_boxes
    ]
    # np.mean of an empty list is nan (with a warning); report 0.0 instead.
    average_iou = float(np.mean(ious)) if ious else 0.0

    return detection_ratio, average_iou


def iou(box1, box2):
    """Return the intersection-over-union of two (x1, y1, x2, y2) boxes.

    Returns 0 when the union has zero area.
    """
    x1, y1, x2, y2 = box1
    x1_, y1_, x2_, y2_ = box2
    # Corners of the overlapping rectangle (empty if the boxes are disjoint).
    xi1, yi1, xi2, yi2 = max(x1, x1_), max(y1, y1_), min(x2, x2_), min(y2, y2_)
    inter_area = max(xi2 - xi1, 0) * max(yi2 - yi1, 0)
    box1_area = (x2 - x1) * (y2 - y1)
    box2_area = (x2_ - x1_) * (y2_ - y1_)
    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area else 0

**Main function to process the dataset**

In [None]:
def main(dataset_dir):
    """Measure how drawing OCR-extracted text onto images changes detections.

    For every PNG/JPEG image in `dataset_dir`: run the detector, extract text
    with OCR, draw that text onto a copy of the image, run the detector
    again, and compare the two sets of detections. The mean detection ratio
    and mean IoU over all processed images are printed at the end.

    Args:
        dataset_dir: Directory containing the image files.
    """
    model = get_model()
    transform = get_transform()

    detection_ratios = []
    average_ious = []

    # os.listdir order is arbitrary; sort for a reproducible processing order.
    for image_name in sorted(os.listdir(dataset_dir)):
        # Case-insensitive match so files such as 'photo.JPG' are not skipped.
        if not image_name.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        image_path = os.path.join(dataset_dir, image_name)

        # convert() returns a new in-memory image, so the file handle can be
        # closed as soon as the conversion is done.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        original_outputs = detect_objects(image, model, transform)

        # Extract text using OCR
        extracted_text = extract_text(image)

        # Add extracted text back onto a copy so the original stays untouched
        captioned_image = add_text(image.copy(), extracted_text)
        captioned_outputs = detect_objects(captioned_image, model, transform)

        detection_ratio, average_iou = compare_detections(original_outputs, captioned_outputs)
        detection_ratios.append(detection_ratio)
        average_ious.append(average_iou)

    # Averaging an empty list would print nan with a RuntimeWarning.
    if not detection_ratios:
        print(f"No images found in {dataset_dir}")
        return

    # Print the results
    print(f"Average Detection Ratio: {np.mean(detection_ratios):.2f}")
    print(f"Average IoU: {np.mean(average_ious):.2f}")

# Entry point: run the evaluation over the dataset directory.
if __name__ == "__main__":
    # Directory containing the images to evaluate; update this path to match
    # where the dataset actually lives.
    dataset_dir = '/content/dataset'
    main(dataset_dir)