In [1]:
import numpy as np
import cv2
import keras_ocr
from transformers import AutoImageProcessor, AutoModelForObjectDetection
from PIL import Image
import torch
import os
from collections import defaultdict

In [2]:
# Load the image processor and the object-detection model from the same
# pretrained checkpoint so that preprocessing matches what the model expects.
DETR_CHECKPOINT = "microsoft/conditional-detr-resnet-50"
image_processor = AutoImageProcessor.from_pretrained(DETR_CHECKPOINT)
model = AutoModelForObjectDetection.from_pretrained(DETR_CHECKPOINT)

In [3]:
# keras_ocr is already imported in the first cell; this repeated import is redundant.
import keras_ocr
# Build the keras-ocr pipeline that inpaint_text later calls via pipeline.recognize().
# The output of this cell shows it looking for two weight files
# (craft_mlt_25k.h5 and crnn_kurapan.h5) under the user's .keras-ocr directory.
pipeline = keras_ocr.pipeline.Pipeline()

Looking for C:\Users\rohit\.keras-ocr\craft_mlt_25k.h5
Looking for C:\Users\rohit\.keras-ocr\crnn_kurapan.h5


In [4]:
def preprocess_image(image, filter_diameter=5, sigma_color=55, sigma_space=60, threshold=240):
    """Binarize a BGR image so that only its brightest pixels remain.

    The image is smoothed with a bilateral filter, converted to grayscale and
    then thresholded: pixels brighter than ``threshold`` become 255 and every
    other pixel becomes 0. The defaults reproduce the values this notebook
    has always used.

    Args:
        image: BGR image array (e.g. the result of ``cv2.imread``).
        filter_diameter: Pixel neighbourhood diameter for the bilateral filter.
        sigma_color: Bilateral filter sigma in colour space.
        sigma_space: Bilateral filter sigma in coordinate space.
        threshold: Grayscale cut-off above which a pixel is kept (set to 255).

    Returns:
        Single-channel binary image whose values are 0 or 255.

    Raises:
        ValueError: If ``image`` is ``None``, which is what ``cv2.imread``
            returns when a file cannot be read.
    """
    if image is None:
        # Fail early with a clear message instead of an opaque OpenCV error.
        raise ValueError("preprocess_image received None; was the image file read successfully?")

    smoothed = cv2.bilateralFilter(image, filter_diameter, sigma_color, sigma_space)
    gray = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    return binary

In [5]:
def midpoint(x1, y1, x2, y2):
    """Return the integer midpoint of the segment from (x1, y1) to (x2, y2).

    Each coordinate pair is averaged and the result is truncated toward zero
    by ``int``.
    """
    mid_x, mid_y = (int((start + end) / 2) for start, end in ((x1, x2), (y1, y2)))
    return mid_x, mid_y

In [6]:
def inpaint_text(img_path, pipeline):
    """Remove OCR-detected text from an image by inpainting over it.

    The image is binarized with ``preprocess_image``, passed to the OCR
    pipeline, and every detected text box is filled into a mask. The original
    colour image is then inpainted wherever the mask is set.

    Args:
        img_path: Path of the image file, loaded with ``cv2.imread``.
        pipeline: OCR pipeline exposing ``recognize(images)``. Element ``[1]``
            of each prediction is used as the polygon points of a text box.

    Returns:
        The colour image with the detected text regions inpainted.

    Raises:
        ValueError: If the image at ``img_path`` cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread does not raise on a missing or undecodable file; it
        # returns None, which would otherwise fail later with an obscure error.
        raise ValueError(f"Could not read image: {img_path}")

    binary = preprocess_image(img)
    # The binarized image is single-channel; convert it back to three channels
    # before handing it to the OCR pipeline.
    ocr_input = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
    predictions = pipeline.recognize([ocr_input])[0]

    # Single-channel mask with the same height/width as the source image.
    mask = np.zeros(img.shape[:2], dtype="uint8")
    for prediction in predictions:
        # Polygon coordinates are truncated to integer pixel positions.
        polygon = np.array([prediction[1]], dtype=np.int32)
        cv2.fillPoly(mask, polygon, 255)

    return cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

In [7]:
def perform_object_detection(image, image_processor, model, threshold=0.9):
    """Run object detection on a BGR image and return the filtered detections.

    Args:
        image: BGR image array (OpenCV channel order).
        image_processor: Processor that prepares model inputs and provides
            ``post_process_object_detection``.
        model: Object-detection model called as ``model(**inputs)``.
        threshold: Minimum confidence passed to the post-processing step.
            Defaults to 0.9, the value previously hard-coded here.

    Returns:
        The post-processed result for the single input image; callers in this
        notebook read its ``"scores"``, ``"labels"`` and ``"boxes"`` entries.
    """
    # The processor is given a PIL image in RGB order, so convert from BGR first.
    image_pil = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    inputs = image_processor(images=image_pil, return_tensors="pt", input_data_format="channels_last")

    # Inference only: disable autograd so no computation graph is built or
    # retained for every image in a batch run.
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); the post-processor is given (height, width).
    target_sizes = torch.tensor([image_pil.size[::-1]])
    detections = image_processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=threshold
    )
    return detections[0]

In [8]:
def save_image(image, output_folder, original_filename):
    """Write an inpainted image to ``output_folder`` as ``inpainted_<original_filename>``.

    Args:
        image: Image array to write with ``cv2.imwrite``.
        output_folder: Destination directory; created if it does not exist.
        original_filename: File name of the source image; its extension
            determines the output file name's extension.

    Returns:
        The path the image was written to.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)
    output_path = os.path.join(output_folder, f"inpainted_{original_filename}")
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(output_path, image):
        raise OSError(f"Failed to write image: {output_path}")
    return output_path

In [9]:
def process_images(img_folder, output_file_path, output_img_folder):
    """Inpaint text in every image of a folder, run object detection, and save the results.

    For each PNG/JPG/JPEG file in ``img_folder`` (matched case-insensitively,
    processed in sorted file-name order) the text is removed with
    ``inpaint_text``, objects are detected on the inpainted image, the
    inpainted image is saved, and one report section is written.

    Uses the module-level ``pipeline``, ``image_processor`` and ``model``.

    Args:
        img_folder: Directory containing the input images.
        output_file_path: Text file receiving the detection report; it is
            opened in write mode, so existing content is replaced.
        output_img_folder: Directory where inpainted images are saved.
    """
    image_extensions = (".png", ".jpg", ".jpeg")

    with open(output_file_path, "w") as output_file:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # report reproducible between runs.
        for filename in sorted(os.listdir(img_folder)):
            # Compare in lower case so files such as "IMG.PNG" are not skipped.
            if not filename.lower().endswith(image_extensions):
                continue

            img_path = os.path.join(img_folder, filename)
            inpainted_img = inpaint_text(img_path, pipeline)
            results = perform_object_detection(inpainted_img, image_processor, model)

            save_image(inpainted_img, output_img_folder, filename)

            output_file.write(f"Image - {img_path} :\n")
            for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
                box = [round(i, 2) for i in box.tolist()]
                output_file.write(f"Detected {model.config.id2label[label.item()]} with confidence {round(score.item(), 3)} at location {box}\n")
            # Blank line separates one image's section from the next.
            output_file.write("\n")

    print(f"Detection results saved to {output_file_path}")
    print(f"Inpainted images saved to {output_img_folder}")

In [10]:
# Directory containing the input images.
# NOTE: machine-specific absolute path; adjust it for the environment the notebook runs in.
img_folder = r"C:\Users\rohit\Desktop\hate\hateful_memes\img\train\hateful"

# Text file that receives the per-image detection report.
# process_images opens it in write mode, so any existing content is replaced.
output_file_path = r"C:\Users\rohit\Desktop\hate\hateful_memes\Caption Impact Assessment\Object Detection and Caption Impact Assesssment\objectDetectionWithoutText.txt"

# Folder for the inpainted (text-removed) images; a relative path, so it is
# resolved against the current working directory.
output_img_folder = "imagesWithTextRemoved"

# Run the whole pipeline: inpaint text, detect objects, save images and the report.
process_images(img_folder, output_file_path, output_img_folder)

Detection results saved to C:\Users\rohit\Desktop\hate\hateful_memes\Caption Impact Assessment\Object Detection and Caption Impact Assesssment\objectDetectionWithoutText.txt
Inpainted images saved to imagesWithTextRemoved
