# In [None]:
import cv2 as cv
import numpy as np

def load_alphabet(alphabet_file):
    """Read a vocabulary file and return its lines as a list of strings.

    The whole file is read at once and split with ``str.splitlines``, so
    line terminators are dropped and the file order is preserved.
    """
    with open(alphabet_file) as handle:
        contents = handle.read()
    return contents.splitlines()

def detect_and_recognize_text(image_path, target_word, detection_model_path, recognition_model_path, alphabet_path, rgb=True):
    """Detect text in an image and highlight the regions containing a word.

    Text regions are located with an EAST detector, each region is cropped
    (axis-aligned bounding box of the rotated detection) and passed to a
    CRNN-style recognizer. Regions whose recognized text contains
    ``target_word`` (case-insensitive substring match) are outlined and
    labelled, and the annotated image is shown in a window until a key is
    pressed.

    Args:
        image_path: Path of the image to analyse.
        target_word: Word to look for in the recognized text.
        detection_model_path: Path of the text detection network. The EAST
            wrapper is used, so this is assumed to be an EAST model.
        recognition_model_path: Path of the text recognition network
            (assumed to accept 3-channel input).
        alphabet_path: Path of the vocabulary file, one symbol per line.
        rgb: If true, the recognizer is fed RGB crops (channels swapped
            from OpenCV's BGR order); otherwise the crops stay BGR.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # The generic cv.dnn_TextDetectionModel base class has no usable
    # constructor; the EAST-specific wrapper must be instantiated instead.
    detection_net = cv.dnn.readNet(detection_model_path)
    detection_model = cv.dnn_TextDetectionModel_EAST(detection_net)
    detection_model.setConfidenceThreshold(0.5)
    detection_model.setNMSThreshold(0.4)
    # EAST expects an input whose sides are multiples of 32; these are the
    # preprocessing values documented for the frozen EAST graph.
    detection_model.setInputParams(
        scale=1.0,
        size=(320, 320),
        mean=(123.68, 116.78, 103.94),
        swapRB=True,
    )

    # Load the pre-trained text recognition model
    recognition_net = cv.dnn.readNet(recognition_model_path)
    recognition_model = cv.dnn_TextRecognitionModel(recognition_net)
    recognition_model.setDecodeType("CTC-greedy")
    recognition_model.setVocabulary(load_alphabet(alphabet_path))
    # Preprocessing is configured once on the model so recognize() can be
    # given raw crops. Inputs are normalised to roughly [-1, 1]; no centre
    # crop is applied because it would cut characters off wide words.
    recognition_model.setInputParams(
        scale=1.0 / 127.5,
        size=(100, 32),
        mean=(127.5, 127.5, 127.5),
        swapRB=bool(rgb),
    )

    # Load the input image (kept in BGR so drawing and imshow use the
    # colour order OpenCV expects).
    image = cv.imread(image_path)
    if image is None:
        raise FileNotFoundError("Could not read image: {!r}".format(image_path))
    height, width = image.shape[:2]

    # Text detection
    detections, _confidences = detection_model.detectTextRectangles(image)

    needle = target_word.lower()
    matches = []

    for rect in detections:
        # int32 is the point type drawContours works with; np.int0 no longer
        # exists in NumPy 2.x.
        box = cv.boxPoints(rect).astype(np.int32)

        # Clamp the bounding box to the image: rotated detections near the
        # border can extend outside it, and negative slice bounds would
        # silently select the wrong pixels.
        x_min = max(int(box[:, 0].min()), 0)
        y_min = max(int(box[:, 1].min()), 0)
        x_max = min(int(box[:, 0].max()), width)
        y_max = min(int(box[:, 1].max()), height)
        if x_max <= x_min or y_max <= y_min:
            continue
        roi = image[y_min:y_max, x_min:x_max]

        # Recognize text using the recognition model
        recognized_text = recognition_model.recognize(roi)

        # Keep only the regions whose text contains the target word
        if needle in recognized_text.lower():
            matches.append((recognized_text, box))

    # Draw the recognized text boxes on the image
    for text, box in matches:
        cv.drawContours(image, [box], 0, (0, 255, 0), 2)
        # putText needs plain Python ints for the origin.
        origin = (int(box[0][0]), int(box[0][1]) - 10)
        cv.putText(image, text, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2, cv.LINE_AA)

    # Display the image with detected text rectangles
    cv.imshow('Detected Text', image)
    cv.waitKey(0)
    cv.destroyAllWindows()


# --- Demo configuration -------------------------------------------------
image_path = '15901687_5.png'  # image to analyse
target_word = 'figure'  # word whose occurrences are highlighted
detection_model_path = 'frozen_east_text_detection.pb'  # text detection model file
recognition_model_path = 'crnn_cs.onnx'  # text recognition model file
alphabet_path = 'alphabet_94.txt'  # vocabulary file for the recognition model
rgb = False  # forwarded as the function's rgb flag

# --- Run the pipeline on the configured inputs --------------------------
detect_and_recognize_text(
    image_path=image_path,
    target_word=target_word,
    detection_model_path=detection_model_path,
    recognition_model_path=recognition_model_path,
    alphabet_path=alphabet_path,
    rgb=rgb,
)
