In [None]:
# Install the OCR dependencies: the pytesseract Python wrapper, the Tesseract
# engine and its development headers.
# %pip installs into the environment of the running kernel.
%pip install pytesseract
!sudo apt update
# -y answers apt's confirmation prompt, which cannot be typed into a notebook cell.
!sudo apt install -y tesseract-ocr
!sudo apt install -y libtesseract-dev

In [None]:
# Printing the images from the dataset
import os
from matplotlib import pyplot as plt
from PIL import Image

def display_images_in_folder(folder_path):
    """
    Displays all images in a specified folder, one matplotlib figure per image.

    Files are selected by extension (case-insensitive) and shown in sorted
    filename order, each titled with its filename. A file that cannot be
    opened or drawn is reported with a printed message and skipped.

    Parameters:
    folder_path (str): Path to the folder containing the images.

    Returns:
    None. Prints a message and returns early when the path is not a folder
    or when it contains no file with a recognised image extension.
    """
    # isdir (not exists) also rejects a path that points at a regular file,
    # which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Keep only files with a common image extension; sort because os.listdir
    # returns entries in arbitrary order.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    if not image_files:
        print("No images found in the folder.")
        return

    # Display each image
    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            # The context manager releases the file handle once the pixels
            # have been handed to matplotlib.
            with Image.open(image_path) as img:
                plt.figure()
                # cmap/vmin/vmax only apply to single-channel images, which
                # would otherwise be drawn with matplotlib's false-colour
                # default map; RGB(A) data ignores them.
                plt.imshow(img, cmap='gray', vmin=0, vmax=255)
                plt.axis('off')
                plt.title(image_file)
        except Exception as e:
            print(f"Error opening image '{image_file}': {e}")

    # Show all images
    plt.show()

# Example usage: run the viewer on the hard-coded folder /content/J.
# The function prints a message and returns if that folder is missing.
folder_path = "/content/J"
display_images_in_folder(folder_path)


In [None]:
# Pad all the images to the same size (300x300)
import os
from matplotlib import pyplot as plt
from PIL import Image, ImageOps

def display_images_in_folder(folder_path):
    """
    Displays all images in a specified folder after padding them to 300x300 pixels.

    Each image is scaled to fit and padded with white by ImageOps.pad, then
    shown in its own matplotlib figure titled with the filename. Files are
    processed in sorted filename order; a file that fails is reported with a
    printed message and skipped.

    Parameters:
    folder_path (str): Path to the folder containing the images.

    Returns:
    None. Prints a message and returns early when the path is not a folder
    or when it contains no file with a recognised image extension.
    """
    # isdir (not exists) also rejects a path that points at a regular file,
    # which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Keep only files with a common image extension; sort because os.listdir
    # returns entries in arbitrary order.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    if not image_files:
        print("No images found in the folder.")
        return

    # Desired size for the padded images
    target_size = (300, 300)

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            with Image.open(image_path) as source:
                # The white fill is an RGB tuple, which single-channel modes
                # (e.g. grayscale) reject; convert those first. RGB/RGBA
                # inputs pass through untouched.
                img = source if source.mode in ("RGB", "RGBA") else source.convert("RGB")
                padded_img = ImageOps.pad(img, target_size, color=(255, 255, 255))

            # Display the padded image
            plt.figure()
            plt.imshow(padded_img)
            plt.axis('off')
            plt.title(image_file)
        except Exception as e:
            print(f"Error processing image '{image_file}': {e}")

    # Show all images
    plt.show()

# Example usage: show the padded version of every image in /content/J.
# The function prints a message and returns if that folder is missing.
folder_path = "/content/J"
display_images_in_folder(folder_path)

In [None]:
# enhanced the padded images
import os
from matplotlib import pyplot as plt
from PIL import Image, ImageOps, ImageEnhance

def display_images_in_folder(folder_path):
    """
    Displays all images in a specified folder after padding them to 300x300 pixels and enhancing their clarity.

    Each image is scaled to fit and padded with white by ImageOps.pad,
    sharpened with a factor of 2.0, and shown in its own matplotlib figure
    titled with the filename. Files are processed in sorted filename order; a
    file that fails is reported with a printed message and skipped.

    Parameters:
    folder_path (str): Path to the folder containing the images.

    Returns:
    None. Prints a message and returns early when the path is not a folder
    or when it contains no file with a recognised image extension.
    """
    # isdir (not exists) also rejects a path that points at a regular file,
    # which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Keep only files with a common image extension; sort because os.listdir
    # returns entries in arbitrary order.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    if not image_files:
        print("No images found in the folder.")
        return

    # Desired size for the padded images
    target_size = (300, 300)

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            with Image.open(image_path) as source:
                # Convert anything that is not already RGB/RGBA so the RGB
                # white fill and the sharpening filter are valid for
                # grayscale and palette inputs too.
                img = source if source.mode in ("RGB", "RGBA") else source.convert("RGB")
                padded_img = ImageOps.pad(img, target_size, color=(255, 255, 255))

            # A factor above 1.0 sharpens; 1.0 would return the image unchanged.
            enhanced_img = ImageEnhance.Sharpness(padded_img).enhance(2.0)

            # Display the enhanced image
            plt.figure()
            plt.imshow(enhanced_img)
            plt.axis('off')
            plt.title(image_file)
        except Exception as e:
            print(f"Error processing image '{image_file}': {e}")

    # Show all images
    plt.show()

# Example usage: show the padded and sharpened version of every image in /content/J.
# The function prints a message and returns if that folder is missing.
folder_path = "/content/J"
display_images_in_folder(folder_path)


In [None]:
# Draw bounding boxes on the enhanced images
import os
from matplotlib import pyplot as plt
from PIL import Image, ImageOps, ImageEnhance, ImageDraw
import pytesseract

# Ensure pytesseract is set up correctly if needed
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

def display_images_in_folder(folder_path):
    """
    Displays all images in a specified folder after padding them to 300x300 pixels, enhancing their clarity,
    and detecting text with bounding boxes.

    Each image is padded with white, sharpened, passed to Tesseract for
    per-character boxes, and shown in its own matplotlib figure with the
    boxes outlined in red. Files are processed in sorted filename order; a
    file that fails is reported with a printed message and skipped.

    Parameters:
    folder_path (str): Path to the folder containing the images.

    Returns:
    None. Prints a message and returns early when the path is not a folder
    or when it contains no file with a recognised image extension.
    """
    # isdir (not exists) also rejects a path that points at a regular file,
    # which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Keep only files with a common image extension; sort because os.listdir
    # returns entries in arbitrary order.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    if not image_files:
        print("No images found in the folder.")
        return

    # Desired size for the padded images
    target_size = (300, 300)

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            with Image.open(image_path) as source:
                # Convert anything that is not already RGB/RGBA so the RGB
                # white fill, the sharpening filter and the red outline are
                # valid for grayscale and palette inputs too.
                img = source if source.mode in ("RGB", "RGBA") else source.convert("RGB")
                padded_img = ImageOps.pad(img, target_size, color=(255, 255, 255))

            # A factor above 1.0 sharpens; 1.0 would return the image unchanged.
            enhanced_img = ImageEnhance.Sharpness(padded_img).enhance(2.0)
            height = enhanced_img.height

            # One line per detected character: "<char> <left> <bottom> <right> <top> <page>"
            boxes = pytesseract.image_to_boxes(enhanced_img)

            draw = ImageDraw.Draw(enhanced_img)
            for line in boxes.splitlines():
                fields = line.split()
                if len(fields) < 5:
                    # Skip a malformed line rather than dropping the whole image.
                    continue
                left, bottom, right, top = (int(value) for value in fields[1:5])
                # Tesseract measures y from the bottom edge, PIL from the top.
                draw.rectangle([left, height - top, right, height - bottom], outline="red", width=2)

            # Display the enhanced image with bounding boxes
            plt.figure()
            plt.imshow(enhanced_img)
            plt.axis('off')
            plt.title(image_file)
        except Exception as e:
            print(f"Error processing image '{image_file}': {e}")

    # Show all images
    plt.show()

# Example usage: draw Tesseract's character boxes on every image in /content/J.
# The function prints a message and returns if that folder is missing.
folder_path = "/content/J"
display_images_in_folder(folder_path)


In [None]:
# Detect the text in each bounding box individually
import os
from matplotlib import pyplot as plt
from PIL import Image, ImageOps, ImageEnhance, ImageDraw
import pytesseract

def display_images_in_folder(folder_path):
    """
    Displays all images in a specified folder after padding them to 300x300 pixels, enhancing their clarity,
    and detecting text with bounding boxes and overlayed text.

    Each image is padded with white, sharpened and passed to Tesseract for
    per-character boxes. Every box is outlined in red with its recognised
    character written just above it, and the result is shown in its own
    matplotlib figure. Files are processed in sorted filename order; a file
    that fails is reported with a printed message and skipped.

    Parameters:
    folder_path (str): Path to the folder containing the images.

    Returns:
    None. Prints a message and returns early when the path is not a folder
    or when it contains no file with a recognised image extension.
    """
    # isdir (not exists) also rejects a path that points at a regular file,
    # which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Keep only files with a common image extension; sort because os.listdir
    # returns entries in arbitrary order.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    if not image_files:
        print("No images found in the folder.")
        return

    # Desired size for the padded images
    target_size = (300, 300)

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            with Image.open(image_path) as source:
                # Convert anything that is not already RGB/RGBA so the RGB
                # white fill, the sharpening filter and the red overlay are
                # valid for grayscale and palette inputs too.
                img = source if source.mode in ("RGB", "RGBA") else source.convert("RGB")
                padded_img = ImageOps.pad(img, target_size, color=(255, 255, 255))

            # A factor above 1.0 sharpens; 1.0 would return the image unchanged.
            enhanced_img = ImageEnhance.Sharpness(padded_img).enhance(2.0)
            height = enhanced_img.height

            # One line per detected character: "<char> <left> <bottom> <right> <top> <page>"
            boxes = pytesseract.image_to_boxes(enhanced_img)
            draw = ImageDraw.Draw(enhanced_img)

            for line in boxes.splitlines():
                fields = line.split()
                if len(fields) < 5:
                    # Skip a malformed line rather than dropping the whole image.
                    continue
                text = fields[0]  # The recognised character
                left, bottom, right, top = (int(value) for value in fields[1:5])
                # Tesseract measures y from the bottom edge, PIL from the top.
                box_top, box_bottom = height - top, height - bottom
                draw.rectangle([left, box_top, right, box_bottom], outline="red", width=2)
                # Label goes 10 px above the box, clamped so it stays on the canvas.
                draw.text((left, max(box_top - 10, 0)), text, fill="red")

            # Display the enhanced image with bounding boxes and text
            plt.figure()
            plt.imshow(enhanced_img)
            plt.axis('off')
            plt.title(image_file)
        except Exception as e:
            print(f"Error processing image '{image_file}': {e}")

    # Show all images
    plt.show()

# Example usage: draw boxes and recognised characters on every image in /content/J.
# The function prints a message and returns if that folder is missing.
folder_path = "/content/J"
display_images_in_folder(folder_path)


In [None]:
# recognized and printed the whole word with the help of detected text from the bounding boxes
import os
from matplotlib import pyplot as plt
from PIL import Image, ImageOps, ImageEnhance, ImageDraw
import pytesseract

def display_images_and_print_text(folder_path):
    """
    Displays all images in a specified folder after padding them to 300x300 pixels, enhancing their clarity,
    detecting text with bounding boxes, and prints the recognized text.

    Each image is padded with white, sharpened and read by Tesseract twice:
    once for the full text and once for per-character boxes. Boxes and
    characters are drawn in red on the image, which is shown in its own
    matplotlib figure. The text of every successfully read image is printed
    after the loop. Files are processed in sorted filename order; a file that
    fails is reported with a printed message and skipped.

    Parameters:
    folder_path (str): Path to the folder containing the images.

    Returns:
    None. Prints a message and returns early when the path is not a folder
    or when it contains no file with a recognised image extension.
    """
    # isdir (not exists) also rejects a path that points at a regular file,
    # which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Keep only files with a common image extension; sort because os.listdir
    # returns entries in arbitrary order.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    if not image_files:
        print("No images found in the folder.")
        return

    # Desired size for the padded images
    target_size = (300, 300)

    # Recognized text keyed by filename
    all_text = {}

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            with Image.open(image_path) as source:
                # Convert anything that is not already RGB/RGBA so the RGB
                # white fill, the sharpening filter and the red overlay are
                # valid for grayscale and palette inputs too.
                img = source if source.mode in ("RGB", "RGBA") else source.convert("RGB")
                padded_img = ImageOps.pad(img, target_size, color=(255, 255, 255))

            # A factor above 1.0 sharpens; 1.0 would return the image unchanged.
            enhanced_img = ImageEnhance.Sharpness(padded_img).enhance(2.0)

            # Full-text OCR runs before anything is drawn on the image.
            recognized_text = pytesseract.image_to_string(enhanced_img)
            all_text[image_file] = recognized_text.strip()

            height = enhanced_img.height

            # One line per detected character: "<char> <left> <bottom> <right> <top> <page>"
            boxes = pytesseract.image_to_boxes(enhanced_img)
            draw = ImageDraw.Draw(enhanced_img)

            for line in boxes.splitlines():
                fields = line.split()
                if len(fields) < 5:
                    # Skip a malformed line rather than dropping the whole image.
                    continue
                text = fields[0]  # The recognised character
                left, bottom, right, top = (int(value) for value in fields[1:5])
                # Tesseract measures y from the bottom edge, PIL from the top.
                box_top, box_bottom = height - top, height - bottom
                draw.rectangle([left, box_top, right, box_bottom], outline="red", width=2)
                # Label goes 10 px above the box, clamped so it stays on the canvas.
                draw.text((left, max(box_top - 10, 0)), text, fill="red")

            # Display the enhanced image with bounding boxes and text
            plt.figure()
            plt.imshow(enhanced_img)
            plt.axis('off')
            plt.title(image_file)
        except Exception as e:
            print(f"Error processing image '{image_file}': {e}")

    # Print all recognized text
    print("\nRecognized Text from Images:")
    for image_file, text in all_text.items():
        print(f"\nImage: {image_file}")
        print(text)

    # Show all images
    plt.show()

# Example usage: OCR every image in /content/J, show the annotated images and print the text.
# The function prints a message and returns if that folder is missing.
folder_path = "/content/J"
display_images_and_print_text(folder_path)


Trying to add a CNN model to the pipeline


In [None]:
import os
from matplotlib import pyplot as plt
from PIL import Image, ImageOps, ImageEnhance, ImageDraw
import pytesseract
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Bidirectional, LSTM, Dense, Flatten, Input

def build_cnn_bilstm_model():
    """
    Builds a CNN-BiLSTM model for text feature extraction and recognition.

    The network takes 300x300 single-channel images, applies three
    convolution + 2x2 max-pooling stages (32, 64, 128 filters), flattens the
    result into one feature vector, feeds it to two bidirectional LSTM layers
    as a one-step sequence, and ends in a single sigmoid unit.

    Returns:
    tensorflow.keras.Model: The uncompiled model with freshly initialised weights.
    """
    input_layer = Input(shape=(300, 300, 1))

    # CNN layers
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)

    # Flatten and BiLSTM layers
    x = Flatten()(x)
    # Add the time dimension with a Keras layer: a raw TensorFlow op such as
    # tf.expand_dims cannot be applied to a symbolic Keras tensor in Keras 3.
    # (1, -1) keeps the same single-timestep shape as before.
    x = tf.keras.layers.Reshape((1, -1))(x)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Bidirectional(LSTM(64))(x)

    # Fully connected layer
    output_layer = Dense(1, activation='sigmoid')(x)

    # Build model
    model = Model(inputs=input_layer, outputs=output_layer)
    return model

def preprocess_image(image):
    """
    Converts an image to grayscale and resizes it to 300x300 for CNN processing.

    Parameters:
    image (PIL.Image.Image): Image to prepare.

    Returns:
    numpy.ndarray: Array of shape (1, 300, 300, 1) with pixel values scaled
    into the range [0, 1].
    """
    image = image.convert("L")  # Convert to grayscale
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name and also exists in older releases.
    image = image.resize((300, 300), Image.LANCZOS)  # Resize to match the model input
    image_array = np.array(image) / 255.0  # Normalize pixel values
    image_array = np.expand_dims(image_array, axis=-1)  # Add channel dimension
    return np.expand_dims(image_array, axis=0)  # Add batch dimension

def display_images_and_print_text(folder_path):
    """
    Displays all images in a specified folder after padding them to 300x300 pixels, enhancing their clarity,
    detecting text with bounding boxes, and prints the recognized text using a hybrid CNN-BiLSTM + Tesseract approach.

    Each image is padded with white, sharpened, scored by the CNN-BiLSTM
    model and read by Tesseract. Per-character boxes and characters are drawn
    in red on the image, which is shown in its own matplotlib figure; the
    combined text results are printed after the loop. Files are processed in
    sorted filename order; a file that fails is reported and skipped.

    The model is rebuilt on every call and no weights are loaded, so the CNN
    number is a placeholder rather than a trained prediction.

    Parameters:
    folder_path (str): Path to the folder containing the images.

    Returns:
    None. Prints a message and returns early when the path is not a folder
    or when it contains no file with a recognised image extension.
    """
    # isdir (not exists) also rejects a path that points at a regular file,
    # which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Keep only files with a common image extension; sort because os.listdir
    # returns entries in arbitrary order.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    if not image_files:
        print("No images found in the folder.")
        return

    # Load CNN-BiLSTM model
    model = build_cnn_bilstm_model()
    # Load pre-trained weights here if available (replace 'model_weights.h5' with your weights file)
    # model.load_weights('model_weights.h5')

    # Recognized text keyed by filename
    all_text = {}

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            with Image.open(image_path) as source:
                # Convert anything that is not already RGB/RGBA so the RGB
                # white fill, the sharpening filter and the red overlay are
                # valid for grayscale and palette inputs too.
                img = source if source.mode in ("RGB", "RGBA") else source.convert("RGB")
                padded_img = ImageOps.pad(img, (300, 300), color=(255, 255, 255))

            # A factor above 1.0 sharpens; 1.0 would return the image unchanged.
            enhanced_img = ImageEnhance.Sharpness(padded_img).enhance(2.0)

            # Preprocess image for CNN-BiLSTM
            preprocessed_image = preprocess_image(enhanced_img)

            # Dummy prediction for now: add real decoding logic once the model is trained.
            # verbose=0 keeps Keras from printing a progress bar per image.
            cnn_prediction = model.predict(preprocessed_image, verbose=0)
            cnn_text = f"Predicted CNN output: {cnn_prediction[0][0]:.2f}"  # Example output

            # Full-text OCR runs before anything is drawn on the image.
            recognized_text = pytesseract.image_to_string(enhanced_img)

            # Combine Tesseract and CNN-BiLSTM predictions
            all_text[image_file] = f"Tesseract: {recognized_text.strip()}\nCNN-BiLSTM: {cnn_text}"

            height = enhanced_img.height

            # One line per detected character: "<char> <left> <bottom> <right> <top> <page>"
            boxes = pytesseract.image_to_boxes(enhanced_img)
            draw = ImageDraw.Draw(enhanced_img)

            for line in boxes.splitlines():
                fields = line.split()
                if len(fields) < 5:
                    # Skip a malformed line rather than dropping the whole image.
                    continue
                text = fields[0]  # The recognised character
                left, bottom, right, top = (int(value) for value in fields[1:5])
                # Tesseract measures y from the bottom edge, PIL from the top.
                box_top, box_bottom = height - top, height - bottom
                draw.rectangle([left, box_top, right, box_bottom], outline="red", width=2)
                # Label goes 10 px above the box, clamped so it stays on the canvas.
                draw.text((left, max(box_top - 10, 0)), text, fill="red")

            # Display the enhanced image with bounding boxes and text
            plt.figure()
            plt.imshow(enhanced_img)
            plt.axis('off')
            plt.title(image_file)
        except Exception as e:
            print(f"Error processing image '{image_file}': {e}")

    # Print all recognized text
    print("\nRecognized Text from Images:")
    for image_file, text in all_text.items():
        print(f"\nImage: {image_file}")
        print(text)

    # Show all images
    plt.show()

# Example usage: run the Tesseract + CNN-BiLSTM pipeline on every image in /content/n.
# The function prints a message and returns if that folder is missing.
folder_path = "/content/n"
display_images_and_print_text(folder_path)


In [None]:
import os
from matplotlib import pyplot as plt
from PIL import Image, ImageOps, ImageEnhance, ImageDraw
import pytesseract
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Bidirectional, LSTM, Dense, Flatten, Input, Reshape

def build_cnn_bilstm_model():
    """
    Assemble the CNN-BiLSTM network used for text feature extraction and recognition.

    Input is a 300x300 single-channel image. Three convolution + 2x2
    max-pooling stages (32, 64, 128 filters) extract features, which are
    flattened and regrouped into a sequence of 128-feature steps for two
    bidirectional LSTM layers. A single sigmoid unit produces the output.

    Returns:
    tensorflow.keras.Model: The uncompiled model.
    """
    image_input = Input(shape=(300, 300, 1))

    # Convolutional feature extractor: the filter count doubles at each stage.
    features = image_input
    for filter_count in (32, 64, 128):
        features = Conv2D(filter_count, (3, 3), activation='relu', padding='same')(features)
        features = MaxPooling2D((2, 2))(features)

    # Turn the feature map into (time_steps, 128) for the recurrent layers.
    flat = Flatten()(features)
    sequence = Reshape((-1, 128))(flat)

    # Recurrent stack: the first layer keeps the full sequence, the second
    # reduces it to one vector.
    sequence = Bidirectional(LSTM(128, return_sequences=True))(sequence)
    summary = Bidirectional(LSTM(64))(sequence)

    # Single sigmoid output unit
    prediction = Dense(1, activation='sigmoid')(summary)

    return Model(inputs=image_input, outputs=prediction)

def preprocess_image(image):
    """
    Converts an image to grayscale and resizes it to 300x300 for CNN processing.

    Parameters:
    image (PIL.Image.Image): Image to prepare.

    Returns:
    numpy.ndarray: Array of shape (1, 300, 300, 1) with pixel values scaled
    into the range [0, 1].
    """
    image = image.convert("L")  # Convert to grayscale
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name and also exists in older releases.
    image = image.resize((300, 300), Image.LANCZOS)  # Resize to match the model input
    image_array = np.array(image) / 255.0  # Normalize pixel values
    image_array = np.expand_dims(image_array, axis=-1)  # Add channel dimension
    return np.expand_dims(image_array, axis=0)  # Add batch dimension

def display_images_and_print_text(folder_path):
    """
    Displays all images in a specified folder after padding them to 300x300 pixels, enhancing their clarity,
    detecting text with bounding boxes, and prints the recognized text using a hybrid CNN-BiLSTM + Tesseract approach.

    Each image is padded with white, sharpened, scored by the CNN-BiLSTM
    model and read by Tesseract. Per-character boxes and characters are drawn
    in red on the image, which is shown in its own matplotlib figure; the
    combined text results are printed after the loop. Files are processed in
    sorted filename order; a file that fails is reported and skipped.

    The model is rebuilt on every call and no weights are loaded, so the CNN
    number is a placeholder rather than a trained prediction.

    Parameters:
    folder_path (str): Path to the folder containing the images.

    Returns:
    None. Prints a message and returns early when the path is not a folder
    or when it contains no file with a recognised image extension.
    """
    # isdir (not exists) also rejects a path that points at a regular file,
    # which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Keep only files with a common image extension; sort because os.listdir
    # returns entries in arbitrary order.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    if not image_files:
        print("No images found in the folder.")
        return

    # Load CNN-BiLSTM model
    model = build_cnn_bilstm_model()
    # Load pre-trained weights here if available (replace 'model_weights.h5' with your weights file)
    # model.load_weights('model_weights.h5')

    # Recognized text keyed by filename
    all_text = {}

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            with Image.open(image_path) as source:
                # Convert anything that is not already RGB/RGBA so the RGB
                # white fill, the sharpening filter and the red overlay are
                # valid for grayscale and palette inputs too.
                img = source if source.mode in ("RGB", "RGBA") else source.convert("RGB")
                padded_img = ImageOps.pad(img, (300, 300), color=(255, 255, 255))

            # A factor above 1.0 sharpens; 1.0 would return the image unchanged.
            enhanced_img = ImageEnhance.Sharpness(padded_img).enhance(2.0)

            # Preprocess image for CNN-BiLSTM
            preprocessed_image = preprocess_image(enhanced_img)

            # Dummy prediction for now: add real decoding logic once the model is trained.
            # verbose=0 keeps Keras from printing a progress bar per image.
            cnn_prediction = model.predict(preprocessed_image, verbose=0)
            cnn_text = f"Predicted CNN output: {cnn_prediction[0][0]:.2f}"  # Example output

            # Full-text OCR runs before anything is drawn on the image.
            recognized_text = pytesseract.image_to_string(enhanced_img)

            # Combine Tesseract and CNN-BiLSTM predictions
            all_text[image_file] = f"Tesseract: {recognized_text.strip()}\nCNN-BiLSTM: {cnn_text}"

            height = enhanced_img.height

            # One line per detected character: "<char> <left> <bottom> <right> <top> <page>"
            boxes = pytesseract.image_to_boxes(enhanced_img)
            draw = ImageDraw.Draw(enhanced_img)

            for line in boxes.splitlines():
                fields = line.split()
                if len(fields) < 5:
                    # Skip a malformed line rather than dropping the whole image.
                    continue
                text = fields[0]  # The recognised character
                left, bottom, right, top = (int(value) for value in fields[1:5])
                # Tesseract measures y from the bottom edge, PIL from the top.
                box_top, box_bottom = height - top, height - bottom
                draw.rectangle([left, box_top, right, box_bottom], outline="red", width=2)
                # Label goes 10 px above the box, clamped so it stays on the canvas.
                draw.text((left, max(box_top - 10, 0)), text, fill="red")

            # Display the enhanced image with bounding boxes and text
            plt.figure()
            plt.imshow(enhanced_img)
            plt.axis('off')
            plt.title(image_file)
        except Exception as e:
            print(f"Error processing image '{image_file}': {e}")

    # Print all recognized text
    print("\nRecognized Text from Images:")
    for image_file, text in all_text.items():
        print(f"\nImage: {image_file}")
        print(text)

    # Show all images
    plt.show()

# Example usage: run the Tesseract + CNN-BiLSTM pipeline on every image in /content/n.
# The function prints a message and returns if that folder is missing.
folder_path = "/content/n"
display_images_and_print_text(folder_path)


In [None]:
import os
from matplotlib import pyplot as plt
from PIL import Image, ImageOps, ImageEnhance, ImageDraw
import pytesseract
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Bidirectional, LSTM, Dense, Flatten, Input, Reshape

def build_cnn_bilstm_model():
    """
    Construct the CNN-BiLSTM model for text feature extraction and recognition.

    A 300x300 single-channel input passes through three convolution + 2x2
    max-pooling stages (32, 64 and 128 filters). The feature map is flattened
    and reshaped into steps of 128 features, processed by two bidirectional
    LSTM layers, and reduced to one sigmoid output.

    Returns:
    tensorflow.keras.Model: The uncompiled model.
    """
    conv_stage_filters = (32, 64, 128)

    net_input = Input(shape=(300, 300, 1))
    tensor = net_input

    # Each stage: 3x3 same-padded ReLU convolution followed by 2x2 pooling.
    for n_filters in conv_stage_filters:
        tensor = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(tensor)
        tensor = MaxPooling2D((2, 2))(tensor)

    # Flatten, then regroup into (time_steps, features) for the LSTMs.
    tensor = Flatten()(tensor)
    tensor = Reshape((-1, 128))(tensor)

    # The first BiLSTM returns every step; the second returns only its final state.
    tensor = Bidirectional(LSTM(128, return_sequences=True))(tensor)
    tensor = Bidirectional(LSTM(64))(tensor)

    # Fully connected sigmoid head
    net_output = Dense(1, activation='sigmoid')(tensor)

    return Model(inputs=net_input, outputs=net_output)

def preprocess_image(image):
    """
    Prepare a PIL image as a single-item grayscale batch for the CNN.

    Parameters:
    image (PIL.Image.Image): Image to prepare.

    Returns:
    numpy.ndarray: Array of shape (1, 300, 300, 1) with pixel values scaled
    into the range [0, 1].
    """
    grayscale = image.convert("L")
    # LANCZOS resampling to the model's 300x300 input size
    resized = grayscale.resize((300, 300), resample=Image.Resampling.LANCZOS)
    # Scale 0-255 pixel values into [0, 1]
    pixels = np.array(resized) / 255.0
    # Leading batch axis and trailing channel axis in one indexing step
    return pixels[np.newaxis, ..., np.newaxis]

def display_images_and_print_text(folder_path):
    """
    Displays all images in a specified folder after padding them to 300x300 pixels, enhancing their clarity,
    detecting text with bounding boxes, and prints the recognized text using a hybrid CNN-BiLSTM + Tesseract approach.

    Each image is padded with white, sharpened, scored by the CNN-BiLSTM
    model and read by Tesseract. Per-character boxes and characters are drawn
    in red on the image, which is shown in its own matplotlib figure; the
    combined text results are printed after the loop. Files are processed in
    sorted filename order; a file that fails is reported and skipped.

    The model is rebuilt on every call and no weights are loaded, so the CNN
    number is a placeholder rather than a trained prediction.

    Parameters:
    folder_path (str): Path to the folder containing the images.

    Returns:
    None. Prints a message and returns early when the path is not a folder
    or when it contains no file with a recognised image extension.
    """
    # isdir (not exists) also rejects a path that points at a regular file,
    # which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    # Keep only files with a common image extension; sort because os.listdir
    # returns entries in arbitrary order.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.splitext(f)[1].lower() in image_extensions
    )

    if not image_files:
        print("No images found in the folder.")
        return

    # Load CNN-BiLSTM model
    model = build_cnn_bilstm_model()
    # Load pre-trained weights here if available (replace 'model_weights.h5' with your weights file)
    # model.load_weights('model_weights.h5')

    # Recognized text keyed by filename
    all_text = {}

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            with Image.open(image_path) as source:
                # Convert anything that is not already RGB/RGBA so the RGB
                # white fill, the sharpening filter and the red overlay are
                # valid for grayscale and palette inputs too.
                img = source if source.mode in ("RGB", "RGBA") else source.convert("RGB")
                padded_img = ImageOps.pad(img, (300, 300), color=(255, 255, 255))

            # A factor above 1.0 sharpens; 1.0 would return the image unchanged.
            enhanced_img = ImageEnhance.Sharpness(padded_img).enhance(2.0)

            # Preprocess image for CNN-BiLSTM
            preprocessed_image = preprocess_image(enhanced_img)

            # Dummy prediction for now: add real decoding logic once the model is trained.
            # verbose=0 keeps Keras from printing a progress bar per image.
            cnn_prediction = model.predict(preprocessed_image, verbose=0)
            cnn_text = f"Predicted CNN output: {cnn_prediction[0][0]:.2f}"  # Example output

            # Full-text OCR runs before anything is drawn on the image.
            recognized_text = pytesseract.image_to_string(enhanced_img)

            # Combine Tesseract and CNN-BiLSTM predictions
            all_text[image_file] = f"Tesseract: {recognized_text.strip()}\nCNN-BiLSTM: {cnn_text}"

            height = enhanced_img.height

            # One line per detected character: "<char> <left> <bottom> <right> <top> <page>"
            boxes = pytesseract.image_to_boxes(enhanced_img)
            draw = ImageDraw.Draw(enhanced_img)

            for line in boxes.splitlines():
                fields = line.split()
                if len(fields) < 5:
                    # Skip a malformed line rather than dropping the whole image.
                    continue
                text = fields[0]  # The recognised character
                left, bottom, right, top = (int(value) for value in fields[1:5])
                # Tesseract measures y from the bottom edge, PIL from the top.
                box_top, box_bottom = height - top, height - bottom
                draw.rectangle([left, box_top, right, box_bottom], outline="red", width=2)
                # Label goes 10 px above the box, clamped so it stays on the canvas.
                draw.text((left, max(box_top - 10, 0)), text, fill="red")

            # Display the enhanced image with bounding boxes and text
            plt.figure()
            plt.imshow(enhanced_img)
            plt.axis('off')
            plt.title(image_file)
        except Exception as e:
            print(f"Error processing image '{image_file}': {e}")

    # Print all recognized text
    print("\nRecognized Text from Images:")
    for image_file, text in all_text.items():
        print(f"\nImage: {image_file}")
        print(text)

    # Show all images
    plt.show()

# Example usage: run the Tesseract + CNN-BiLSTM pipeline on every image in /content/n.
# The function prints a message and returns if that folder is missing.
folder_path = "/content/n"
display_images_and_print_text(folder_path)
