In [7]:
import pytesseract
import concurrent.futures
import os

In [8]:
def process_image_to_text(image_file):
    """OCR a single image and store the recognised text beside it.

    The output file uses the image path with its extension replaced by
    ``.txt`` and is opened in write mode, so an existing file at that path
    is overwritten. A one-line status message is printed once the text has
    been written.

    Args:
        image_file: Path of the image to read. It is handed to
            ``pytesseract.image_to_string`` and ``os.path.splitext`` as-is.

    Returns:
        None.
    """
    # Run OCR first; nothing is written if recognition raises.
    ocr_result = pytesseract.image_to_string(image_file)

    # Swap the image extension for '.txt' to get the destination path.
    path_without_ext, _image_ext = os.path.splitext(image_file)
    text_file_path = path_without_ext + '.txt'

    # Persist the recognised text as UTF-8 next to the source image.
    with open(text_file_path, 'w', encoding='utf-8') as out_handle:
        out_handle.write(ocr_result)

    print(f'Extracted text from image {image_file} and saved it to {text_file_path}')


In [9]:
def main(image_dir='extracted_images', max_workers=12):
    """Run OCR over every PNG in ``image_dir`` using a thread pool.

    Each image is passed to ``process_image_to_text``. Worker exceptions are
    collected and reported per image instead of being silently dropped, and
    the success message is printed only when every image was processed.

    Args:
        image_dir: Directory containing the images to process.
        max_workers: Maximum number of worker threads in the pool.

    Returns:
        A list of ``(image_path, exception)`` tuples, one per image whose
        processing raised. The list is empty when everything succeeded or
        when there was nothing to process.

    Raises:
        OSError: If ``image_dir`` cannot be listed (e.g. it does not exist).
    """
    # Sort for a reproducible submission order (os.listdir order is arbitrary)
    # and match the extension case-insensitively so '.PNG' files are included.
    image_files = sorted(
        os.path.join(image_dir, file_name)
        for file_name in os.listdir(image_dir)
        if file_name.lower().endswith('.png')
    )

    if not image_files:
        print(f'No .png images found in {image_dir}; nothing to extract.')
        return []

    failures = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Keep a future -> path mapping so failures can be attributed.
        future_to_image = {
            executor.submit(process_image_to_text, image_file): image_file
            for image_file in image_files
        }
        for future in concurrent.futures.as_completed(future_to_image):
            image_file = future_to_image[future]
            try:
                # result() re-raises whatever the worker raised; without this
                # call a failed OCR job would go completely unnoticed.
                future.result()
            except Exception as exc:
                # One bad image must not abort the remaining ones.
                failures.append((image_file, exc))
                print(f'Failed to extract text from image {image_file}: {exc!r}')

    if failures:
        print(
            f'Text extraction finished with {len(failures)} failure(s) '
            f'out of {len(image_files)} image(s).'
        )
    else:
        print('Text has been extracted from the images and saved as text files in the same directory as the images.')

    return failures

# Entry point: call main() only when this code runs as the top-level program,
# not when it is imported as a module. Executing the cell in a notebook kernel
# counts as top-level, which is why the status lines appear as cell output.
if __name__ == "__main__":
    main()

Extracted text from image extracted_images/image_17.png and saved it to extracted_images/image_17.txt
Extracted text from image extracted_images/image_14.png and saved it to extracted_images/image_14.txt
Extracted text from image extracted_images/image_11.png and saved it to extracted_images/image_11.txt
Extracted text from image extracted_images/image_29.png and saved it to extracted_images/image_29.txt
Extracted text from image extracted_images/image_38.png and saved it to extracted_images/image_38.txt
Extracted text from image extracted_images/image_39.png and saved it to extracted_images/image_39.txt
Extracted text from image extracted_images/image_12.png and saved it to extracted_images/image_12.txt
Extracted text from image extracted_images/image_15.png and saved it to extracted_images/image_15.txt
Extracted text from image extracted_images/image_62.png and saved it to extracted_images/image_62.txt
Extracted text from image extracted_images/image_8.png and saved it to extracted_i