In [31]:
import os
from PIL import Image
import easyocr

def process_images(image_directory, output_directory):
    """
    Run OCR on every image in a directory and save each image's text to a text file.

    Regular files whose name ends in .jpg, .jpeg or .png (in any letter case)
    are processed in sorted filename order. For each image the recognised text
    fragments are joined with newlines and written, UTF-8 encoded, to
    ``<output_directory>/<image name without extension>.txt``. A confirmation
    line is printed for every file written.

    Args:
        image_directory (str): Path to the directory containing the image files.
        output_directory (str): Path to the directory where the output text files
            will be saved. It is created if it does not exist yet.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    # Create the target folder up front so that saving a result cannot fail
    # on a missing directory after the (slow) OCR step has already run
    os.makedirs(output_directory, exist_ok=True)

    # Collect the images first; sorting makes the processing order reproducible
    # because os.listdir returns entries in arbitrary order
    image_filenames = sorted(
        filename
        for filename in os.listdir(image_directory)
        if filename.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(image_directory, filename))
    )

    # Nothing to do: skip the OCR reader initialisation entirely
    if not image_filenames:
        return

    # Initialize the OCR reader once and reuse it for all images
    reader = easyocr.Reader(['de'])

    for filename in image_filenames:
        # Construct the full path to the image file
        image_path = os.path.join(image_directory, filename)

        # Hand the path itself to easyocr: readtext is documented to accept a
        # file path, URL, bytes or numpy array, and this avoids keeping an
        # opened image handle around that is never closed
        result = reader.readtext(image_path)

        # Each result entry carries the recognised text at index 1
        text = '\n'.join(entry[1] for entry in result)

        # Define the output path to save the extracted text
        output_path = os.path.join(output_directory, f'{os.path.splitext(filename)[0]}.txt')

        # Save the extracted text to a text file
        with open(output_path, 'w', encoding='utf-8') as file:
            file.write(text)

        # Print a message confirming the location where the text has been saved
        print(f'Text from {filename} has been saved in: {output_path}.')

# Source folder: the images that should be run through OCR
image_directory = '/Users/carstenvolland/code/katia-si/better-letter/text_extractor/data_images/'

# Destination folder: the extracted text files are written here
output_directory = '/Users/carstenvolland/code/katia-si/better-letter/raw_data/'

# Run the OCR extraction over the whole source folder
process_images(image_directory=image_directory, output_directory=output_directory)


Text from Ist-oder-soll.jpeg has been saved in: /Users/carstenvolland/code/katia-si/better-letter/raw_data/Ist-oder-soll.txt.
Text from letter_arbeitsamt_internet.jpeg has been saved in: /Users/carstenvolland/code/katia-si/better-letter/raw_data/letter_arbeitsamt_internet.txt.
