# GUI

In [1]:
import os
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from openpyxl import load_workbook
import fitz
from PIL import Image, ImageEnhance
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from tqdm import tqdm
import re
import time
import io
import pandas as pd
from openpyxl import load_workbook
from datetime import datetime
from openpyxl.styles import PatternFill
from openpyxl.utils import get_column_letter
from openpyxl.styles import PatternFill
import re

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_month_from_week(year, week):
    # Calculate the first day of the week using the year and week number
    first_day_of_week = datetime.strptime(f'{year}-W{int(week)}-1', "%Y-W%W-%w")
    return first_day_of_week.strftime("%B")

In [3]:
def is_valid_time(time_str):
    """Check if the given time string is a valid time in the format HH:MM."""
    return bool(re.match(r'^([01]\d|2[0-3]):([0-5]\d)$', time_str))

In [5]:
def process_time_text(text):
    """
    Process and format recognized text to match time patterns.
    
    Parameters:
    text (str): The recognized text.
    
    Returns:
    str: The text formatted as a valid time or None if invalid.
    """
    # Replace common misinterpreted characters with colon
    normalized_text = re.sub(r'[ \-%,.]', ':', text)
    
    # Remove any characters that are not digits or colon
    normalized_text = re.sub(r'[^\d:]', '', normalized_text)

    
   # Handle specific case like "09." -> "09:00"
    if re.match(r'^\d{1,2}:$', normalized_text):
        normalized_text = normalized_text.zfill(3) + "00"
    
    # Handle extra digit scenarios like "515. 5" -> "05:15"
    digits_only = re.sub(r'[^\d]', '', normalized_text)
    
    if len(digits_only) == 5:
        # Assume first digit is an extra and reformat as HHMM
        normalized_text = f"0{digits_only[1:3]}:{digits_only[3:5]}"
    elif len(digits_only) == 4:
        # Assume standard HHMM format
        normalized_text = f"{digits_only[:2]}:{digits_only[2:4]}"
    elif len(digits_only) == 3:
        # Assume HMM format and add leading zero to the hour
        normalized_text = f"0{digits_only[0]}:{digits_only[1:3]}"
    
    # If there is already a colon and the pattern matches H:MM or HH:MM
    if re.match(r'^\d{1,2}:\d{2}$', normalized_text):
        parts = normalized_text.split(':')
        hour = parts[0].zfill(2)
        minute = parts[1]
        # Handle the specific case of "60" minutes
        if minute == "60":
            minute = "00"
        return f"{hour}:{minute}"
    
    # Handle other cases by attempting to split into hour and minute
    parts = re.findall(r'\d+', normalized_text)
    if len(parts) >= 2:
        hour = parts[0].zfill(2)
        minute = parts[1]
        if len(minute) == 1:
            minute = minute + '0'
        if minute == "60":
            minute = "00"
        return f"{hour}:{minute}"
    
    # If format is unexpected, return as is
    return text.strip()

def recognize_number_in_bbox(pdf_path, bbox_rect, meaning):
    """
    Recognize the number or time in a specific bounding box from a PDF page using TrOCR.

    Parameters:
    pdf_path (str): Path to the PDF file.
    bbox_rect (tuple): A tuple defining the bounding box (x0, y0, x1, y1).
    meaning (str): The meaning or label of the bounding box (e.g., "Start Time", "End Time").

    Returns:
    str: The recognized number or time, formatted appropriately, or an empty string if the text is invalid.
    """
    
    # Open the PDF document
    pdf_document = fitz.open(pdf_path)

    # Load the single page (assuming a single-page PDF)
    page = pdf_document.load_page(0)

    # Set the crop box to the bounding box
    rect = fitz.Rect(*bbox_rect)
    page.set_cropbox(rect)

    # Render the cropped page to an image
    pix = page.get_pixmap()
    img = Image.open(io.BytesIO(pix.tobytes())).convert("RGB")

    # Optional Preprocessing Steps
    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(2)

    # Perform OCR using TrOCR
    pixel_values = processor(images=img, return_tensors="pt").pixel_values
    #generated_ids = model.generate(pixel_values)
    generated_ids = model.generate(pixel_values, max_new_tokens=20)
    extract_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    
    print("Extracted Text: ", extract_text)
    
    extract_text = re.sub(r'g', '9', extract_text) #g with 9

    # Close the PDF document
    pdf_document.close()
    

    # Check if the extracted text is just noise (e.g., too long, non-numeric)
    if len(extract_text) > 6 or not re.search(r'\d', extract_text):
        return ""

    # Apply post-processing logic based on the meaning
    if meaning != "kalenderwoche":
        extract_text = process_time_text(extract_text)
        if extract_text == "00:00":
            return ""

    return extract_text

def fill_excel_from_dict(path, data_dict, progress_callback):
    workbook = load_workbook(path)
    sheet = workbook["01"]
    year = sheet['D3'].value
    kalenderwoche = int(data_dict.get("kalenderwoche", "").strip('.'))
    month = get_month_from_week(year, kalenderwoche)
    sheet_number = {
        "January": "01",
        "February": "02",
        "March": "03",
        "April": "04",
        "May": "05",
        "June": "06",
        "July": "07",
        "August": "08",
        "September": "09",
        "October": "10",
        "November": "11",
        "December": "12"
    }.get(month, "04")
    sheet_name = f"{sheet_number}"
    sheet = workbook[sheet_name]
    target_row = None
    for row in sheet.iter_rows(min_col=1, max_col=1):
        if row[0].value == kalenderwoche:
            target_row = row[0].row
            break
    if target_row is None:
        messagebox.showerror("Error", f"Kalenderwoche {kalenderwoche} cannot be found in column A.")
        return
    base_row = target_row
    red_fill = PatternFill(start_color="FF0000", end_color="FF0000", fill_type="solid")
    yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
    columns = {
        "start": "D",
        "ende": "K",
        "pause_1_start": "E",
        "pause_1_ende": "F",
        "pause_2_start": "G",
        "pause_2_ende": "H",
    }
    days = ["montag", "dienstag", "mittwoch", "donnerstag", "freitag", "samstag", "sonntag"]
    for i, day in enumerate(days):
        row = base_row + i
        for key_suffix, col in columns.items():
            key = f"{key_suffix}_{day}"
            time_value = data_dict.get(key, "")
            cell = sheet[f"{col}{row}"]
            if time_value or time_value == "":
                fill_color = red_fill if time_value and not is_valid_time(time_value) else yellow_fill
                for merged_range in sheet.merged_cells.ranges:
                    if cell.coordinate in merged_range:
                        top_left_cell = sheet.cell(row=merged_range.min_row, column=merged_range.min_col)
                        top_left_cell.value = time_value
                        top_left_cell.fill = fill_color
                        break
                else:
                    cell.value = time_value
                    cell.fill = fill_color
        progress_callback(i + 1, len(days))
    output_path = path.replace(".xlsm", "_modified.xlsx")
    workbook.save(output_path)
    return output_path

In [None]:
# Initialize the OCR model and processor
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')

In [6]:
def process_files(progress_callback):
    folder_path = os.getcwd()
    pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]
    if not pdf_files:
        messagebox.showinfo("No PDFs found", "No PDF files found in the folder.")
        return
    pdf_path = os.path.join(folder_path, pdf_files[0])
    data_dict = {}  # Implement your logic to populate this dictionary
    
    # Assume bounding_boxes is defined
    bounding_boxes = {
        "kalenderwoche": (450, 700, 700, 770),
        "start_montag": (510, 1160, 730, 1250) ,
        "ende_montag": (510, 1270, 730, 1370),
        "start_dienstag": (510, 1380, 730, 1480),
        "ende_dienstag": (510, 1490, 730, 1580), 
        "start_mittwoch": (510, 1600, 730, 1690),
        "ende_mittwoch": (510, 1710, 730, 1800), 
        "start_donnerstag": (510, 1815, 730, 1915),
        "ende_donnerstag": (510, 1925, 730, 2020), 
        "start_freitag": (510, 2038, 730, 2130),
        "ende_freitag": (510, 2145, 730, 2240),
        "start_samstag": (510, 2260, 730, 2350),
        "ende_samstag":(510, 2365, 730, 2465),

        "pause_1_start_montag": (810, 1160, 980, 1250),
        "pause_1_ende_montag": (810, 1270, 980, 1370),
        "pause_1_start_dienstag": (810, 1380, 980, 1480),
        "pause_1_ende_dienstag": (810, 1490, 980, 1580),
        "pause_1_start_mittwoch": (810, 1600, 980, 1690),
        "pause_1_ende_mittwoch": (810, 1710, 980, 1800),
        "pause_1_start_donnerstag": (810, 1815, 980, 1915),
        "pause_1_ende_donnerstag": (810, 1925, 980, 2020),
        "pause_1_start_freitag": (810, 2038, 980, 2130),
        "pause_1_ende_freitag": (810, 2145, 980, 2240),
        "pause_1_start_samstag": (810, 2260, 980, 2350),
        "pause_1_ende_samstag": (810, 2365, 980, 2465),

        "pause_2_start_montag": (1055, 1160, 1210, 1250),
        "pause_2_ende_montag": (1055, 1270, 1210, 1370),
        "pause_2_start_dienstag": (1055, 1380, 1210, 1480),
        "pause_2_ende_dienstag": (1055, 1490, 1210, 1580),
        "pause_2_start_mittwoch": (1055, 1600, 1210, 1690),
        "pause_2_ende_mittwoch": (1055, 1710, 1210, 1800),
        "pause_2_start_donnerstag": (1055, 1815, 1210, 1915),
        "pause_2_ende_donnerstag": (1055, 1925, 1210, 2020),
        "pause_2_start_freitag": (1055, 2038, 1210, 2130),
        "pause_2_ende_freitag": (1055, 2145, 1210, 2240),
        "pause_2_start_samstag": (1055, 2260, 1210, 2350),
        "pause_2_ende_samstag": (1055, 2365, 1210, 2465)
    }
    
    for meaning, bbox in bounding_boxes.items():
        recognized_number = recognize_number_in_bbox(pdf_path, bbox, meaning)
        data_dict[meaning] = recognized_number
        print(f"Extracted {meaning}: {recognized_number}") 
    excel_files = [f for f in os.listdir(folder_path) if f.endswith('.xlsm')]
    if not excel_files:
        messagebox.showinfo("No Excel files found", "No Excel files found in the folder.")
        return
    excel_path = os.path.join(folder_path, excel_files[0])
    output_path = fill_excel_from_dict(excel_path, data_dict, progress_callback)
    return output_path

def run():
    root = tk.Tk()
    root.title("PDF to Excel Processor")
    root.geometry("400x200")

    label = tk.Label(root, text="Processing PDF and Excel files...", font=("Arial", 12))
    label.pack(pady=10)

    progress = ttk.Progressbar(root, orient="horizontal", length=300, mode="determinate")
    progress.pack(pady=20)

    def progress_callback(current, total):
        progress['value'] = (current / total) * 100
        root.update_idletasks()

    def start_process():
        label.config(text="Starting process...")
        root.update_idletasks()
        time.sleep(1)
        output_path = process_files(progress_callback)
        if output_path:
            label.config(text=f"Completed! Output saved at {output_path}")
        else:
            label.config(text="Processing failed.")
        progress['value'] = 100
        root.update_idletasks()

    start_button = tk.Button(root, text="Start", command=start_process)
    start_button.pack(pady=20)

    root.mainloop()

if __name__ == "__main__":
    run()



Extracted Text:  16.
Extracted Text:  6.6.
Extracted Text:  1330
Extracted Text:  6 30
Extracted Text:  1630
Extracted Text:  6 30
Extracted Text:  1500
Extracted Text:  830
Extracted Text:  1700
Extracted Text:  515. 5
Extracted Text:  14 00.
Extracted Text:  The first generation was the first ever ever ever ever ever ever ever ever ever ever ever ever ever
Extracted Text:  Furname that has been used to be a number of articles
Extracted Text:  11 00
Extracted Text:  1130
Extracted Text:  0g.
Extracted Text:  0g 30
Extracted Text:  100 0
Extracted Text:  1030
Extracted Text:  11 00
Extracted Text:  1 1 30
Extracted Text:  8 830
Extracted Text:  9 00
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  1961 62
Extracted Text:  1 430
Extracted Text:  1 445
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  1961 62
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  1961 62
Extracted Text:  16.
Extracted T

In [15]:
import os
import tkinter as tk
from tkinter import ttk, messagebox
from openpyxl import load_workbook
import fitz
from PIL import Image, ImageEnhance
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import re
import time

def process_time_text(text):
    normalized_text = re.sub(r'[ \-%,.]', ':', text)
    #normalized_text = re.sub(r'g', '9', normalized_text)
    normalized_text = re.sub(r'[^\d:]', '', normalized_text)
    if normalized_text == "00" or normalized_text == "0:0":
        return ""
    if re.match(r'^\d{1,2}:$', normalized_text):
        normalized_text = normalized_text.zfill(3) + "00"
    digits_only = re.sub(r'[^\d]', '', normalized_text)
    if len(digits_only) == 5:
        normalized_text = f"0{digits_only[1:3]}:{digits_only[3:5]}"
    elif len(digits_only) == 4:
        normalized_text = f"{digits_only[:2]}:{digits_only[2:4]}"
    elif len(digits_only) == 3:
        normalized_text = f"0{digits_only[0]}:{digits_only[1:3]}"
    if re.match(r'^\d{1,2}:\d{2}$', normalized_text):
        parts = normalized_text.split(':')
        hour = parts[0].zfill(2)
        minute = parts[1]
        if minute == "60":
            minute = "00"
        return f"{hour}:{minute}"
    parts = re.findall(r'\d+', normalized_text)
    if len(parts) >= 2:
        hour = parts[0].zfill(2)
        minute = parts[1]
        if len(minute) == 1:
            minute = minute + '0'
        if minute == "60":
            minute = "00"
        return f"{hour}:{minute}"
    return text.strip()

def recognize_number_in_bbox(pdf_path, bbox_rect, meaning, processor, model):
    pdf_document = fitz.open(pdf_path)
    page = pdf_document.load_page(0)
    rect = fitz.Rect(*bbox_rect)
    page.set_cropbox(rect)
    pix = page.get_pixmap()
    img = Image.open(io.BytesIO(pix.tobytes())).convert("RGB")
    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(2)
    pixel_values = processor(images=img, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values, max_new_tokens=20)
    extract_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    extract_text = re.sub(r'g', '9', extract_text) # Replace 'g' with '9' here
    pdf_document.close()
    if len(extract_text) > 6 or not re.search(r'\d', extract_text):
        return ""
    if meaning != "kalenderwoche":
        extract_text = process_time_text(extract_text)
    return extract_text

def fill_excel_from_dict(path, data_dict, progress_callback):
    workbook = load_workbook(path)
    sheet = workbook["01"]
    year = sheet['D3'].value
    kalenderwoche = int(data_dict.get("kalenderwoche", "").strip('.'))
    month = get_month_from_week(year, kalenderwoche)
    sheet_number = {
        "January": "01",
        "February": "02",
        "March": "03",
        "April": "04",
        "May": "05",
        "June": "06",
        "July": "07",
        "August": "08",
        "September": "09",
        "October": "10",
        "November": "11",
        "December": "12"
    }.get(month, "04")
    sheet_name = f"{sheet_number}"
    sheet = workbook[sheet_name]
    target_row = None
    for row in sheet.iter_rows(min_col=1, max_col=1):
        if row[0].value == kalenderwoche:
            target_row = row[0].row
            break
    if target_row is None:
        messagebox.showerror("Error", f"Kalenderwoche {kalenderwoche} cannot be found in column A.")
        return
    base_row = target_row
    red_fill = PatternFill(start_color="FF0000", end_color="FF0000", fill_type="solid")
    yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
    columns = {
        "start": "D",
        "ende": "K",
        "pause_1_start": "E",
        "pause_1_ende": "F",
        "pause_2_start": "G",
        "pause_2_ende": "H",
    }
    days = ["montag", "dienstag", "mittwoch", "donnerstag", "freitag", "samstag", "sonntag"]
    for i, day in enumerate(days):
        row = base_row + i
        for key_suffix, col in columns.items():
            key = f"{key_suffix}_{day}"
            time_value = data_dict.get(key, "")
            cell = sheet[f"{col}{row}"]
            if time_value or time_value == "":
                fill_color = red_fill if time_value and not is_valid_time(time_value) else yellow_fill
                for merged_range in sheet.merged_cells.ranges:
                    if cell.coordinate in merged_range:
                        top_left_cell = sheet.cell(row=merged_range.min_row, column=merged_range.min_col)
                        top_left_cell.value = time_value
                        top_left_cell.fill = fill_color
                        break
                    else:
                        cell.value = time_value
                        cell.fill = fill_color
        progress_callback(i + 1, len(days))
    output_path = path.replace(".xlsm", "_modified.xlsx")
    workbook.save(output_path)
    return output_path

def load_model(progress_callback, label, root):
    label.config(text="Model für Handschrifterkennung wird geladen...")
    root.update_idletasks()  # Update the UI
    time.sleep(1)
    
    processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
    model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
    
    progress_callback(1, 3)  # Move the progress bar to the first stage
    return processor, model

def process_files(progress_callback, processor, model, root, label):
    folder_path = os.getcwd()
    pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]
    if not pdf_files:
        messagebox.showinfo("Keine PDFs gefunden", "Keine PDF-Dateien im Ordner gefunden.")
        return
    pdf_path = os.path.join(folder_path, pdf_files[0])
    data_dict = {}  # Implement your logic to populate this dictionary
    bounding_boxes = {
        "kalenderwoche": (450, 700, 700, 770),
        "start_montag": (510, 1160, 730, 1250) ,
        "ende_montag": (510, 1270, 730, 1370),
        "start_dienstag": (510, 1380, 730, 1480),
        "ende_dienstag": (510, 1490, 730, 1580), 
        "start_mittwoch": (510, 1600, 730, 1690),
        "ende_mittwoch": (510, 1710, 730, 1800), 
        "start_donnerstag": (510, 1815, 730, 1915),
        "ende_donnerstag": (510, 1925, 730, 2020), 
        "start_freitag": (510, 2038, 730, 2130),
        "ende_freitag": (510, 2145, 730, 2240),
        "start_samstag": (510, 2260, 730, 2350),
        "ende_samstag":(510, 2365, 730, 2465),

        "pause_1_start_montag": (810, 1160, 980, 1250),
        "pause_1_ende_montag": (810, 1270, 980, 1370),
        "pause_1_start_dienstag": (810, 1380, 980, 1480),
        "pause_1_ende_dienstag": (810, 1490, 980, 1580),
        "pause_1_start_mittwoch": (810, 1600, 980, 1690),
        "pause_1_ende_mittwoch": (810, 1710, 980, 1800),
        "pause_1_start_donnerstag": (810, 1815, 980, 1915),
        "pause_1_ende_donnerstag": (810, 1925, 980, 2020),
        "pause_1_start_freitag": (810, 2038, 980, 2130),
        "pause_1_ende_freitag": (810, 2145, 980, 2240),
        "pause_1_start_samstag": (810, 2260, 980, 2350),
        "pause_1_ende_samstag": (810, 2365, 980, 2465),

        "pause_2_start_montag": (1055, 1160, 1210, 1250),
        "pause_2_ende_montag": (1055, 1270, 1210, 1370),
        "pause_2_start_dienstag": (1055, 1380, 1210, 1480),
        "pause_2_ende_dienstag": (1055, 1490, 1210, 1580),
        "pause_2_start_mittwoch": (1055, 1600, 1210, 1690),
        "pause_2_ende_mittwoch": (1055, 1710, 1210, 1800),
        "pause_2_start_donnerstag": (1055, 1815, 1210, 1915),
        "pause_2_ende_donnerstag": (1055, 1925, 1210, 2020),
        "pause_2_start_freitag": (1055, 2038, 1210, 2130),
        "pause_2_ende_freitag": (1055, 2145, 1210, 2240),
        "pause_2_start_samstag": (1055, 2260, 1210, 2350),
        "pause_2_ende_samstag": (1055, 2365, 1210, 2465)
    }
    for meaning, bbox in bounding_boxes.items():
        recognized_number = recognize_number_in_bbox(pdf_path, bbox, meaning, processor, model)
        data_dict[meaning] = recognized_number
    excel_files = [f for f in os.listdir(folder_path) if f.endswith('.xlsm')]
    if not excel_files:
        messagebox.showinfo("Keine Excel-Dateien gefunden", "Keine Excel-Dateien im Ordner gefunden.")
        return
    excel_path = os.path.join(folder_path, excel_files[0])
    output_path = fill_excel_from_dict(excel_path, data_dict, progress_callback)
    return output_path

def start_process(progress_callback, label, root):
    try:
        # Disable the start button to prevent re-clicks during processing
        start_button.config(state=tk.DISABLED)
        
        # Update the UI to indicate that the model is being loaded
        processor, model = load_model(progress_callback, label, root)

        # Once the model is loaded, proceed with processing
        label.config(text="PDF und Excel-Dateien werden verarbeitet...")
        root.update_idletasks()  # Force UI update
        
        output_path = process_files(progress_callback, processor, model, root, label)
        if output_path:
            label.config(text=f"Fertig! Ausgabe gespeichert unter {output_path}")
        else:
            label.config(text="Verarbeitung fehlgeschlagen.")
    except Exception as e:
        label.config(text=f"Fehler: {str(e)}")
    finally:
        progress_callback(3, 3)  # Ensure progress bar is filled
        root.update_idletasks()  # Force UI update
        
        # Re-enable the start button after completion
        start_button.config(state=tk.NORMAL)
        
        # Close the application if you want it to automatically end
        root.quit()

def run():
    root = tk.Tk()
    root.title("PDF zu Excel Verarbeitung")
    root.geometry("400x200")

    label = tk.Label(root, text="PDF-Datei wird verarbeitet ...", font=("Arial", 12))
    label.pack(pady=10)

    progress = ttk.Progressbar(root, orient="horizontal", length=300, mode="determinate")
    progress.pack(pady=20)

    def progress_callback(current, total):
        progress['value'] = (current / total) * 100
        root.update_idletasks()

    start_button = tk.Button(root, text="Start", command=lambda: start_process(progress_callback, label, root))
    start_button.pack(pady=20)

    root.mainloop()

if __name__ == "__main__":
    run()

Exception in Tkinter callback
Traceback (most recent call last):
  File "/Users/sandradening/anaconda3/envs/Sandra_Coding/lib/python3.8/tkinter/__init__.py", line 1892, in __call__
    return self.func(*args)
  File "/var/folders/nd/xc3t3y5n57jcb763d6d9rksw0000gn/T/ipykernel_39411/2892898633.py", line 243, in <lambda>
    start_button = tk.Button(root, text="Start", command=lambda: start_process(progress_callback, label, root))
  File "/var/folders/nd/xc3t3y5n57jcb763d6d9rksw0000gn/T/ipykernel_39411/2892898633.py", line 223, in start_process
    start_button.config(state=tk.NORMAL)
NameError: name 'start_button' is not defined
Exception in Tkinter callback
Traceback (most recent call last):
  File "/Users/sandradening/anaconda3/envs/Sandra_Coding/lib/python3.8/tkinter/__init__.py", line 1892, in __call__
    return self.func(*args)
  File "/var/folders/nd/xc3t3y5n57jcb763d6d9rksw0000gn/T/ipykernel_39411/2892898633.py", line 243, in <lambda>
    start_button = tk.Button(root, text="Star

In [3]:
def process_time_text(text):
    normalized_text = re.sub(r'[ \-%,.]', ':', text)
    normalized_text = re.sub(r'[^\d:]', '', normalized_text)
    if normalized_text == "00" or normalized_text == "0:0":
        return ""
    if re.match(r'^\d{1,2}:$', normalized_text):
        normalized_text = normalized_text.zfill(3) + "00"
    digits_only = re.sub(r'[^\d]', '', normalized_text)
    if len(digits_only) == 5:
        normalized_text = f"0{digits_only[1:3]}:{digits_only[3:5]}"
    elif len(digits_only) == 4:
        normalized_text = f"{digits_only[:2]}:{digits_only[2:4]}"
    elif len(digits_only) == 3:
        normalized_text = f"0{digits_only[0]}:{digits_only[1:3]}"
    if re.match(r'^\d{1,2}:\d{2}$', normalized_text):
        parts = normalized_text.split(':')
        hour = parts[0].zfill(2)
        minute = parts[1]
        if minute == "60":
            minute = "00"
        return f"{hour}:{minute}"
    parts = re.findall(r'\d+', normalized_text)
    if len(parts) >= 2:
        hour = parts[0].zfill(2)
        minute = parts[1]
        if len(minute) == 1:
            minute = minute + '0'
        if minute == "60":
            minute = "00"
        return f"{hour}:{minute}"
    return text.strip()

def recognize_number_in_bbox(pdf_path, bbox_rect, meaning, processor, model):
    pdf_document = fitz.open(pdf_path)
    page = pdf_document.load_page(0)
    rect = fitz.Rect(*bbox_rect)
    page.set_cropbox(rect)
    pix = page.get_pixmap()
    img = Image.open(io.BytesIO(pix.tobytes())).convert("RGB")
    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(2)
    pixel_values = processor(images=img, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values, max_new_tokens=20)
    extract_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    extract_text = re.sub(r'g', '9', extract_text) # Replace 'g' with '9' here
    pdf_document.close()
    if len(extract_text) > 6 or not re.search(r'\d', extract_text):
        return ""
    if meaning != "kalenderwoche":
        extract_text = process_time_text(extract_text)
    return extract_text

def fill_excel_from_dict(path, data_dict, progress_callback):
    workbook = load_workbook(path)
    sheet = workbook["01"]
    year = sheet['D3'].value
    kalenderwoche = int(data_dict.get("kalenderwoche", "").strip('.'))
    month = get_month_from_week(year, kalenderwoche)
    sheet_number = {
        "January": "01",
        "February": "02",
        "March": "03",
        "April": "04",
        "May": "05",
        "June": "06",
        "July": "07",
        "August": "08",
        "September": "09",
        "October": "10",
        "November": "11",
        "December": "12"
    }.get(month, "04")
    sheet_name = f"{sheet_number}"
    sheet = workbook[sheet_name]
    target_row = None
    for row in sheet.iter_rows(min_col=1, max_col=1):
        if row[0].value == kalenderwoche:
            target_row = row[0].row
            break
    if target_row is None:
        messagebox.showerror("Error", f"Kalenderwoche {kalenderwoche} kann nicht in Spalte A gefunden werden!")
        return
    base_row = target_row
    red_fill = PatternFill(start_color="FF0000", end_color="FF0000", fill_type="solid")
    yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
    columns = {
        "start": "D",
        "ende": "K",
        "pause_1_start": "E",
        "pause_1_ende": "F",
        "pause_2_start": "G",
        "pause_2_ende": "H",
    }
    days = ["montag", "dienstag", "mittwoch", "donnerstag", "freitag", "samstag", "sonntag"]
    for i, day in enumerate(days):
        row = base_row + i
        for key_suffix, col in columns.items():
            key = f"{key_suffix}_{day}"
            time_value = data_dict.get(key, "")
            cell = sheet[f"{col}{row}"]
            if time_value or time_value == "":
                fill_color = red_fill if time_value and not is_valid_time(time_value) else yellow_fill
                for merged_range in sheet.merged_cells.ranges:
                    if cell.coordinate in merged_range:
                        top_left_cell = sheet.cell(row=merged_range.min_row, column=merged_range.min_col)
                        top_left_cell.value = time_value
                        top_left_cell.fill = fill_color
                        break
                    else:
                        cell.value = time_value
                        cell.fill = fill_color
        progress_callback(i + 1, len(days))
    output_path = path.replace(".xlsm", "_modified.xlsx")
    workbook.save(output_path)
    return output_path

def load_model(progress_callback, label, root):
    label.config(text="Model für Handschrifterkennung wird geladen...")
    root.update_idletasks()  # Update the UI
    time.sleep(1)
    
    processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
    model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
    
    progress_callback(1, 3)  # Move the progress bar to the first stage
    return processor, model

def process_files(progress_callback, processor, model, root, label):
    folder_path = os.getcwd()
    pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]
    if not pdf_files:
        messagebox.showinfo("Keine PDFs gefunden", "Keine PDF-Dateien im Ordner gefunden.")
        return
    pdf_path = os.path.join(folder_path, pdf_files[0])
    data_dict = {}  # Implement your logic to populate this dictionary
    bounding_boxes = {
        "kalenderwoche": (450, 700, 700, 770),
        "start_montag": (510, 1160, 730, 1250) ,
        "ende_montag": (510, 1270, 730, 1370),
        "start_dienstag": (510, 1380, 730, 1480),
        "ende_dienstag": (510, 1490, 730, 1580), 
        "start_mittwoch": (510, 1600, 730, 1690),
        "ende_mittwoch": (510, 1710, 730, 1800), 
        "start_donnerstag": (510, 1815, 730, 1915),
        "ende_donnerstag": (510, 1925, 730, 2020), 
        "start_freitag": (510, 2038, 730, 2130),
        "ende_freitag": (510, 2145, 730, 2240),
        "start_samstag": (510, 2260, 730, 2350),
        "ende_samstag":(510, 2365, 730, 2465),

        "pause_1_start_montag": (810, 1160, 980, 1250),
        "pause_1_ende_montag": (810, 1270, 980, 1370),
        "pause_1_start_dienstag": (810, 1380, 980, 1480),
        "pause_1_ende_dienstag": (810, 1490, 980, 1580),
        "pause_1_start_mittwoch": (810, 1600, 980, 1690),
        "pause_1_ende_mittwoch": (810, 1710, 980, 1800),
        "pause_1_start_donnerstag": (810, 1815, 980, 1915),
        "pause_1_ende_donnerstag": (810, 1925, 980, 2020),
        "pause_1_start_freitag": (810, 2038, 980, 2130),
        "pause_1_ende_freitag": (810, 2145, 980, 2240),
        "pause_1_start_samstag": (810, 2260, 980, 2350),
        "pause_1_ende_samstag": (810, 2365, 980, 2465),

        "pause_2_start_montag": (1055, 1160, 1210, 1250),
        "pause_2_ende_montag": (1055, 1270, 1210, 1370),
        "pause_2_start_dienstag": (1055, 1380, 1210, 1480),
        "pause_2_ende_dienstag": (1055, 1490, 1210, 1580),
        "pause_2_start_mittwoch": (1055, 1600, 1210, 1690),
        "pause_2_ende_mittwoch": (1055, 1710, 1210, 1800),
        "pause_2_start_donnerstag": (1055, 1815, 1210, 1915),
        "pause_2_ende_donnerstag": (1055, 1925, 1210, 2020),
        "pause_2_start_freitag": (1055, 2038, 1210, 2130),
        "pause_2_ende_freitag": (1055, 2145, 1210, 2240),
        "pause_2_start_samstag": (1055, 2260, 1210, 2350),
        "pause_2_ende_samstag": (1055, 2365, 1210, 2465)
    }
    for meaning, bbox in bounding_boxes.items():
        recognized_number = recognize_number_in_bbox(pdf_path, bbox, meaning, processor, model)
        data_dict[meaning] = recognized_number
    excel_files = [f for f in os.listdir(folder_path) if f.endswith('.xlsm')]
    if not excel_files:
        messagebox.showinfo("Keine Excel-Dateien gefunden", "Keine Excel-Dateien im Ordner gefunden.")
        return
    excel_path = os.path.join(folder_path, excel_files[0])
    output_path = fill_excel_from_dict(excel_path, data_dict, progress_callback)
    return output_path

def start_process(progress_callback, label, root, start_button):
    try:
        # Disable the start button to prevent re-clicks during processing
        start_button.config(state=tk.DISABLED)
        
        # Update the UI to indicate that the model is being loaded
        processor, model = load_model(progress_callback, label, root)

        # Once the model is loaded, proceed with processing
        label.config(text="PDF-Datei wird verarbeitet ...")
        root.update_idletasks()  # Force UI update
        
        output_path = process_files(progress_callback, processor, model, root, label)
        if output_path:
            label.config(text=f"Fertig! Ausgabe gespeichert unter {output_path}")
        else:
            label.config(text="Verarbeitung fehlgeschlagen.")
    except Exception as e:
        label.config(text=f"Fehler: {str(e)}")
    finally:
        progress_callback(3, 3)  # Ensure progress bar is filled
        root.update_idletasks()  # Force UI update
        
        # Re-enable the start button after completion
        start_button.config(state=tk.NORMAL)
        
        # Close the application if you want it to automatically end
        root.destroy()

def run():
    root = tk.Tk()
    root.title("PDF Handschrifterkennung - Excel")
    root.geometry("400x200")

    label = tk.Label(root, text="Klicke ‘Start‘ um die PDF Datei zu verarbeiten.", font=("Arial", 12))
    label.pack(pady=10)

    progress = ttk.Progressbar(root, orient="horizontal", length=300, mode="determinate")
    progress.pack(pady=20)

    def progress_callback(current, total):
        progress['value'] = (current / total) * 100
        root.update_idletasks()

    start_button = tk.Button(root, text="Start", command=lambda: start_process(progress_callback, label, root, start_button))
    start_button.pack(pady=20)

    root.mainloop()

if __name__ == "__main__":
    run()

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
def process_time_text(text):
    normalized_text = re.sub(r'[ \-%,.]', ':', text)
    normalized_text = re.sub(r'[^\d:]', '', normalized_text)
    if normalized_text == "00" or normalized_text == "0:0":
        return ""
    if re.match(r'^\d{1,2}:$', normalized_text):
        normalized_text = normalized_text.zfill(3) + "00"
    digits_only = re.sub(r'[^\d]', '', normalized_text)
    if len(digits_only) == 5:
        normalized_text = f"0{digits_only[1:3]}:{digits_only[3:5]}"
    elif len(digits_only) == 4:
        normalized_text = f"{digits_only[:2]}:{digits_only[2:4]}"
    elif len(digits_only) == 3:
        normalized_text = f"0{digits_only[0]}:{digits_only[1:3]}"
    if re.match(r'^\d{1,2}:\d{2}$', normalized_text):
        parts = normalized_text.split(':')
        hour = parts[0].zfill(2)
        minute = parts[1]
        if minute == "60":
            minute = "00"
        return f"{hour}:{minute}"
    parts = re.findall(r'\d+', normalized_text)
    if len(parts) >= 2:
        hour = parts[0].zfill(2)
        minute = parts[1]
        if len(minute) == 1:
            minute = minute + '0'
        if minute == "60":
            minute = "00"
        return f"{hour}:{minute}"
    return text.strip()

def recognize_number_in_bbox(pdf_path, bbox_rect, meaning, processor, model):
    pdf_document = fitz.open(pdf_path)
    page = pdf_document.load_page(0)
    rect = fitz.Rect(*bbox_rect)
    page.set_cropbox(rect)
    pix = page.get_pixmap()
    img = Image.open(io.BytesIO(pix.tobytes())).convert("RGB")
    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(2)
    pixel_values = processor(images=img, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values, max_new_tokens=20)
    extract_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    extract_text = re.sub(r'g', '9', extract_text) # Replace 'g' with '9' here
    pdf_document.close()
    if len(extract_text) > 6 or not re.search(r'\d', extract_text):
        return ""
    if meaning != "kalenderwoche":
        extract_text = process_time_text(extract_text)
    return extract_text

def fill_excel_from_dict(path, data_dict):
    workbook = load_workbook(path)
    sheet = workbook["01"]
    year = sheet['D3'].value
    kalenderwoche = int(data_dict.get("kalenderwoche", "").strip('.'))
    month = get_month_from_week(year, kalenderwoche)
    sheet_number = {
        "January": "01",
        "February": "02",
        "March": "03",
        "April": "04",
        "May": "05",
        "June": "06",
        "July": "07",
        "August": "08",
        "September": "09",
        "October": "10",
        "November": "11",
        "December": "12"
    }.get(month, "04")
    sheet_name = f"{sheet_number}"
    sheet = workbook[sheet_name]
    target_row = None
    for row in sheet.iter_rows(min_col=1, max_col=1):
        if row[0].value == kalenderwoche:
            target_row = row[0].row
            break
    if target_row is None:
        messagebox.showerror("Error", f"Kalenderwoche {kalenderwoche} cannot be found in column A.")
        return
    base_row = target_row
    red_fill = PatternFill(start_color="FF0000", end_color="FF0000", fill_type="solid")
    yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
    columns = {
        "start": "D",
        "ende": "K",
        "pause_1_start": "E",
        "pause_1_ende": "F",
        "pause_2_start": "G",
        "pause_2_ende": "H",
    }
    days = ["montag", "dienstag", "mittwoch", "donnerstag", "freitag", "samstag", "sonntag"]
    for i, day in enumerate(days):
        row = base_row + i
        for key_suffix, col in columns.items():
            key = f"{key_suffix}_{day}"
            time_value = data_dict.get(key, "")
            cell = sheet[f"{col}{row}"]
            if time_value or time_value == "":
                fill_color = red_fill if time_value and not is_valid_time(time_value) else yellow_fill
                for merged_range in sheet.merged_cells.ranges:
                    if cell.coordinate in merged_range:
                        top_left_cell = sheet.cell(row=merged_range.min_row, column=merged_range.min_col)
                        top_left_cell.value = time_value
                        top_left_cell.fill = fill_color
                        break
                    else:
                        cell.value = time_value
                        cell.fill = fill_color
    output_path = path.replace(".xlsm", "_modified.xlsx")
    workbook.save(output_path)
    return output_path

def load_model(label, root):
    label.config(text="Model für Handschrifterkennung wird geladen...")
    root.update_idletasks()  # Update the UI
    time.sleep(1)
    
    processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
    model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
    
    return processor, model

def process_files(processor, model, root, label):
    folder_path = os.getcwd()
    pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]
    if not pdf_files:
        messagebox.showinfo("Keine PDFs gefunden", "Keine PDF-Dateien im Ordner gefunden.")
        return
    pdf_path = os.path.join(folder_path, pdf_files[0])
    data_dict = {}  # Implement your logic to populate this dictionary
    bounding_boxes = {
        "kalenderwoche": (450, 700, 700, 770),
        "start_montag": (510, 1160, 730, 1250) ,
        "ende_montag": (510, 1270, 730, 1370),
        "start_dienstag": (510, 1380, 730, 1480),
        "ende_dienstag": (510, 1490, 730, 1580), 
        "start_mittwoch": (510, 1600, 730, 1690),
        "ende_mittwoch": (510, 1710, 730, 1800), 
        "start_donnerstag": (510, 1815, 730, 1915),
        "ende_donnerstag": (510, 1925, 730, 2020), 
        "start_freitag": (510, 2038, 730, 2130),
        "ende_freitag": (510, 2145, 730, 2240),
        "start_samstag": (510, 2260, 730, 2350),
        "ende_samstag":(510, 2365, 730, 2465),

        "pause_1_start_montag": (810, 1160, 980, 1250),
        "pause_1_ende_montag": (810, 1270, 980, 1370),
        "pause_1_start_dienstag": (810, 1380, 980, 1480),
        "pause_1_ende_dienstag": (810, 1490, 980, 1580),
        "pause_1_start_mittwoch": (810, 1600, 980, 1690),
        "pause_1_ende_mittwoch": (810, 1710, 980, 1800),
        "pause_1_start_donnerstag": (810, 1815, 980, 1915),
        "pause_1_ende_donnerstag": (810, 1925, 980, 2020),
        "pause_1_start_freitag": (810, 2038, 980, 2130),
        "pause_1_ende_freitag": (810, 2145, 980, 2240),
        "pause_1_start_samstag": (810, 2260, 980, 2350),
        "pause_1_ende_samstag": (810, 2365, 980, 2465),

        "pause_2_start_montag": (1055, 1160, 1210, 1250),
        "pause_2_ende_montag": (1055, 1270, 1210, 1370),
        "pause_2_start_dienstag": (1055, 1380, 1210, 1480),
        "pause_2_ende_dienstag": (1055, 1490, 1210, 1580),
        "pause_2_start_mittwoch": (1055, 1600, 1210, 1690),
        "pause_2_ende_mittwoch": (1055, 1710, 1210, 1800),
        "pause_2_start_donnerstag": (1055, 1815, 1210, 1915),
        "pause_2_ende_donnerstag": (1055, 1925, 1210, 2020),
        "pause_2_start_freitag": (1055, 2038, 1210, 2130),
        "pause_2_ende_freitag": (1055, 2145, 1210, 2240),
        "pause_2_start_samstag": (1055, 2260, 1210, 2350),
        "pause_2_ende_samstag": (1055, 2365, 1210, 2465)
    }
    for meaning, bbox in bounding_boxes.items():
        recognized_number = recognize_number_in_bbox(pdf_path, bbox, meaning, processor, model)
        data_dict[meaning] = recognized_number
    excel_files = [f for f in os.listdir(folder_path) if f.endswith('.xlsm')]
    
    if not excel_files:
        messagebox.showinfo("Keine Excel-Dateien gefunden", "Keine Excel-Dateien im Ordner gefunden.")
        return
    
    excel_path = os.path.join(folder_path, excel_files[0])
    output_path = fill_excel_from_dict(excel_path, data_dict)
    
    return output_path

def start_process(label, root, start_button):
    try:
        # Deactivate the start button to prevent re-clicks
        start_button.config(state=tk.DISABLED)
        
        # Update the UI to indicate that the model is being loaded
        processor, model = load_model(label, root)

        # Once the model is loaded, proceed with processing
        label.config(text="PDF und Excel-Dateien werden verarbeitet...")
        root.update_idletasks()  # Force UI update
        
        output_path = process_files(processor, model, root, label)
        
        label.config(text=f"Fertig! Neue Excel ist gespeichert.")
        root.update()  # Ensure all pending updates are processed

        # Close the application immediately after saving the Excel file
        root.after(1000, root.destroy)  # Close the window after 1 second delay to allow updates
    except Exception as e:
        label.config(text=f"Fehler: {str(e)}")
        root.update()  # Ensure error message is shown
        root.after(1000, root.destroy) 


def run():
    root = tk.Tk()
    root.title("PDF zu Excel Verarbeitung")
    root.geometry("400x150")

    label = tk.Label(root, text="Klicke ‘Start‘ um die PDF Datei zu verarbeiten.", font=("Arial", 12))
    label.pack(pady=10)

    start_button = tk.Button(root, text="Start", command=lambda: start_process(label, root, start_button))
    start_button.pack(pady=20)

    root.mainloop()  # Start the Tkinter event loop


if __name__ == "__main__":
    run()

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Backup

In [4]:
pdf_path = 'Beispiel_Zeiten_processed.pdf'
bounding_boxes = {
    "kalenderwoche": (450, 700, 700, 770),
    "start_montag": (510, 1160, 730, 1250) ,
    "ende_montag": (510, 1270, 730, 1370),
    "start_dienstag": (510, 1380, 730, 1480),
    "ende_dienstag": (510, 1490, 730, 1580), 
    "start_mittwoch": (510, 1600, 730, 1690),
    "ende_mittwoch": (510, 1710, 730, 1800), 
    "start_donnerstag": (510, 1815, 730, 1915),
    "ende_donnerstag": (510, 1925, 730, 2020), 
    "start_freitag": (510, 2038, 730, 2130),
    "ende_freitag": (510, 2145, 730, 2240),
    "start_samstag": (510, 2260, 730, 2350),
    "ende_samstag":(510, 2365, 730, 2465),

    "pause_1_start_montag": (810, 1160, 980, 1250),
    "pause_1_ende_montag": (810, 1270, 980, 1370),
    "pause_1_start_dienstag": (810, 1380, 980, 1480),
    "pause_1_ende_dienstag": (810, 1490, 980, 1580),
    "pause_1_start_mittwoch": (810, 1600, 980, 1690),
    "pause_1_ende_mittwoch": (810, 1710, 980, 1800),
    "pause_1_start_donnerstag": (810, 1815, 980, 1915),
    "pause_1_ende_donnerstag": (810, 1925, 980, 2020),
    "pause_1_start_freitag": (810, 2038, 980, 2130),
    "pause_1_ende_freitag": (810, 2145, 980, 2240),
    "pause_1_start_samstag": (810, 2260, 980, 2350),
    "pause_1_ende_samstag": (810, 2365, 980, 2465),

    "pause_2_start_montag": (1055, 1160, 1210, 1250),
    "pause_2_ende_montag": (1055, 1270, 1210, 1370),
    "pause_2_start_dienstag": (1055, 1380, 1210, 1480),
    "pause_2_ende_dienstag": (1055, 1490, 1210, 1580),
    "pause_2_start_mittwoch": (1055, 1600, 1210, 1690),
    "pause_2_ende_mittwoch": (1055, 1710, 1210, 1800),
    "pause_2_start_donnerstag": (1055, 1815, 1210, 1915),
    "pause_2_ende_donnerstag": (1055, 1925, 1210, 2020),
    "pause_2_start_freitag": (1055, 2038, 1210, 2130),
    "pause_2_ende_freitag": (1055, 2145, 1210, 2240),
    "pause_2_start_samstag": (1055, 2260, 1210, 2350),
    "pause_2_ende_samstag": (1055, 2365, 1210, 2465)
}



# Dictionary to store the recognized numbers
recognized_numbers = {}

# Loop through the bounding boxes and recognize the numbers
for meaning, bbox in bounding_boxes.items():
    recognized_number = recognize_number_in_bbox(pdf_path, bbox, meaning)
    recognized_numbers[meaning] = recognized_number

# Print the recognized numbers with their meanings
for meaning, number in recognized_numbers.items():
    print(f"{meaning}: {number}")



Extracted Text:  16.
Extracted Text:  6.6.
Extracted Text:  1330
Extracted Text:  6 30
Extracted Text:  1630
Extracted Text:  6 30
Extracted Text:  1500
Extracted Text:  830
Extracted Text:  1700
Extracted Text:  515. 5
Extracted Text:  14 00.
Extracted Text:  The first generation was the first ever ever ever ever ever ever ever ever ever ever ever ever ever
Extracted Text:  Furname that has been used to be a number of articles
Extracted Text:  11 00
Extracted Text:  1130
Extracted Text:  0g.
Extracted Text:  0g 30
Extracted Text:  100 0
Extracted Text:  1030
Extracted Text:  11 00
Extracted Text:  1 1 30
Extracted Text:  8 830
Extracted Text:  9 00
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  1961 62
Extracted Text:  1 430
Extracted Text:  1 445
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  1961 62
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  0 0
Extracted Text:  1961 62
kalenderwoche: 16.
start_montag: