In [None]:
# Main pipeline
from ocr import extract_text_from_image,ocr_results_to_dataframe, filter_by_score, cluster_polygons, add_cluster_column, bounding_boxes_by_cluster_with_text
from img_tools import get_image_size, save_crops_from_coords, load_image_as_numpy,create_and_save_solid_image,average_grayscale
from traduction import ollama_translate_en_fr, translate_cluster_texts
from tools import clean_folder, launch_exe, natural_sort_key
import os

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Empty every output folder before a run; the later steps list these
# directories, so leftover files from a previous run would be picked up.
for folder in (
    "../outputs/ocr_outputs",
    "../outputs/text_remove_outputs",
    "../outputs/text_drawn_outputs",
):
    clean_folder(folder)

Le dossier ../outputs/ocr_outputs a été nettoyé avec succès.
Le dossier ../outputs/text_remove_outputs a été nettoyé avec succès.
Le dossier ../outputs/text_drawn_outputs a été nettoyé avec succès.


In [None]:
def draw_centered_text(image_path, text, font_path, font_size, output_path, margin=10, min_font_size=10, fill_color=(0,0,0)):
    """
    Draw word-wrapped text centred on an image and save the result.

    The text is wrapped so that each line fits inside the image minus
    ``margin`` pixels on every side. Starting at ``font_size``, the font is
    shrunk one unit at a time (never below ``min_font_size``) until the wrapped
    block fits both vertically and horizontally. If it still does not fit at
    the smallest size tried, it is drawn anyway and may overflow the margins.

    Parameters:
        image_path : path of the image to draw on (loaded and converted to RGB).
        text : text to draw; explicit newline characters start a new line,
            empty lines are dropped.
        font_path : path to a TrueType font file.
        font_size : initial font size; it is always tried first, even when it
            is smaller than ``min_font_size``.
        output_path : path where the resulting image is saved.
        margin : space in pixels kept free on each side of the image.
        min_font_size : lower bound for the automatic shrinking.
        fill_color : RGB tuple or str, text colour (e.g. (255,0,0) or "red").
    """
    # Imported here so the function does not depend on a Pillow import that
    # happened to be executed in some other cell of the notebook.
    from PIL import Image, ImageDraw, ImageFont

    with Image.open(image_path) as source:
        img = source.convert("RGB")
    draw = ImageDraw.Draw(img)

    max_width = img.width - 2 * margin
    max_height = img.height - 2 * margin

    def measure_width(content, font):
        # Horizontal extent of `content` when rendered with `font`.
        left, _top, right, _bottom = draw.textbbox((0, 0), content, font=font)
        return right - left

    def split_text_lines(font):
        # Greedy word wrap: keep adding words while the line fits max_width.
        lines = []
        for paragraph in text.split('\n'):
            current_line = ""
            for word in paragraph.split():
                candidate = f"{current_line} {word}" if current_line else word
                if measure_width(candidate, font) <= max_width:
                    current_line = candidate
                else:
                    if current_line:
                        lines.append(current_line)
                    # A word wider than max_width still gets its own line.
                    current_line = word
            if current_line:
                lines.append(current_line)
        return lines

    def layout(size):
        # Build the font at `size` and wrap the text with it.
        font = ImageFont.truetype(font_path, size)
        lines = split_text_lines(font)
        _left, top, _right, bottom = draw.textbbox((0, 0), "Ay", font=font)
        # "Ay" covers an ascender and a descender; 5 px of line spacing is added.
        line_height = (bottom - top) + 5
        return font, lines, line_height

    def fits(font, lines, line_height):
        # The block must fit in height, and no single line may exceed max_width
        # (an unbreakable long word can otherwise overflow horizontally).
        if line_height * len(lines) > max_height:
            return False
        return all(measure_width(line, font) <= max_width for line in lines)

    # Always lay the text out at least once so the drawing code below has
    # valid values even when font_size is already below min_font_size.
    size = font_size
    font, lines, line_height = layout(size)
    while not fits(font, lines, line_height) and size - 1 >= min_font_size:
        size -= 1
        font, lines, line_height = layout(size)
    total_text_height = line_height * len(lines)

    # Draw the block vertically centred, each line horizontally centred.
    y_text = (img.height - total_text_height) // 2
    for line in lines:
        x_text = (img.width - measure_width(line, font)) // 2
        draw.text((x_text, y_text), line, font=font, fill=fill_color)
        y_text += line_height

    img.save(output_path)
    print(f"Image sauvegardée : {output_path}")

In [24]:
# Step 1: Extract text from image
img_np, factor = load_image_as_numpy("../notebooks/ch_0_2.jpg", None)
result = extract_text_from_image(img_np)

# Step 2: Convert OCR results to DataFrame
df = ocr_results_to_dataframe(result)
# Debug limit: only the first MAX_OCR_ROWS OCR rows go through the pipeline.
# Set it to None to process every detected text (iloc[:None] keeps all rows).
MAX_OCR_ROWS = 1
df = df.iloc[:MAX_OCR_ROWS, :]

# Step 3: Filter DataFrame by score
filtered_df = filter_by_score(df, min_score=0.7)

# Step 4 : Cluster the polygons
clusters = cluster_polygons(filtered_df, "x1","y1","x2","y2","x3","y3","x4","y4", margin_factor=0.1)

# Step 5 : Add cluster information to the DataFrame
clustered_df = add_cluster_column(filtered_df, clusters)
print(clustered_df)

# Step 6 : Get bounding boxes for each cluster
df_boxes = bounding_boxes_by_cluster_with_text(clustered_df)

# Step 7 : Save crops from bounding boxes
ocr_outputs_path = "../outputs/ocr_outputs"
save_crops_from_coords(img_np, df_boxes[["x_min", "y_min", "x_max", "y_max"]].values, ocr_outputs_path,1)

# Step 8 : Remove text from img
# Each crop is replaced by a solid image of the same size: white when the crop
# is mostly light, black otherwise.
text_remove_path = "../outputs/text_remove_outputs"
for filename in os.listdir(ocr_outputs_path):
    img_path = os.path.join(ocr_outputs_path, filename)
    size = get_image_size(img_path)
    background = (255, 255, 255) if average_grayscale(img_path) > 255/2 else (0, 0, 0)
    create_and_save_solid_image(size[0], size[1], color=background, save_path=os.path.join(text_remove_path, filename))

# Step 9 : Translate the text in each cluster gemma3n:e2b gemma3:12b
df_translated = translate_cluster_texts(df_boxes, ollama_translate_en_fr, context="Translating dialogues from a webtoon", model="gemma3:12b")
# NOTE(review): 'translated_upper' is computed here but step 10 draws the
# mixed-case 'translated' column - confirm which one is meant to be rendered.
df_translated['translated_upper'] = df_translated['translated'].str.upper()
print(df_translated)

# Step 10 : Write translation on img
text_drawn_outputs = "../outputs/text_drawn_outputs"
# Starting size only: draw_centered_text shrinks it until the text fits.
# The value is a tunable choice, not derived from the image.
INITIAL_FONT_SIZE = 40
files = sorted(os.listdir(text_remove_path), key=natural_sort_key)
# Files and translations are paired by position, so the counts must agree;
# stale files from a previous run would otherwise shift every text.
if len(files) != len(df_translated):
    raise ValueError(
        f"{len(files)} image(s) in {text_remove_path} but {len(df_translated)} translated cluster(s); "
        "clean the output folders and re-run the pipeline."
    )
for i, filename in enumerate(files):
    img_path = os.path.join(text_remove_path, filename)
    # Positional lookup: the i-th file goes with the i-th row, whatever the
    # DataFrame index labels are.
    text = df_translated["translated"].iloc[i]
    out_path = os.path.join(text_drawn_outputs, filename)

    # Dark text on a light background, light text on a dark one.
    if average_grayscale(img_path) > 255/2:
        fill_color=(0, 0, 0)
    else:
        fill_color=(255, 255, 255)

    draw_centered_text(
        image_path=img_path,
        text=text,
        font_path="../inputs/fonts/Komika Text-FontZillion/Fonts/komtxtb_.ttf",
        font_size=INITIAL_FONT_SIZE,
        output_path=out_path,
        fill_color=fill_color
    )


[32mCreating model: ('PP-OCRv5_server_det', None)[0m
[32mUsing official model (PP-OCRv5_server_det), the model files will be automatically downloaded and saved in C:\Users\teo\.paddlex\official_models.[0m
Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 2413.29it/s]
[32mCreating model: ('latin_PP-OCRv5_mobile_rec', None)[0m
[32mUsing official model (latin_PP-OCRv5_mobile_rec), the model files will be automatically downloaded and saved in C:\Users\teo\.paddlex\official_models.[0m
Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 1927.38it/s]
[33mResized image size (15383x720) exceeds max_side_limit of 4000. Resizing to fit within limit.[0m


    text     score   x1   y1   x2   y2   x3   y3   x4   y4  cluster
0  MURIM  0.998331  131  576  605  586  601  797  126  787        0
Cluster 0 sauvegardé : ../outputs/ocr_outputs/cluster_0.png
✅ Image sauvegardée ici : ../outputs/text_remove_outputs\cluster_0.png

Cluster 0 original text:
MURIM

Cluster 0 translated:
MURIM

   cluster  x_min  y_min  x_max  y_max   text translated translated_upper
0        0    126    576    605    797  MURIM      MURIM            MURIM
Image sauvegardée : ../outputs/text_drawn_outputs\cluster_0.png


   cluster  x_min  y_min  x_max  y_max   text translated translated_upper
0        0    126    576    605    797  MURIM      MURIM            MURIM
