# Requirements

This project uses several Python modules to handle tasks such as image processing, data manipulation, deep learning, and visualization. If these dependencies are not already available in your environment, they can be installed with pip using the following command:

In [None]:
!pip install opencv-python pandas pillow torch torchvision matplotlib numpy

# Meaningful areas of the captchas
The input data consists of CAPTCHAs saved as PNG files. These CAPTCHAs have dimensions of 340 x 410 pixels, but not all pixels are relevant for our solver. Only the central field and the two drawings in the top-right corner are essential for solving.  

This section of the notebook focuses on isolating these meaningful areas and adjusting the labels accordingly.  

This cell splits the CAPTCHAs: it extracts the field (where the user has to click to solve the CAPTCHA) and the two drawings.

In [None]:
import os
import cv2

def crop_and_save_images(input_folder, output_folder, top_pixels, bot_pixels, left_pixels, right_pixels):
    """
    Crop every readable image of a folder to one rectangle and save the results.

    Each cropped image is written to output_folder under the same filename as
    its source. Files that OpenCV cannot decode, crops that turn out empty and
    images that cannot be written are reported with a message and skipped, so
    one bad file does not abort the whole batch.

    Naming note: the crop keeps rows ``bot_pixels`` (inclusive) to
    ``top_pixels`` (exclusive) and columns ``left_pixels`` (inclusive) to
    ``right_pixels`` (exclusive). ``bot_pixels`` is therefore the *smaller*
    row index and ``top_pixels`` the larger one.

    Args:
        input_folder (str): Path to the folder containing the original images.
        output_folder (str): Path where the cropped images will be saved.
        top_pixels (int): Row index at which the crop stops (exclusive).
        bot_pixels (int): Row index at which the crop starts (inclusive).
        left_pixels (int): Column index at which the crop starts (inclusive).
        right_pixels (int): Column index at which the crop stops (exclusive).
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so that progress messages come out in a reproducible order
    for filename in sorted(os.listdir(input_folder)):
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        # cv2.imread returns None (it does not raise) when the file is not a readable image
        image = cv2.imread(input_path)
        if image is None:
            print(f"Skipping {filename} (could not load image)")
            continue

        # NumPy slicing: rows first, then columns
        cropped_image = image[bot_pixels:top_pixels, left_pixels:right_pixels]
        if cropped_image.size == 0:
            # Happens when the image is smaller than the requested rectangle
            print(f"Skipping {filename} (crop region is empty)")
            continue

        # cv2.imwrite signals failure through its return value
        if not cv2.imwrite(output_path, cropped_image):
            print(f"Skipping {filename} (could not save cropped image)")



# Extract the zones of interest
def isolate_information_zones(captchas_folder = "your_captchas/captchas", field_folder = "your_captchas/field", draw1_folder = "your_captchas/draw1", draw2_folder = "your_captchas/draw2"):
    """
    Crop the field and the two drawings out of every CAPTCHA image.

    Runs crop_and_save_images three times on captchas_folder, once per zone,
    and prints a progress message around each step.

    Args:
        captchas_folder (str): Folder holding the full CAPTCHA images.
        field_folder (str): Destination folder for the cropped fields.
        draw1_folder (str): Destination folder for the first (left) drawing.
        draw2_folder (str): Destination folder for the second (right) drawing.
    """
    # Each box is (top_pixels, bot_pixels, left_pixels, right_pixels),
    # i.e. the argument order expected by crop_and_save_images.
    field_box = (310, 100, 65, 275)
    draw1_box = (55, 5, 185, 235)
    draw2_box = (55, 5, 260, 310)

    # Field: the area where the user has to click
    print("Extracting fields...")
    crop_and_save_images(captchas_folder, field_folder, *field_box)
    print("Field extracted")

    # First drawing (left one)
    print("Extracting draws...")
    crop_and_save_images(captchas_folder, draw1_folder, *draw1_box)
    print("Draw 1 extracted")

    # Second drawing (right one)
    crop_and_save_images(captchas_folder, draw2_folder, *draw2_box)
    print("Draw 2 extracted")
    print("All images extracted")

# Example set: every folder lives under one common root.
# NOTE: field_folder stays defined at module level and is read again later
# by save_csv_for_training.
imgs_folder = "your_captchas/"
captchas_folder = f"{imgs_folder}captchas"
field_folder = f"{imgs_folder}field"
draw1_folder = f"{imgs_folder}draw1"
draw2_folder = f"{imgs_folder}draw2"
isolate_information_zones(
    captchas_folder=captchas_folder,
    field_folder=field_folder,
    draw1_folder=draw1_folder,
    draw2_folder=draw2_folder,
)

This cell adapts the labels file to the cropped fields by shifting the coordinates by the crop offsets.

In [None]:
import pandas as pd

def adjust_x_coordinates(label, left_pixels):
    """
    Return an x-coordinate expressed relative to a left offset.

    Args:
        label (float): The original x-coordinate.
        left_pixels (int): Offset, in pixels, removed from the x-coordinate.

    Returns:
        float: ``label`` minus ``left_pixels``.
    """
    shifted_x = label - left_pixels
    return shifted_x

def adjust_y_coordinates(label, bot_pixels):
    """
    Return a y-coordinate expressed relative to a vertical offset.

    Args:
        label (float): The original y-coordinate.
        bot_pixels (int): Offset, in pixels, removed from the y-coordinate.

    Returns:
        float: ``label`` minus ``bot_pixels``.
    """
    shifted_y = label - bot_pixels
    return shifted_y

def truncate_labels(df, left_pixels=65, bot_pixels=100):
    """
    Return a copy of the labels with the crop offsets subtracted.

    The DataFrame is expected to have the columns "x1", "y1", "x2", and "y2".
    Their values are converted to numbers, then ``left_pixels`` is subtracted
    from the x columns and ``bot_pixels`` from the y columns. Every other
    column is carried over unchanged.

    The input DataFrame is left untouched: the adjustment is made on a copy,
    so calling this function twice on the same frame does not shift the
    labels twice.

    Args:
        df (pd.DataFrame): DataFrame containing the labels.
        left_pixels (int, optional): Number of pixels to subtract from x-coordinates. Default is 65.
        bot_pixels (int, optional): Number of pixels to subtract from y-coordinates. Default is 100.

    Returns:
        pd.DataFrame: New DataFrame with the adjusted labels.

    Raises:
        KeyError: If one of the coordinate columns is missing.
        ValueError: If a coordinate value cannot be converted to a number.
    """
    # Offset applied to each coordinate column
    offsets = {
        "x1": left_pixels,
        "y1": bot_pixels,
        "x2": left_pixels,
        "y2": bot_pixels,
    }

    adjusted = df.copy()
    for column, offset in offsets.items():
        # Vectorized: convert the whole column once, then subtract the offset
        adjusted[column] = pd.to_numeric(adjusted[column]) - offset

    return adjusted

def process_labels_file(input_csv="labels.txt", output_csv="truncated_labels.csv", left_pixels=65, bot_pixels=100):
    """
    Read a labels CSV, shift its coordinates by fixed offsets and save the result.

    The input CSV should contain the columns "img_name", "x1", "y1", "x2", "y2".
    The coordinates are adjusted through truncate_labels and the resulting
    DataFrame is written to output_csv without the index column. The img_name
    column is expected to be sorted in lexicographic order.

    Args:
        input_csv (str): Path to the input labels CSV file.
        output_csv (str): Path where the adjusted labels CSV will be saved.
        left_pixels (int, optional): Number of pixels to subtract from x-coordinates. Default is 65.
        bot_pixels (int, optional): Number of pixels to subtract from y-coordinates. Default is 100.

    Returns:
        pd.DataFrame: The processed DataFrame containing adjusted labels.
    """
    raw_labels = pd.read_csv(input_csv)
    adjusted_labels = truncate_labels(raw_labels, left_pixels, bot_pixels)
    adjusted_labels.to_csv(output_csv, index=False)
    return adjusted_labels

# The left and bot offsets are the ones used to isolate the field
labels_path = "labels.txt"
truncate_labels_path = "truncated_labels.csv"
processed_df = process_labels_file(
    input_csv=labels_path,
    output_csv=truncate_labels_path,
    left_pixels=65,
    bot_pixels=100,
)
print("Processed labels saved to truncated_labels.csv")


# Data Augmentation  

Due to the limited amount of data available, we will apply data augmentation techniques to expand our dataset. Our goal is to develop a model that, given a field and a drawing, can accurately determine the coordinates of the drawing within the field.  

To achieve this, we will save each drawing in four orientations, obtained by successive 90° rotations (90°, 180°, 270° and 360°, the last one being the original orientation). Since each CAPTCHA contains two drawings, this process effectively increases the number of training samples by a factor of 8 compared to a naive training approach that simply uses full CAPTCHAs with all labels.  

When performing data augmentation, it is crucial to prevent data leakage between the evaluation and training sets. To ensure a fair evaluation, we will carefully separate the train and evaluation datasets in the following cells.  

The following cell creates the train and test datasets.

In [None]:
from PIL import Image
import os
import torch
import pandas as pd
from torchvision import transforms

###############################################
# Augmentation Functions
###############################################

def rotate_image(image_path, num_rotations=4, output_dir="augmented_images", rotate=True):
    """
    Save num_rotations successive 90° rotations of an image as PNG files.

    The files are named ``<base>_rotated_<i>.png`` with i running from 1 to
    num_rotations, where ``<base>`` is the source filename without its
    extension. File i holds the image rotated i times by 90°
    counter-clockwise (PIL's rotation direction). When ``rotate`` is False,
    the unrotated image is saved num_rotations times under the same filenames.

    Args:
        image_path (str): Path to the original image.
        num_rotations (int): Number of 90° rotations to apply (and files to save).
        output_dir (str): Directory to save the rotated images.
        rotate (bool): If True, perform rotation; otherwise, skip rotation.

    Returns:
        list: A list of file paths to the saved rotated images.
    """
    # The name does not depend on the rotation index, so compute it once
    base_name = os.path.splitext(os.path.basename(image_path))[0]
    rotated_image_paths = []

    # The context manager closes the underlying file handle once all copies are saved
    with Image.open(image_path) as source:
        os.makedirs(output_dir, exist_ok=True)

        img = source
        for i in range(num_rotations):
            if rotate:
                # Rotations accumulate; expand=True grows the canvas so nothing is cropped
                img = img.rotate(90, expand=True)
            rotated_image_path = os.path.join(output_dir, f"{base_name}_rotated_{i+1}.png")
            img.save(rotated_image_path)
            rotated_image_paths.append(rotated_image_path)

    return rotated_image_paths

###############################################
# Dataset Generation Functions
###############################################

def count_png_images(folder_path):
    """
    Return how many entries of a folder have a ".png" name (any letter case).

    Args:
        folder_path (str): Path to the folder.

    Returns:
        int: Number of PNG images in the folder.

    Raises:
        ValueError: If folder_path is not an existing directory.
    """
    if not os.path.isdir(folder_path):
        raise ValueError(f"Invalid folder path: {folder_path}")

    # Lower-casing the name makes the extension check case insensitive
    png_names = [name for name in os.listdir(folder_path) if name.lower().endswith(".png")]
    return len(png_names)

def get_treshold(folder_path, train_test_split=0.8):
    """
    Compute the index separating training images from test images.

    The value is derived from the number of PNG images in folder_path and the
    train_test_split ratio: the product is rounded up to the next integer,
    except that a product that is already a whole number is kept as is.

    Args:
        folder_path (str): Folder containing images.
        train_test_split (float): Fraction of data to be used for training.

    Returns:
        int: The threshold index.
    """
    image_count = count_png_images(folder_path)
    # The tiny epsilon keeps exact products (e.g. 0.8 * 10) from being bumped
    # to the next integer, and absorbs floating point rounding noise.
    train_share = train_test_split * image_count - 0.000001
    return int(train_share) + 1

def gen_augmented_images(image_folder, treshold, output_final_dir, output_dir="training/", rotate=True):
    """
    Generate the augmented copies of every image and split them into train and test.

    Filenames are processed in lexicographic order. The first ``treshold``
    files go to ``<output_dir>/train_set/<output_final_dir>`` and the
    remaining ones to ``<output_dir>/test_set/<output_final_dir>``.

    Sorting matters: os.listdir returns entries in arbitrary order, so without
    it the same CAPTCHA could land in the training set for one folder (e.g. the
    field) and in the test set for another (e.g. a drawing), and the split
    would not line up with a labels file sorted by image name.

    Args:
        image_folder (str): Folder containing the original images.
        treshold (int): Number of images to include in the training set.
        output_final_dir (str): Subfolder name for saving the augmented images.
        output_dir (str): Base output directory (default is "training/").
        rotate (bool): Whether to apply rotation.
    """
    train_dir = os.path.join(output_dir, "train_set", output_final_dir)
    test_dir = os.path.join(output_dir, "test_set", output_final_dir)

    for position, filename in enumerate(sorted(os.listdir(image_folder))):
        # Positions 0 .. treshold-1 are training images, the rest are test images
        destination = train_dir if position < treshold else test_dir
        full_input_path = os.path.join(image_folder, filename)
        rotate_image(full_input_path, output_dir=destination, rotate=rotate)

def gen_datasets(imgs_folder='your_captchas/', train_test_split=0.8):
    """
    Build the augmented train/test folders for the field, draw1 and draw2 images.

    The split threshold is computed once, from the field folder, and reused for
    the two drawing folders. Field images are copied without rotation; both
    drawing folders are rotated.

    Args:
        imgs_folder (str): Folder containing subfolders 'field/', 'draw1/', and 'draw2/'.
        train_test_split (float): Fraction of data to be used for training.
    """
    field_folder = os.path.join(imgs_folder, "field/")
    treshold = get_treshold(field_folder, train_test_split=train_test_split)

    # (source subfolder, output subfolder, apply rotation?)
    jobs = (
        ("field/", "field/", False),
        ("draw1/", "rotated_draw1/", True),
        ("draw2/", "rotated_draw2/", True),
    )
    for source_subdir, target_subdir, should_rotate in jobs:
        gen_augmented_images(
            os.path.join(imgs_folder, source_subdir),
            treshold=treshold,
            output_final_dir=target_subdir,
            rotate=should_rotate,
        )

    print("Augmentation and dataset generation done!")

###############################################
# Main execution
###############################################

# Location of the images and share of them reserved for training
imgs_folder = 'your_captchas/'
train_test_split = 0.8

# Build the train_set / test_set folders with the augmented images
gen_datasets(imgs_folder, train_test_split)


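This cell builds the augmented labels file (one row per rotated image, with a small random noise added and the coordinates normalized by the field size) and saves it to the appropriate folders in preparation for dataset creation in the `Training.ipynb` notebook.  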

In [None]:
import pandas as pd 
import random

def augmentation(df: pd.DataFrame, noise_amplitude = 3, field_size = 210, num_rotations = 4) -> pd.DataFrame:
    """
    Build the labels of the rotated images from the labels of the original ones.

    Every input row yields num_rotations output rows. Row i (1-based) names the
    image ``<name without extension>_rotated_<i>.png``, which is the naming
    scheme used by rotate_image. The coordinates themselves are not rotated:
    each one only receives a uniform random noise of total width
    ``noise_amplitude`` centered on zero and is then divided by ``field_size``.

    Args:
        df (pd.DataFrame): Labels whose first column is the image name and whose
            next four columns are the coordinates (x1, y1, x2, y2).
        noise_amplitude (float): Width of the noise interval, in pixels.
        field_size (float): Divisor used to normalize the coordinates.
        num_rotations (int): Number of rotated copies per original image.

    Returns:
        pd.DataFrame: The augmented labels, with the first five columns of df.
    """
    columns = df.columns
    name_column_position = 0
    coordinate_positions = range(1, 5)

    augmented_data = []  # One dict per output row
    for _, row in df.iterrows():
        # splitext (rather than slicing off 4 characters) also handles
        # extensions that are not exactly ".xyz", e.g. ".jpeg"
        base_name = os.path.splitext(row[columns[name_column_position]])[0]

        for i in range(1, num_rotations + 1):
            new_row = {columns[name_column_position]: f"{base_name}_rotated_{i}.png"}
            for position in coordinate_positions:
                column = columns[position]
                jitter = noise_amplitude * (1/2 - random.random())
                new_row[column] = (row[column] + jitter) / field_size
            augmented_data.append(new_row)

    # Passing the columns explicitly keeps the header even when df has no rows
    return pd.DataFrame(augmented_data, columns=list(columns[:5]))

def save_csv_for_training(df, draw=1, train_test_split=0.8, *, images_folder=None, rows_per_image=4):
    """
    Write the train and test label CSVs of one drawing.

    The coordinates of the other drawing are dropped and the remaining columns
    are renamed to ["img_name", "x", "y"]. The rows are then split by position:
    the first ``get_treshold(images_folder, train_test_split) * rows_per_image``
    rows go to ``training/train_set/rotated_draw<draw>/augmented_labels.csv``
    and the rest to ``training/test_set/rotated_draw<draw>/augmented_labels.csv``.

    Args:
        df (pd.DataFrame): Augmented labels with the columns ["img_name", "x1", "y1", "x2", "y2"].
        draw (int): 1 or 2, the drawing whose coordinates are kept.
        train_test_split (float): The proportion of images used for training.
        images_folder (str, optional): Folder whose PNG count drives the split.
            When omitted, the module-level ``field_folder`` variable is used,
            as this function always did.
        rows_per_image (int): Number of rows df holds per original image
            (4 with the default augmentation).

    Raises:
        ValueError: If draw is neither 1 nor 2.
    """
    if draw not in (1, 2):
        raise ValueError(f"draw must be 1 or 2, got {draw!r}")
    if images_folder is None:
        # Backward-compatible default: fall back to the notebook-level variable
        images_folder = field_folder

    # Remove the coordinates of the drawing that is NOT being saved
    other_draw = 2 if draw == 1 else 1
    labels = df.drop([f"x{other_draw}", f"y{other_draw}"], axis=1)
    labels.columns = ["img_name", "x", "y"]

    # The image-level threshold is scaled to the number of rows per image
    treshold = get_treshold(images_folder, train_test_split) * rows_per_image
    labels.iloc[treshold:].to_csv(f"training/test_set/rotated_draw{draw}/augmented_labels.csv", index= False)
    labels.iloc[:treshold].to_csv(f"training/train_set/rotated_draw{draw}/augmented_labels.csv", index= False)


# Main pipeline: takes the truncated labels CSV as input and writes the augmented label CSVs
def labels_augmentation(truncated_labels_path = "truncated_labels.csv"):
    """
    Augment the truncated labels and save them for training and evaluation.

    Reads truncated_labels_path, runs augmentation on it, writes the per-drawing
    train/test CSVs through save_csv_for_training (drawing 1, then drawing 2)
    and finally saves the full augmented table to "training/augmented_labels.csv".

    Args:
        truncated_labels_path (str): Path to the CSV holding the truncated labels.

    Returns:
        pd.DataFrame: The augmented labels.
    """
    original_labels = pd.read_csv(truncated_labels_path)
    augmented_df = augmentation(original_labels)

    # One pair of train/test label files per drawing
    for draw_id in (1, 2):
        save_csv_for_training(augmented_df, draw=draw_id)

    # Also keep the complete augmented table next to the train/test folders
    augmented_df.to_csv("training/augmented_labels.csv", index=False)
    print("Done ! Your labels should be between 0 and 1")
    return augmented_df

# Augment the truncated labels written by the label-truncation cell
truncated_labels_path = "truncated_labels.csv"
labels_augmentation(truncated_labels_path=truncated_labels_path)
