In [1]:
# Smoke-test cell: prints 1; it defines no names, so no later cell depends on it.
print(1)

1


In [None]:
import os
import cv2

def crop_and_save_images(input_folder, output_folder="field",top_pixels= 330, bot_pixels= 55, left_pixels= 20, right_pixels = 320):
    """
    Crop every readable image of ``input_folder`` to one fixed rectangle and
    save the result under the same filename in ``output_folder``.

    The rectangle is given as array indices of the image returned by
    ``cv2.imread``: rows ``bot_pixels:top_pixels`` and columns
    ``left_pixels:right_pixels`` are kept (end index excluded). Despite its
    name, ``bot_pixels`` is therefore the first (smaller) row index and
    ``top_pixels`` the last (larger) one.

    Args:
        input_folder (str): Path to the folder containing the original images.
        output_folder (str): Path where the cropped images will be saved;
            created (including parents) when missing.
        top_pixels (int): Row index where the crop stops (exclusive).
        bot_pixels (int): Row index where the crop starts (inclusive).
        left_pixels (int): Column index where the crop starts (inclusive).
        right_pixels (int): Column index where the crop stops (exclusive).

    Entries that cannot be decoded, crops that come out empty and files that
    fail to be written are reported with ``print`` and skipped, so one bad file
    no longer aborts the whole batch.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_folder, exist_ok=True)

    # Process each entry of the input folder
    for filename in os.listdir(input_folder):
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        # cv2.imread signals "not an image / unreadable" by returning None
        image = cv2.imread(input_path)
        if image is None:
            print(f"Skipping {filename} (could not load image)")
            continue

        # Apply cropping (rows first, then columns)
        cropped_image = image[bot_pixels:top_pixels, left_pixels:right_pixels]

        # Slicing past the image bounds silently yields an empty array, and
        # cv2.imwrite raises on an empty image: skip instead of crashing.
        if cropped_image.size == 0:
            print(f"Skipping {filename} (crop region is empty for an image of shape {image.shape})")
            continue

        # cv2.imwrite reports failure through its return value, not an exception
        if not cv2.imwrite(output_path, cropped_image):
            print(f"Skipping {filename} (could not write {output_path})")


# Extract the zones of interest
def isolate_information_zones(imgs_folder = "extracted_captchas/captchas_saved", field_folder = "extracted_captchas/field", draw1_folder = "extracted_captchas/draw1", draw2_folder = "extracted_captchas/draw2"):
    """
    Cut three fixed regions out of every image of ``imgs_folder``.

    Each region is produced by one call to ``crop_and_save_images`` and lands
    in its own folder: the click field, the first (left) draw and the second
    (right) draw. Progress messages are printed around each step.

    Args:
        imgs_folder (str): Folder holding the source images.
        field_folder (str): Destination of the field crops.
        draw1_folder (str): Destination of the first-draw crops.
        draw2_folder (str): Destination of the second-draw crops.
    """
    # One entry per region:
    # (message printed before or None, destination, (top, bot, left, right), message printed after)
    zones = (
        ("Extracting fields...", field_folder, (310, 100, 65, 275), "Field extracted"),
        ("Extracting draws...", draw1_folder, (55, 5, 185, 235), "Draw 1 extracted"),
        (None, draw2_folder, (55, 5, 260, 310), "Draw 2 extracted"),
    )

    for announcement, destination, box, confirmation in zones:
        if announcement is not None:
            print(announcement)
        crop_and_save_images(imgs_folder, destination, *box)
        print(confirmation)

    print("All images extracted")

# Run the extraction with the default folders: reads extracted_captchas/captchas_saved
# and writes the field, draw1 and draw2 crops next to it.
isolate_information_zones()

Extracting fields...
Field extracted
Extracting draws...
Draw 1 extracted
Draw 2 extracted
All images extracted


In [6]:
import pandas as pd 
import random

def augmentation(df: pd.DataFrame, noise_amplitude = 3, num_rotations: int = 4, field_size: float = 210, seed = None) -> pd.DataFrame:
    """
    Build the label table that goes with the augmented images.

    Every input row is expanded into ``num_rotations`` rows named
    ``<stem>_rotated_<i>.png`` (``i`` from 1 to ``num_rotations``), where
    ``<stem>`` is the ``img_name`` value without its extension. The four
    columns following the first one are treated as coordinates: each gets
    uniform noise of total width ``noise_amplitude`` centred on zero, then is
    divided by ``field_size``.

    Args:
        df: Table whose first column is ``img_name`` and whose next four
            columns are numeric coordinates. Further columns are ignored.
        noise_amplitude: Total width of the uniform noise added to each
            coordinate before normalisation (0 disables the noise).
        num_rotations: Number of rows generated per input row.
        field_size: Divisor applied to the noisy coordinates.
        seed: Optional seed for a private random generator, making the noise
            reproducible. When None the module-level ``random`` state is used.

    Returns:
        A new DataFrame with the first five columns of ``df``; it keeps those
        column names even when ``df`` has no rows.

    Raises:
        IndexError: If ``df`` has rows but fewer than five columns.
    """
    import os  # local import: the cell defining this function only imports pandas and random

    out_columns = list(df.columns[:5])
    if len(df) and len(out_columns) < 5:
        raise IndexError("augmentation expects an image-name column followed by four coordinate columns")

    # A private generator keeps a seeded run from disturbing the global random state.
    rng = random if seed is None else random.Random(seed)

    augmented_data = []  # One dict per output row
    for _, row in df.iterrows():
        # splitext copes with any extension length (and with none at all),
        # unlike slicing off the last four characters.
        stem = os.path.splitext(row["img_name"])[0]
        for i in range(1, num_rotations + 1):
            new_row = {out_columns[0]: f"{stem}_rotated_{i}.png"}
            for column in out_columns[1:]:
                # 1/2 - random() lies in (-0.5, 0.5], so the noise is centred on zero.
                new_row[column] = (row[column] + noise_amplitude*(1/2-rng.random()))/field_size
            augmented_data.append(new_row)

    # Passing columns= preserves the header when there is nothing to augment.
    return pd.DataFrame(augmented_data, columns=out_columns)


#Main pipeline with the csv file in entrance and saving the augmented csv file in output file
def labels_augmentation(input_file = "truncated_labels.csv", output_file = "training/augmented_labels.csv"):
    """
    Read a label CSV, augment it and write the result to another CSV.

    Args:
        input_file (str): CSV file read with ``pd.read_csv`` and handed to
            ``augmentation``.
        output_file (str): Destination CSV, written without the index column.
            Its parent directory is created when it does not exist yet.

    Returns:
        pd.DataFrame: The augmented table that was written to ``output_file``.
    """
    import os  # local import: the cell defining this function only imports pandas and random

    # Load the original dataframe
    df = pd.read_csv(input_file)

    # Apply augmentation to the dataframe
    augmented_df = augmentation(df)

    # to_csv refuses to write into a missing directory, so create it first.
    output_parent = os.path.dirname(output_file)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    augmented_df.to_csv(output_file, index=False)

    return augmented_df

# Build and save the augmented labels with the default paths; as the last
# expression of the cell, the returned DataFrame is what gets displayed below.
labels_augmentation()

Unnamed: 0,img_name,x1,y1,x2,y2
0,captcha_1_rotated_1.png,0.222547,0.692295,0.263160,0.093638
1,captcha_1_rotated_2.png,0.227678,0.697836,0.256823,0.090395
2,captcha_1_rotated_3.png,0.218199,0.703096,0.259892,0.082731
3,captcha_1_rotated_4.png,0.218552,0.690267,0.266175,0.089570
4,captcha_10_rotated_1.png,0.258491,0.689224,0.734855,0.324897
...,...,...,...,...,...
1259,captcha_98_rotated_4.png,0.219098,0.507578,0.836167,0.231861
1260,captcha_99_rotated_1.png,0.198716,0.897782,0.118977,0.561890
1261,captcha_99_rotated_2.png,0.205199,0.891504,0.112309,0.559925
1262,captcha_99_rotated_3.png,0.200418,0.886850,0.107908,0.553184


In [None]:
from PIL import Image
import os
#Creation of augmented data for dataset
def rotate_image(image_path, num_rotations=4, output_dir="augmented_images", rotate = True):
    """
    Save ``num_rotations`` successive variants of one image as PNG files.

    The files are named ``<stem>_rotated_<k>.png`` for ``k`` from 1 to
    ``num_rotations``, where ``<stem>`` is the basename of ``image_path``
    without its extension. Rotations are cumulative: with ``rotate=True``
    file ``k`` is the source turned by ``k * 90`` degrees. With
    ``rotate=False`` every file is an unrotated copy of the source.

    Args:
        image_path (str): Path to the original image.
        num_rotations (int): Number of files to write.
        output_dir (str): Directory receiving the files; created (including
            parents) when missing. Default is "augmented_images".
        rotate (bool): Whether to turn the image by 90 degrees before each save.

    Returns:
        list: Paths of the saved files, in the order they were written.
    """
    saved_paths = []
    stem = os.path.splitext(os.path.basename(image_path))[0]

    # The context manager closes the underlying file once all copies are saved.
    with Image.open(image_path) as source:
        os.makedirs(output_dir, exist_ok=True)

        current = source
        for i in range(num_rotations):
            if rotate:
                # expand=True enlarges the canvas so nothing is cropped on non-square images
                current = current.rotate(90, expand=True)
            rotated_image_path = os.path.join(output_dir, f"{stem}_rotated_{i+1}.png")
            current.save(rotated_image_path)
            saved_paths.append(rotated_image_path)

    return saved_paths


def gen_augmented_images(image_folder,output_dir, rotate = False, train_count = 253):
    """
    Split the files of ``image_folder`` into a train and a test set and run
    ``rotate_image`` on each of them.

    The filenames are sorted once; the first ``train_count`` go to
    ``training/train_set/field/`` and the remaining ones to
    ``training/test_set/field/``. Sorting makes the split reproducible and
    guarantees the two sets never overlap, which two independent
    ``os.listdir`` calls do not.

    Args:
        image_folder (str): Folder containing the source images (with or
            without a trailing separator).
        output_dir (str): Currently unused; kept so existing calls stay valid.
            NOTE(review): the destinations are hard-coded below - confirm
            whether this argument was meant to replace them.
        rotate (bool): Forwarded to ``rotate_image``. Previously the argument
            was accepted but False was always passed.
        train_count (int): Number of files (in sorted order) sent to the train
            set; the default keeps the original 253.
    """
    filenames = sorted(os.listdir(image_folder))

    splits = (
        ("training/train_set/field/", filenames[:train_count]),
        ("training/test_set/field/", filenames[train_count:]),
    )
    for split_dir, split_names in splits:
        for filename in split_names:
            # os.path.join works whether or not image_folder ends with a separator
            rotate_image(os.path.join(image_folder, filename), output_dir=split_dir, rotate=rotate)

def _augment_split(image_folder, train_dir, test_dir, rotate, train_count):
    """Run rotate_image on a folder: first ``train_count`` sorted files go to ``train_dir``, the rest to ``test_dir``."""
    # Sorting once gives every folder the same, reproducible split; separate
    # os.listdir calls return names in an unspecified order.
    filenames = sorted(os.listdir(image_folder))
    for filename in filenames[:train_count]:
        rotate_image(os.path.join(image_folder, filename), output_dir=train_dir, rotate=rotate)
    for filename in filenames[train_count:]:
        rotate_image(os.path.join(image_folder, filename), output_dir=test_dir, rotate=rotate)


def gen_datasets(norm=True, train_count=253):
    """
    Build the train/test image folders for the field and the two draws.

    Three source folders are read relative to the current working directory:
    ``field/``, ``draw1/`` and ``draw2/``, each prefixed with ``normalized_``
    when ``norm`` is true. For each folder the first ``train_count`` files (in
    sorted order) are written under ``training/train_set/`` and the others
    under ``training/test_set/``. Field images are saved as unrotated copies,
    draw images are rotated by ``rotate_image``.

    NOTE(review): ``isolate_information_zones`` writes under
    ``extracted_captchas/`` by default - confirm the folders read here are the
    intended ones.

    Args:
        norm (bool): Read the ``normalized_*`` folders instead of the raw ones.
        train_count (int): Number of files per folder sent to the train set;
            the default keeps the original 253.
    """
    prefix = "normalized_" if norm else ""

    # (source folder, sub-folder inside train_set/test_set, rotate the images?)
    sources = (
        ("field/", "field/", False),
        ("draw1/", "rotated_draw1/", True),
        ("draw2/", "rotated_draw2/", True),
    )
    for source, target, rotate in sources:
        _augment_split(
            prefix + source,
            train_dir="training/train_set/" + target,
            test_dir="training/test_set/" + target,
            rotate=rotate,
            train_count=train_count,
        )
    print("Done !")



# Build the train/test folders from the non-normalized crops (reads field/, draw1/ and draw2/).
gen_datasets(norm=False)
    