In [5]:
import cv2
import numpy as np
import torch
import pandas as pd


def draw_labels_on_image(image_path, labels_file):
    """
    Loads an image from image_path, retrieves ground truth (x1, y1, x2, y2) from labels.txt,
    and displays the image with red circles at those coordinates.

    Args:
        image_path (str): Path to the image.
        labels_file (str): Path to the labels file (CSV or TXT with x1, y1, x2, y2).
    """
    # Load the image using OpenCV
    image = cv2.imread(image_path)
    if image is None:
        print(f"Unable to load image: {image_path}")
        return

    # Resize the image to (width=340, height=410) to match the model's expected size
    image_resized = image#cv2.resize(image, (340, 410))

    # Read labels from the file
    labels_df = pd.read_csv(labels_file)  # Ensure labels.txt is formatted correctly
    image_name = image_path.split('/')[-1]  # Extract filename from path

    # Find the row corresponding to the image name (assuming there is an 'id' or filename column)
    if 'img_name' in labels_df.columns:
        row = labels_df[labels_df['img_name'] == image_name]
    else:
        row = labels_df.iloc[0]  # If there's no ID column, just use the first row (for testing)

    if row.empty:
        print(f"No labels found for {image_name}")
        return

    # Extract ground truth coordinates
    x1, y1, x2, y2 = row[['x1', 'y1', 'x2', 'y2']].to_numpy().flatten()
    x1 = int(x1)
    x2 = int(x2)
    y1 = int(y1)
    y2 = int(y2)
    print(f"Ground truth coordinates: x1={x1}, y1={y1}, x2={x2}, y2={y2}")

    # Draw red circles at the ground truth coordinates
    image_drawn = image_resized.copy()
    cv2.circle(image_drawn, (x1, y1), radius=5, color=(0, 255, 0), thickness=-1)  # Green circle
    cv2.circle(image_drawn, (x2, y2), radius=5, color=(0, 0, 255), thickness=-1)  # Red circle

    # Display the image with the drawn points
    cv2.imshow("img",image_drawn)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

extract_dir = 'extracted_captchas'
processed_images_dir = "processed_captchas"
image_path = f"{extract_dir}/captchas_saved/captcha_8.png"
labels_file = "labels.txt"
draw_labels_on_image(image_path,labels_file)
image_path = f"truncated_captchas/captcha_8.png"
labels_file = "truncated_labels.csv"
draw_labels_on_image(image_path,labels_file)


Ground truth coordinates: x1=105, y1=175, x2=180, y2=207
Ground truth coordinates: x1=85, y1=120, x2=160, y2=152


In [None]:
import cv2
import numpy as np

def show_pixels(image_path,nbPixelsHaut,nbPixelsBas,nbPixelsGauche,nbPixelsDroite):
    """
    Displays the top 20 rows of an image.

    Args:
        image_path (str): Path to the image file.
    """
    # Load the image
    image = cv2.imread(image_path)

    # Check if the image was loaded correctly
    if image is None:
        print(f"Error: Unable to load image at {image_path}")
        return

    truncated_image = image[nbPixelsBas:nbPixelsHaut, nbPixelsGauche:nbPixelsDroite]

    # Display the cropped section
    cv2.imshow("img",truncated_image)  # Use cv2_imshow in Google Colab, replace with cv2.imshow() for local use
    cv2.waitKey(0)
    cv2.destroyAllWindows()


extract_dir = 'extracted_captchas'
processed_images_dir = "processed_captchas"
image_path = f"{extract_dir}/captchas_saved/captcha_315.png"
labels_file = "labels.txt"
# nbPixelsHaut,nbPixelsBas,nbPixelsGauche,nbPixelsDroite = 55,5,185,235 # Premier perso
# nbPixelsHaut, nbPixelsBas, nbPixelsGauche, nbPixelsDroite = 50,10, 265, 305 #Second perso
show_pixels(image_path,nbPixelsHaut,nbPixelsBas,nbPixelsGauche,nbPixelsDroite)

In [81]:
import pandas as pd

def adjust_x_coordinates(label, nbPixelsGauche):
    return label - nbPixelsGauche

def adjust_y_coordinates(label, nbPixelsBas):
    return label - nbPixelsBas

# Load the labels file
df = pd.read_csv("labels.txt")

# Ensure the coordinate columns are numeric
df[['x1', 'y1', 'x2', 'y2']] = df[['x1', 'y1', 'x2', 'y2']].apply(pd.to_numeric)

# Define the cropping values
nbPixelsHaut, nbPixelsBas, nbPixelsGauche, nbPixelsDroite = 330, 55, 20, 320

# Adjust coordinates
df['x1'] = df['x1'].apply(lambda x: adjust_x_coordinates(x, nbPixelsGauche))
df['x2'] = df['x2'].apply(lambda x: adjust_x_coordinates(x, nbPixelsGauche))
df['y1'] = df['y1'].apply(lambda y: adjust_y_coordinates(y, nbPixelsBas))
df['y2'] = df['y2'].apply(lambda y: adjust_y_coordinates(y, nbPixelsBas))

# Save the adjusted labels
df.to_csv('truncated_labels.csv', index=False)

# Display the updated DataFrame
print(df)


            img_name          x1          y1          x2          y2
0      captcha_1.png   91.816017  191.209957   99.581169   63.639610
1     captcha_10.png   99.581169  188.991342  200.528139  112.449134
2    captcha_100.png  148.390693  237.800866   67.411255  236.691558
3    captcha_101.png  187.216450  170.133117  105.127706  210.068182
4    captcha_102.png  140.625541  175.679654  200.528139  215.614719
..               ...         ...         ...         ...         ...
311   captcha_95.png  128.423160   80.279221  167.248918  227.817100
312   captcha_96.png   94.034632   85.825758   89.597403  217.833333
313   captcha_97.png   90.706710  215.614719  183.888528  137.963203
314   captcha_98.png   90.706710  150.165584  221.604978   94.700216
315   captcha_99.png   87.378788  232.254329   68.520563  162.367965

[316 rows x 5 columns]


In [None]:
import os
import cv2

def crop_and_save_images(input_folder, output_folder="truncated_captchas",nbPixelsHaut= 330, nbPixelsBas= 55, nbPixelsGauche= 20, nbPixelsDroite = 320):
    """
    Crops images in the input_folder according to predefined pixel boundaries
    and saves them to the output_folder with the same filenames.

    Args:
        input_folder (str): Path to the folder containing the original images.
        output_folder (str): Path where the cropped images will be saved.
    """
    # Define cropping boundaries
    

    # Create output folder if it does not exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Process each image in the input folder
    for filename in os.listdir(input_folder):
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        # Read the image
        image = cv2.imread(input_path)
        if image is None:
            print(f"Skipping {filename} (could not load image)")
            continue

        # Apply cropping
        cropped_image = image[nbPixelsBas:nbPixelsHaut, nbPixelsGauche:nbPixelsDroite]

        # Save the cropped image to the new folder
        cv2.imwrite(output_path, cropped_image)
        print(f"Saved cropped image: {output_path}")

# nbPixelsHaut,nbPixelsBas,nbPixelsGauche,nbPixelsDroite = 50,10,190,230 # Premier perso
# nbPixelsHaut, nbPixelsBas, nbPixelsGauche, nbPixelsDroite = 50,10, 265, 305 #Second perso

# Example Usage
# crop_and_save_images("extracted_captchas/captchas_saved") #truncated captchas
# nbPixelsHaut,nbPixelsBas,nbPixelsGauche,nbPixelsDroite = 55,5,185,235
# crop_and_save_images("extracted_captchas/captchas_saved","premier_perso",nbPixelsHaut,nbPixelsBas,nbPixelsGauche,nbPixelsDroite) #premier perso
# nbPixelsHaut, nbPixelsBas, nbPixelsGauche, nbPixelsDroite = 55,5, 260, 310
# crop_and_save_images("extracted_captchas/captchas_saved","second_perso",nbPixelsHaut,nbPixelsBas,nbPixelsGauche,nbPixelsDroite) #second perso


Saved cropped image: premier_perso\captcha_1.png
Saved cropped image: premier_perso\captcha_10.png
Saved cropped image: premier_perso\captcha_100.png
Saved cropped image: premier_perso\captcha_101.png
Saved cropped image: premier_perso\captcha_102.png
Saved cropped image: premier_perso\captcha_103.png
Saved cropped image: premier_perso\captcha_104.png
Saved cropped image: premier_perso\captcha_105.png
Saved cropped image: premier_perso\captcha_106.png
Saved cropped image: premier_perso\captcha_107.png
Saved cropped image: premier_perso\captcha_108.png
Saved cropped image: premier_perso\captcha_109.png
Saved cropped image: premier_perso\captcha_11.png
Saved cropped image: premier_perso\captcha_110.png
Saved cropped image: premier_perso\captcha_111.png
Saved cropped image: premier_perso\captcha_112.png
Saved cropped image: premier_perso\captcha_113.png
Saved cropped image: premier_perso\captcha_114.png
Saved cropped image: premier_perso\captcha_115.png
Saved cropped image: premier_perso\

In [33]:
import cv2
import numpy as np
import os

# Path to the folder containing the images
image_folder = "truncated_captchas"  # Change this to your actual folder path

# List all image files in the folder
image_files = [f for f in os.listdir(image_folder) if f.endswith('.png')]  # Change extension if needed

# Initialize an accumulator with zeros (assuming images are the same size)
num_images = len(image_files)
if num_images == 0:
    raise ValueError("No images found in the specified folder.")

# Load the first image to get dimensions
first_image = cv2.imread(os.path.join(image_folder, image_files[0]), cv2.IMREAD_COLOR)
h, w, c = first_image.shape
mean_image = np.zeros((h, w, c), dtype=np.float32)

# Compute the sum of all images
for file in image_files:
    img = cv2.imread(os.path.join(image_folder, file), cv2.IMREAD_COLOR)
    if img is None:
        print(f"Warning: Could not read {file}")
        continue
    mean_image += img.astype(np.float32)  # Convert to float to prevent overflow

# Compute the mean by dividing by the number of images
mean_image /= num_images

# Convert back to uint8 format for visualization and saving
mean_image = np.clip(mean_image, 0, 255).astype(np.uint8)

# Save the mean image
cv2.imwrite("mean_image.png", mean_image)

# Display the mean image (optional)
cv2.imshow("Mean Image", mean_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [4]:
import cv2
import numpy as np
import os

# Paths
image_folder = "truncated_captchas"  # Change this to your actual folder
output_folder = "filtered_truncated_captchas"  # Folder to save cleaned images
mean_image_path = "mean_image.png"  # Path to saved mean image

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Load the mean image
mean_image = cv2.imread(mean_image_path, cv2.IMREAD_COLOR).astype(np.float32)

# List all image files in the folder
image_files = [f for f in os.listdir(image_folder) if f.endswith('.png')]  # Change extension if needed

# Process each image
for file in image_files:
    img_path = os.path.join(image_folder, file)
    output_path = os.path.join(output_folder, file)
    
    # Load the image
    img = cv2.imread(img_path, cv2.IMREAD_COLOR).astype(np.float32)

    # Subtract the mean image
    cleaned_img = img - mean_image

    # Normalize back to 0-255
    cleaned_img = np.clip(cleaned_img, 0, 255).astype(np.uint8)

    # Save the cleaned image
    cv2.imwrite(output_path, cleaned_img)

print(f"Cleaned images saved in {output_folder}")


Cleaned images saved in filtered_truncated_captchas


In [5]:
import cv2
import numpy as np
import torch
import pandas as pd


def draw_labels_on_image(image_path, labels_file):
    """
    Loads an image from image_path, retrieves ground truth (x1, y1, x2, y2) from labels.txt,
    and displays the image with red circles at those coordinates.

    Args:
        image_path (str): Path to the image.
        labels_file (str): Path to the labels file (CSV or TXT with x1, y1, x2, y2).
    """
    # Load the image using OpenCV
    image = cv2.imread(image_path)
    if image is None:
        print(f"Unable to load image: {image_path}")
        return

    # Resize the image to (width=340, height=410) to match the model's expected size
    image_resized = image#cv2.resize(image, (340, 410))

    # Read labels from the file
    labels_df = pd.read_csv(labels_file)  # Ensure labels.txt is formatted correctly
    image_name = image_path.split('/')[-1]  # Extract filename from path

    # Find the row corresponding to the image name (assuming there is an 'id' or filename column)
    if 'img_name' in labels_df.columns:
        row = labels_df[labels_df['img_name'] == image_name]
    else:
        row = labels_df.iloc[0]  # If there's no ID column, just use the first row (for testing)

    if row.empty:
        print(f"No labels found for {image_name}")
        return

    # Extract ground truth coordinates
    x1, y1, x2, y2 = row[['x1', 'y1', 'x2', 'y2']].to_numpy().flatten()
    x1 = int(x1)
    x2 = int(x2)
    y1 = int(y1)
    y2 = int(y2)
    print(f"Ground truth coordinates: x1={x1}, y1={y1}, x2={x2}, y2={y2}")

    # Draw red circles at the ground truth coordinates
    image_drawn = image_resized.copy()
    cv2.circle(image_drawn, (x1, y1), radius=5, color=(0, 255, 0), thickness=-1)  # Green circle
    cv2.circle(image_drawn, (x2, y2), radius=5, color=(0, 0, 255), thickness=-1)  # Red circle

    # Display the image with the drawn points
    cv2.imshow("img",image_drawn)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

image_path = f"filtered_truncated_captchas/captcha_8.png"
labels_file = "truncated_labels.csv"
draw_labels_on_image(image_path,labels_file)

Ground truth coordinates: x1=85, y1=120, x2=160, y2=152


In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm  # Progress bar

def compute_mean_std(input_folder):
    """Compute the mean and std for each RGB channel across all images."""
    image_list = []
    
    for filename in tqdm(os.listdir(input_folder), desc="Computing mean & std"):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path)  # Read image in BGR format
        if img is not None:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
            image_list.append(img)

    # Convert list to a big numpy array (N, H, W, C)
    image_array = np.stack(image_list, axis=0).astype(np.float32) / 255.0

    # Compute mean and std along (N, H, W) axis → (C,)
    mean = np.mean(image_array, axis=(0, 1, 2))
    std = np.std(image_array, axis=(0, 1, 2))
    print(f"moyenne = {mean} \n std={std}")
    return mean, std

def normalize_images(input_folder, output_folder):
    """Normalize RGB channels of all images in input_folder and save them to output_folder."""
    os.makedirs(output_folder, exist_ok=True)  # Create output folder if not exists

    # Compute mean and std
    mean, std = compute_mean_std(input_folder)

    for filename in tqdm(os.listdir(input_folder), desc="Normalizing images"):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path)  # Read image in BGR format
        if img is None:
            print(f"Skipping {filename} (unable to read)")
            continue

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
        img = img.astype(np.float32) / 255.0  # Scale to [0,1]

        # Normalize: (pixel - mean) / std
        img = (img - mean) / std

        # Convert back to 0-255 range for saving
        # img = ((img - img.min()) / (img.max() - img.min()) * 255).astype(np.uint8)

        # Save processed image
        output_path = os.path.join(output_folder, filename)
        cv2.imwrite(output_path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))  # Convert back to BGR

    print(f"✅ Normalized images saved in: {output_folder}")


# Example Usage
input_folder = "truncated_captchas"
output_folder = "normalized_images"
normalize_images(input_folder, output_folder)


Computing mean & std: 100%|██████████| 316/316 [00:00<00:00, 1426.80it/s]


moyenne = [0.6435449 0.6435449 0.6435449] 
 std=[0.28112793 0.20526662 0.1340277 ]


Normalizing images: 100%|██████████| 316/316 [00:01<00:00, 295.00it/s]

✅ Normalized images saved in: normalized_images





In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm

def apply_existing_normalization(input_folder, output_folder, mean, std):
    """
    Applies the same mean and std normalization (computed earlier) to new images.

    Args:
        input_folder (str): Path to the folder containing new images.
        output_folder (str): Path to save the normalized images.
        mean (tuple): Mean values of the original dataset (R, G, B).
        std (tuple): Standard deviation values of the original dataset (R, G, B).
    """
    os.makedirs(output_folder, exist_ok=True)  # Create the output folder if it doesn't exist

    for filename in tqdm(os.listdir(input_folder), desc="Processing new images"):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path)  # Read image in BGR format
        if img is None:
            print(f"Skipping {filename} (unable to read)")
            continue
        
        # Convert to RGB and scale pixel values to [0,1]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        # Apply normalization using precomputed mean & std
        img_normalized = (img - mean) / std  # Standardize the image

        # Rescale to 0-255 for saving
        img_normalized = ((img_normalized - img_normalized.min()) / (img_normalized.max() - img_normalized.min()) * 255).astype(np.uint8)

        # Convert back to BGR before saving (OpenCV expects BGR format)
        output_path = os.path.join(output_folder, filename)
        cv2.imwrite(output_path, cv2.cvtColor(img_normalized, cv2.COLOR_RGB2BGR))

    print(f"✅ Normalized images saved in: {output_folder}")

# Example Usage:
mean = (0.6435449 0.6435449 0.6435449)  # Replace with your dataset's mean
std = (0.28112793 0.20526662 0.1340277)  # Replace with your dataset's std
input_folder = "new_images"
output_folder = "normalized_new_images"

apply_existing_normalization(input_folder, output_folder, mean, std)
