In [41]:
!pip install opencv-python
!pip install numpy



In [1]:
import torch
from torch.utils.data import DataLoader,Dataset
import torchvision.transforms as transforms
from torchvision.io import read_image

from PIL import Image
import cv2
import numpy as np
import matplotlib as plt
import math
import random # sampling captcha text
import os # used for path and image storage
from captcha.image import ImageCaptcha  # Module that will generate all captcha images# pip install captcha


In [25]:
def remove_noise(image_path, save_path):
    """Denoise one image and write the result to ``save_path``.

    Processing steps, in order: convert to grayscale, inverted binary
    threshold with Otsu's method, one 3x3 morphological opening, a 3x3
    Gaussian blur and finally a 3x3 median blur.

    Args:
        image_path: Location of the source image (anything Pillow can open).
        save_path: Destination of the denoised image. When it is a filesystem
            path, missing parent directories are created first.

    Returns:
        None. The denoised image is written to ``save_path``.
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been copied into a numpy array.
    with Image.open(image_path) as image:
        np_image = np.array(image.convert('L'))

    # With THRESH_OTSU the threshold is chosen automatically, so the explicit
    # value 0 is only a placeholder. THRESH_BINARY_INV inverts the result.
    _, binary_image = cv2.threshold(np_image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Morphological opening (erode then dilate); raise `iterations` for more
    # aggressive noise removal.
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(binary_image, cv2.MORPH_OPEN, kernel, iterations=1)

    # Gaussian blur followed by a median blur to further reduce noise,
    # particularly small circles.
    denoised_image = cv2.GaussianBlur(opening, (3, 3), 0)
    denoised_image = cv2.medianBlur(denoised_image, 3)

    # Saving fails if the target directory is missing, so create it up front.
    # File-like objects are also accepted by Pillow and have no directory.
    if isinstance(save_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(save_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    Image.fromarray(denoised_image).save(save_path)


In [26]:
# Small convenience wrapper around cv2.resize
def resize_image(image, new_width, new_height):
    """Return ``image`` resized to ``new_width`` x ``new_height`` pixels."""
    # cv2.resize expects the target size as (width, height)
    target_size = (new_width, new_height)
    return cv2.resize(image, target_size)

def save_contours_as_images(image_path, output_directory, image_id,
                            padding=10, output_size=(100, 100), min_size=5, max_chars=4):
    """Crop the characters of a denoised image and save each one as its own file.

    The image is thresholded, its external contours are ordered left to right,
    and every contour that is large enough is centred on a black canvas,
    resized and written to ``output_directory``.

    The file stem of ``image_path`` is used as the label: the text before the
    first ``"_"`` supplies one character label per crop, and the text before
    the first ``"--"`` is used as the prefix of the output file names, which
    have the form ``{prefix}_{char}--{image_id}.png``.

    Args:
        image_path: Path of the image to segment.
        output_directory: Directory receiving the crops (created if missing).
        image_id: Id used for the first saved crop; incremented per crop.
        padding: Pixels of black border added on each side of a crop (the
            canvas is capped at the source image size).
        output_size: ``(width, height)`` of the saved crops.
        min_size: Contours whose bounding box is not larger than this in both
            width and height are treated as noise and skipped.
        max_chars: Upper bound on the number of crops saved per image.

    Raises:
        OSError: If the image cannot be read.
    """
    # cv2.imread signals failure by returning None instead of raising
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise OSError(f"Could not read image: {image_path}")

    # Threshold the image to obtain a binary image
    _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)

    # Find the external contours and order their bounding boxes left to right
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = sorted((cv2.boundingRect(contour) for contour in contours), key=lambda box: box[0])

    # Create output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # Derive the label from the file name with os.path so it works with any
    # path separator and with directories that contain dots.
    label = os.path.splitext(os.path.basename(image_path))[0]
    print(label)
    image_name = label.split("--")[0]
    char_labels = list(label.split("_")[0])

    # Never save more crops than there are character labels to name them with
    limit = min(max_chars, len(char_labels))
    out_width, out_height = output_size

    counter = 0  # keep track of how many characters have been saved
    for x, y, w, h in boxes:
        if counter >= limit:
            break

        # Skip contours that are too small (possibly noise)
        if not (w > min_size and h > min_size):
            continue

        # Canvas dimensions: the character plus padding, capped at the image size
        w_padding = min(image.shape[1], w + 2 * padding)
        h_padding = min(image.shape[0], h + 2 * padding)

        # Create a black canvas and place the character in its centre
        padded_image = np.zeros((h_padding, w_padding), dtype=np.uint8)
        x_offset = (w_padding - w) // 2
        y_offset = (h_padding - h) // 2
        padded_image[y_offset:y_offset + h, x_offset:x_offset + w] = image[y:y + h, x:x + w]

        # Resize the padded image and save it as a separate image
        resized_image = resize_image(padded_image, out_width, out_height)
        character_filename = os.path.join(
            output_directory, f'{image_name}_{char_labels[counter]}--{image_id}.png')
        cv2.imwrite(character_filename, resized_image)

        counter += 1
        image_id += 1

### Passing images to the filter

In [28]:
# Generate denoised images
folder = 'four_cap_36'
output_folder = 'denoised_images'

# The denoised files are written into output_folder, so it must exist first
os.makedirs(output_folder, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting makes the index that
# is embedded in every output file name reproducible between runs
file_list = sorted(os.listdir(folder))

# Denoise every image in the folder
for i, filename in enumerate(file_list):
    # Check if the file is an image (you can add more image extensions if needed)
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        # Construct the full path to the image
        image_path = os.path.join(folder, filename)

        # original image -> denoised image (now named after its label).
        # The text before the first '-' of the source file name is the label;
        # the index i keeps names unique when several images share a label.
        label = filename.split('-')[0]
        save_filename = f'{label}_{i}--denoised.png'
        save_path = os.path.join(output_folder, save_filename)

        # Call the remove_noise function
        remove_noise(image_path, save_path)


### Cropping characters

In [29]:
# Folder path containing the images
folder_path = 'denoised_images'

# Output directory for saved contour images
output_directory = 'cropped_characters'

# os.listdir returns entries in arbitrary order; sorting makes the ids that
# are assigned to the cropped characters reproducible between runs
file_list = sorted(os.listdir(folder_path))

image_id = 0

# Crop the characters of every image in the folder
for filename in file_list:
    # Check if the file is an image (you can add more image extensions if needed)
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        # Construct the full path to the image
        image_path = os.path.join(folder_path, filename)

        # Call the save_contours_as_images function
        save_contours_as_images(image_path, output_directory, image_id)

        # Reserve a block of 4 ids per image: at most 4 characters are saved
        # for each one
        image_id += 4

11P5_0--denoised
11VX_1--denoised
124T_2--denoised
12N1_3--denoised
1347_4--denoised
13XR_5--denoised
144W_6--denoised
14PJ_7--denoised
1535_8--denoised
1582_9--denoised
15LU_10--denoised
16GO_11--denoised
17YH_12--denoised
19FX_13--denoised
1A5E_14--denoised
1AT1_15--denoised
1BA6_16--denoised
1CKJ_17--denoised
1DWN_18--denoised
1EAL_19--denoised
1ELM_20--denoised
1FL7_21--denoised
1FRQ_22--denoised
1GOC_23--denoised
1GP8_24--denoised
1GYU_25--denoised
1IQ4_26--denoised
1IZV_27--denoised
1JFX_28--denoised
1KLN_29--denoised
1KOP_30--denoised
1KS2_31--denoised
1LUD_32--denoised
1LXD_33--denoised
1MI2_34--denoised
1OLZ_35--denoised
1PNB_36--denoised
1QJ9_37--denoised
1T59_38--denoised
1T6R_39--denoised
1TL3_40--denoised
1TYZ_41--denoised
1U51_42--denoised
1V9Q_43--denoised
1VM8_44--denoised
1WE7_45--denoised
1XSD_46--denoised
1XVI_47--denoised
1Y9Q_48--denoised
1Z1L_49--denoised
1ZQ5_50--denoised
1ZVE_51--denoised
214G_52--denoised
21EW_53--denoised
21JY_54--denoised
21N2_55--denoised
21

### Custom Class for characters

In [None]:
class CroppedCharacterDataset(Dataset):
    """Dataset of character images listed in a comma-separated annotations file.

    Every non-blank line of the annotations file has the form
    ``<image file name>,<label>``. Images are read from ``img_dir`` on access.

    Args:
        annotations_file: Path of the annotations file.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded image.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = self.read_annotations_file(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        img_filename, label = self.img_labels[idx]
        img_path = os.path.join(self.img_dir, img_filename)
        image = read_image(img_path)  # reads image from image path
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label

    def read_annotations_file(self, annotations_file):
        """Parse the annotations file into a list of ``[file_name, label]`` lists.

        Blank lines are skipped and surrounding whitespace is removed from
        both fields.

        Raises:
            ValueError: If a non-blank line does not have exactly two
                comma-separated fields.
        """
        img_labels = []
        with open(annotations_file, 'r') as file:
            for line_number, line in enumerate(file, start=1):
                line = line.strip()
                if not line:
                    # A blank line (e.g. a trailing newline) is not a sample
                    continue
                fields = [field.strip() for field in line.split(',')]
                if len(fields) != 2:
                    # Fail here with the offending line instead of later,
                    # when __getitem__ unpacks the row
                    raise ValueError(
                        f"{annotations_file}:{line_number}: expected "
                        f"'<image file name>,<label>', got {line!r}"
                    )
                img_labels.append(fields)
        return img_labels