This notebook uses inputs from https://www.kaggle.com/fanbyprinciple/preprocessing-and-segmenting-letters-from-captcha/output

![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSOo7kT1RFyx90c6H9P3WnHYeRKOqHp-NlOdA&usqp=CAU)

In [2]:
!pip install kaggle




In [3]:
import os
import shutil
import zipfile

# Step 1: Set up kaggle.json
os.makedirs(os.path.expanduser('~/.kaggle'), exist_ok=True)
shutil.copy('kaggle.json', os.path.expanduser('~/.kaggle/'))

'/Users/priyanshukumar/.kaggle/kaggle.json'

In [5]:
!kaggle kernels pull fantao/creating-a-captcha-solver -p /path/to/directory

Source code downloaded to /Users/priyanshukumar/Downloads/creating-a-captcha-solver.ipynb


In [1]:
captcha_processing_output_folder = "../input/preprocessing-and-segmenting-letters-from-captcha/extracted_letter_images"

In [12]:
import numpy as np
import os
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt


def resize_image_to_dimensions(image, desired_width, desired_height):
    """Resizes an image to the desired dimensions, maintaining aspect ratio and padding if necessary."""
    # Ensure image has three dimensions (height, width, channels)
    if image.shape[-1] != 1:
        image = tf.expand_dims(image, axis=-1)
    
    (h, w) = image.shape[:2]

    # Resize the image while keeping the aspect ratio
    if w > h:
        scale_ratio = desired_width / w
        new_size = (desired_width, int(h * scale_ratio))
    else:
        scale_ratio = desired_height / h
        new_size = (int(w * scale_ratio), desired_height)
    
    resized_image = tf.image.resize(image, new_size)

    # Add padding to achieve the exact desired dimensions
    image_with_border = tf.image.resize_with_crop_or_pad(
        resized_image, desired_height, desired_width
    )

    return image_with_border

def read_image(image_file_path):
    """Reads an image file, converts it to grayscale, resizes it, and adds a channel dimension."""
    # Load the image in grayscale using TensorFlow
    img = tf.io.read_file(image_file_path)
    img = tf.image.decode_image(img, channels=1)  # Grayscale image (1 channel)
    
    # Resize to 20x20 pixels
    img = resize_image_to_dimensions(img, 20, 20)
    
    # Normalize to [0,1] range
    img = tf.cast(img, tf.float32) / 255.0
    
    return img

# Set the folder containing the CAPTCHA images
captcha_processing_output_folder = "../input/preprocessing-and-segmenting-letters-from-captcha/extracted_letter_images"

# Initialize lists to hold images and labels
images = []
labels = []

# Iterate through each image in the specified directory
for image_file_name in os.listdir(captcha_processing_output_folder):
    # Construct full image path
    full_image_path = os.path.join(captcha_processing_output_folder, image_file_name)
    
    # Check if it's a file (skip directories)
    if os.path.isfile(full_image_path):
        # Read and process the image
        image_file = read_image(full_image_path)
        
        # Extract the label (assuming the label is the character in the filename, before the extension)
        label = os.path.splitext(image_file_name)[0]  # Gets file name without extension
        
        # Append the image and label to their respective lists
        images.append(image_file)
        labels.append(label)

# Convert lists to NumPy arrays and normalize images
images = np.array(images, dtype="float32") / 255.0
labels = np.array(labels)

# Optionally, you can check the shape of images and labels
print(f'Images shape: {images.shape}')
print(f'Labels shape: {labels.shape}')


Images shape: (0,)
Labels shape: (0,)


In [13]:
!dir ../input/preprocessing-and-segmenting-letters-from-captcha/extracted_letter_images

2  4  6  8  A  C  E  G	J  L  N  Q  S  U  W  Y
3  5  7  9  B  D  F  H	K  M  P  R  T  V  X  Z


In [14]:
images = []
labels = []

# Get all image paths
img_paths = tf.io.gfile.glob(os.path.join(captcha_processing_output_folder, '*', '*.png'))

# Iterate through each image path
for image_file_path in img_paths:
    # Read and process the image
    image_file = read_image(image_file_path)
    
    # Extract the label (assuming the label is the folder name containing the image)
    label = os.path.basename(os.path.dirname(image_file_path))
    
    # Append the image and label to their respective lists
    images.append(image_file)
    labels.append(label)

# Convert lists to NumPy arrays
images = np.array(images)
labels = np.array(labels)

In [15]:
images = np.array(images, dtype="float") / 255.0
labels = np.array(labels)

In [18]:
len(labels)

38744

In [41]:
from sklearn.model_selection import train_test_split

(X_train, X_test, y_train, y_test) = train_test_split(
    images, labels, test_size=0.2, random_state=11
)

In [42]:
X_train.shape, X_test.shape

((30995, 20, 20, 1), (7749, 20, 20, 1))

In [43]:
from sklearn.preprocessing import LabelBinarizer

In [44]:
label_binarizer = LabelBinarizer().fit(y_train)

In [45]:
y_train

array(['C', '9', 'W', ..., 'S', '3', 'V'], dtype='<U1')

In [46]:
y_train = label_binarizer.transform(y_train)

In [47]:
y_train[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [48]:
y_train.shape

(30995, 32)

In [49]:
y_test = label_binarizer.transform(y_test)

In [51]:
y_test.shape

(7749, 32)

In [52]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

num_classes = 32
NN_model = Sequential()

# First convolutional block
NN_model.add(Conv2D(20, (5, 5), padding="same", input_shape=(20, 20, 1), activation="relu"))
NN_model.add(BatchNormalization())
NN_model.add(Dropout(0.25))  # Add dropout

# Second convolutional block
NN_model.add(Conv2D(40, (5, 5), padding="same", activation="relu"))
NN_model.add(BatchNormalization())
NN_model.add(Dropout(0.25))  # Add dropout
NN_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Third convolutional block
NN_model.add(Conv2D(80, (5, 5), padding="same", activation="relu"))
NN_model.add(BatchNormalization())

# Fourth convolutional block
NN_model.add(Conv2D(160, (5, 5), padding="same", activation="relu"))
NN_model.add(BatchNormalization())
NN_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Flatten and fully connected layers
NN_model.add(Flatten())
NN_model.add(Dense(512, activation="relu"))
NN_model.add(BatchNormalization())
NN_model.add(Dropout(0.5))  # Higher dropout for dense layer

# Output layer
NN_model.add(Dense(num_classes, activation="softmax"))

# Compile the model
NN_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Display the model summary
NN_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 20, 20, 20)        520       
_________________________________________________________________
batch_normalization (BatchNo (None, 20, 20, 20)        80        
_________________________________________________________________
dropout (Dropout)            (None, 20, 20, 20)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 20, 20, 40)        20040     
_________________________________________________________________
batch_normalization_1 (Batch (None, 20, 20, 40)        160       
_________________________________________________________________
dropout_1 (Dropout)          (None, 20, 20, 40)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 10, 10, 40)       

In [None]:
NN_model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=16,
    epochs=20,
    verbose=1,
)

In [None]:
CAPTCHA = "../input/captcha-images/captcha_images/256Q.png"

In [None]:
image = cv2.imread(CAPTCHA)

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.imshow(image)

In [None]:
image.shape

In [None]:
def find_bounding_rectangles_of_contours(contours):
    """Determines the bounding rectangles of the contours of the cropped letters."""
    letter_bounding_rectangles = []
    for contour in contours:
        (x, y, w, h) = cv2.boundingRect(contour)
        if w / h > 1.25:
            half_width = int(w / 2)
            letter_bounding_rectangles.append((x, y, half_width, h))
            letter_bounding_rectangles.append((x + half_width, y, half_width, h))
        else:
            letter_bounding_rectangles.append((x, y, w, h))
    return letter_bounding_rectangles


def preprocess_CAPTCHA(img):
    """Takes a CAPTCHA image and thresholds it."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray_with_border = cv2.copyMakeBorder(gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)
    preprocessed = cv2.threshold(
        gray_with_border, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )[1]
    return gray_with_border, preprocessed


def get_CAPTCHA_label(path_to_file):
    """Get the CAPTCHA text from the file name."""
    filename = os.path.basename(path_to_file)
    label = filename.split(".")[0]
    return label


def CAPTCHA_to_gray_scale_and_bounding_rectangles(captcha_image_file):
    """Take a CAPTCHA and output a grayscale version as well as the bounding rectangles of its cropped letters."""
    image = cv2.imread(captcha_image_file)
    gray, preprocessed = preprocess_CAPTCHA(image)
    contours = cv2.findContours(
        preprocessed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    contours = contours[0]
    letter_bounding_rectangles = find_bounding_rectangles_of_contours(contours)
    letter_bounding_rectangles = sorted(letter_bounding_rectangles, key=lambda x: x[0])
    return gray, letter_bounding_rectangles

In [None]:
captcha_label = get_CAPTCHA_label(CAPTCHA)
gray, letter_bounding_rectangles = CAPTCHA_to_gray_scale_and_bounding_rectangles(
    CAPTCHA
)
predictions = []


In [None]:
for letter_bounding_rectangle in letter_bounding_rectangles:
    x, y, w, h = letter_bounding_rectangle
    letter_image = gray[y - 2 : y + h + 2, x - 2 : x + w + 2]
    letter_image = resize_image_to_dimensions(letter_image, 20, 20)
    letter_image = np.expand_dims(letter_image, axis=2)
    letter_image = np.expand_dims(letter_image, axis=0)
    prediction = NN_model.predict(letter_image)
    letter = label_binarizer.inverse_transform(prediction)[0]
    predictions.append(letter)

In [None]:
predicted_captcha_text = "".join(predictions)
print(f"predicted CAPTCHA text is: {predicted_captcha_text}")
print(f"captch text is : 256Q")


# it works!

In [None]:
import matplotlib.pyplot as plt

plt.imshow(read_image(CAPTCHA))