In [1]:
!pip install torch torchvision matplotlib scikit-learn opencv-python imutils




In [2]:
import os, pickle
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from imutils import paths
from lab_2_helpers import *
import matplotlib.pyplot as plt


In [3]:
def extract_chars(image, expected_chars=4, margin=2):
    """Find contours and extract the characters inside a CAPTCHA image.

    The image is binarised (inverted binary + Otsu threshold), its external
    contours are located, and the bounding box of every contour is treated
    as one character. A box whose width exceeds 1.25x its height is split
    into two equal halves, on the assumption that it holds two touching
    characters.

    Args:
        image: Image array to segment. It is passed straight to
            ``cv2.threshold`` with the Otsu flag, so it is expected to be a
            single-channel 8-bit image.
        expected_chars: Number of character regions a valid CAPTCHA must
            produce. Defaults to 4.
        margin: Number of extra pixels kept around each bounding box when
            cropping. Defaults to 2.

    Returns:
        A list of ``expected_chars`` crops of ``image`` ordered left to
        right, or ``None`` if a different number of regions was found.
    """
    image_bw = cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )[1]

    # Index -2 selects the contour list on both OpenCV 3.x (which returns
    # image, contours, hierarchy) and OpenCV 4.x (contours, hierarchy).
    contours = cv2.findContours(
        image_bw, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    char_regions = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)

        if w / h > 1.25:
            # Too wide for a single character: split into two halves.
            half = int(w / 2)
            char_regions.append((x, y, half, h))
            char_regions.append((x + half, y, half, h))
        else:
            char_regions.append((x, y, w, h))

    if len(char_regions) != expected_chars:
        return None

    # Order regions left to right so they line up with the CAPTCHA text.
    char_regions.sort(key=lambda region: region[0])

    chars = []
    for x, y, w, h in char_regions:
        # Clamp the start indices at 0: a negative start would be read as
        # "count from the end" and produce an empty or wrong crop for
        # regions that lie within `margin` pixels of the top/left edge.
        top = max(y - margin, 0)
        left = max(x - margin, 0)
        chars.append(image[top:y + h + margin, left:x + w + margin])

    return chars


In [4]:
# Unpack the CAPTCHA image archive shipped next to this notebook
# (presumably this creates the ./captcha-images folder read by the loading cell - confirm).
!tar -xf captcha-images.tar.xz


In [5]:
def load_transform_image(image_path):
    """Load an image from disk as grayscale with an 8-pixel replicated border.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The grayscale image padded by 8 pixels on every side, the padding
        being filled by replicating the nearest edge pixels.

    Raises:
        OSError: If OpenCV could not read the file. ``cv2.imread`` signals
            failure by returning ``None`` rather than raising, which would
            otherwise surface as an opaque error inside ``cv2.cvtColor``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f"Could not read image file: {image_path!r}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.copyMakeBorder(gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)

CAPTCHA_IMAGE_FOLDER = "./captcha-images"

# Sort the paths: directory listing order depends on the filesystem, and the
# seeded train/test split is only reproducible for a fixed input order.
captcha_image_paths = sorted(paths.list_images(CAPTCHA_IMAGE_FOLDER))
captcha_images = [load_transform_image(p) for p in captcha_image_paths]
# The file name without its extension is used as the CAPTCHA's text.
captcha_texts = [os.path.splitext(os.path.basename(p))[0] for p in captcha_image_paths]


In [6]:
# Hold out 20% of the CAPTCHAs as a test set; the remaining "tv" portion is
# used for training and validation. The fixed seed keeps the split stable.
(
    captcha_images_tv,
    captcha_images_test,
    captcha_texts_tv,
    captcha_texts_test,
) = train_test_split(
    captcha_images,
    captcha_texts,
    test_size=0.2,
    random_state=31528476,
)


In [7]:
def make_feature(image):
    """Turn a character crop into a float32 feature array scaled by 1/255.

    The crop is first passed through ``resize_to_fit(image, 20, 20)``
    (imported from ``lab_2_helpers``; presumably it yields a 20x20 image -
    confirm against the helper), then cast to float32 and divided by 255.
    """
    resized = resize_to_fit(image, 20, 20)
    return resized.astype("float32") / 255.0


In [8]:
# Build one (feature, label) pair per character of every CAPTCHA in the
# train/validation portion. CAPTCHAs that extract_chars rejects are skipped.
features, labels = [], []

for captcha_image, captcha_text in zip(captcha_images_tv, captcha_texts_tv):
    char_images = extract_chars(captcha_image)
    if char_images is not None:
        # Pair each crop (left to right) with the matching character of the text.
        for char_image, char in zip(char_images, captcha_text):
            features.append(make_feature(char_image))
            labels.append(char)

features, labels = np.array(features), np.array(labels)

# One-hot encode the character labels.
lb = LabelBinarizer()
labels_oh = lb.fit_transform(labels)

# Keep 25% of the character samples aside for validation.
X_train, X_val, y_train, y_val = train_test_split(
    features,
    labels_oh,
    test_size=0.25,
    random_state=42,
)


In [9]:
class CharDataset(Dataset):
    """Dataset of character images and their label rows held as tensors.

    ``X`` is converted to a tensor and gets a new axis inserted at position 1
    (the channel axis expected by ``nn.Conv2d``); ``y`` is converted to a
    float tensor. Indexing returns the ``(image, label)`` pair at that index.
    """

    def __init__(self, X, y):
        images = torch.tensor(X)
        targets = torch.tensor(y)
        self.X = images.unsqueeze(1)
        self.y = targets.to(torch.float32)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        sample = (self.X[idx], self.y[idx])
        return sample

# Wrap the train/validation arrays in datasets and batch them (32 per batch).
# Only the training loader shuffles; validation order is left as-is.
train_ds, val_ds = CharDataset(X_train, y_train), CharDataset(X_val, y_val)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False)


In [10]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel character images.

    Two ``Conv2d(5x5, padding=2) -> ReLU -> MaxPool2d(2)`` stages (20 then 50
    channels) are followed by a 500-unit fully connected layer and a final
    linear layer that emits one raw logit per class.

    Args:
        n_classes: Number of output classes.
        input_size: ``(height, width)`` of the input images. Defaults to
            ``(20, 20)``, which reproduces the previously hard-coded
            ``5*5*50`` flattened size.

    Raises:
        ValueError: If ``input_size`` is too small to survive two 2x2 poolings.
    """

    def __init__(self, n_classes, input_size=(20, 20)):
        super().__init__()
        height, width = input_size
        # The padded 5x5 convolutions keep the spatial size; each MaxPool2d(2)
        # halves it, rounding down.
        pooled_h = height // 2 // 2
        pooled_w = width // 2 // 2
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for two 2x2 poolings"
            )

        self.conv = nn.Sequential(
            nn.Conv2d(1, 20, 5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 50, 5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(pooled_h * pooled_w * 50, 500),
            nn.ReLU(),
            nn.Linear(500, n_classes)
        )

    def forward(self, x):
        """Return class logits for a batch shaped (batch, 1, height, width)."""
        return self.fc(self.conv(x))

# Seed PyTorch's global RNG so that weight initialisation (and the batch order
# drawn by the shuffling train_loader) is repeatable from run to run.
torch.manual_seed(42)

model = CNN(len(lb.classes_))        # one output logit per distinct character
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())


In [11]:
# Train for 10 passes over the training loader.
for epoch in range(10):
    model.train()
    for batch_images, batch_onehot in train_loader:
        # The labels are stored one-hot; the loss is given class indices here.
        batch_targets = batch_onehot.argmax(dim=1)
        logits = model(batch_images)
        loss = criterion(logits, batch_targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/10 complete")


Epoch 1/10 complete
Epoch 2/10 complete
Epoch 3/10 complete
Epoch 4/10 complete
Epoch 5/10 complete
Epoch 6/10 complete
Epoch 7/10 complete
Epoch 8/10 complete
Epoch 9/10 complete
Epoch 10/10 complete


In [12]:
# Measure per-character accuracy on the validation loader.
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for batch_images, batch_onehot in val_loader:
        preds = model(batch_images)
        predicted = preds.argmax(dim=1)
        actual = batch_onehot.argmax(dim=1)
        # Count the samples whose predicted class index matches the label's.
        correct += int(torch.eq(predicted, actual).sum())
        total += len(batch_onehot)

print("Validation accuracy:", correct / total)


Validation accuracy: 0.9887892376681614
