In [None]:
import csv
import os
import random
import string

import cv2
import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import torch
import torch.nn as nn
import torch.nn.functional as Fun
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from tqdm import trange

# Dataset locations on Kaggle. For a local run, point these at "./train" and "./test".
TRAIN_PATH = "/kaggle/input/captcha-hacker/train"
TEST_PATH  = "/kaggle/input/captcha-hacker/test"

# Character vocabulary: 'a'-'z' occupy indices 0-25 and '0'-'9' occupy indices 26-35.
class_index = {char: idx for idx, char in enumerate(string.ascii_lowercase + string.digits)}
# Inverse lookup (index -> character), built from the forward map so the two cannot drift apart.
class_index_rev = {idx: char for char, idx in class_index.items()}
print(class_index)
print(class_index_rev)

# Use the GPU when one is visible; assign 'cpu' here by hand to force CPU execution.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"current using device: {device}")

In [None]:
# Randomly assign each annotation row to the training set (~70%) or the
# validation set (~30%).
#
# A dedicated, seeded generator is used so that "Restart & Run All" always
# produces the same split and validation scores stay comparable between runs;
# it also leaves the global `random` state untouched.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.7

split_rng = random.Random(SPLIT_SEED)
train_data = []
val_data = []

with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if not row:
            # csv.reader yields [] for blank lines; such rows carry no sample
            # and would break later code that indexes row[0].
            continue
        if split_rng.random() < TRAIN_FRACTION:
            train_data.append(row)
        else:
            val_data.append(row)

In [None]:
# Helper function
def rotate_img(img, angle):
    """Rotate an image about its centre without changing the canvas size.

    Args:
        img: image array whose first two axes are (height, width).
        angle: rotation angle, passed straight to ``cv2.getRotationMatrix2D``
            (so its sign convention is OpenCV's).

    Returns:
        The rotated image, with the same width and height as ``img``.
    """
    rows, cols = img.shape[:2]
    # OpenCV expects the pivot as (x, y); integer division picks the centre pixel.
    pivot = (cols // 2, rows // 2)
    # Scale factor 1: pure rotation, no zoom.
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1)
    # Warp onto an output of the original (width, height).
    return cv2.warpAffine(img, rotation, (cols, rows))
def InjectNoise(img, std):
    """Add zero-mean Gaussian noise to an image and clamp the result to [0, 255].

    Args:
        img: image array; it is not modified.
        std: standard deviation handed to ``cv2.randn``.

    Returns:
        A new array holding ``img`` plus noise, limited to the range 0..255.
        Its dtype is whatever ``img + float32`` promotes to.
    """
    gaussian = np.zeros_like(img, dtype=np.float32)
    # cv2.randn fills the buffer in place with samples of mean 0 and the given std.
    cv2.randn(gaussian, 0, std)
    return np.clip(img + gaussian, 0, 255)

def DataAugmentation(x_train, y_train):
    """Expand a training set five-fold with rotated copies of every image.

    Each source image contributes five consecutive output samples: the
    untouched image followed by its rotations by 30, 60, -30 and -60 degrees.
    All five share the label of the source image.

    Args:
        x_train: sequence of images; each must fit a (64, 64, 3) uint8 slot.
        y_train: sequence of label vectors, one per image, all of equal length.

    Returns:
        (images, labels): a uint8 array of shape (5 * N, 64, 64, 3) and an
        int64 array of shape (5 * N, label_length).
    """
    angles = (30, 60, -30, -60)
    copies = 5  # the original image plus one copy per rotation angle
    total = len(x_train) * copies
    images_out = np.zeros(shape=(total, 64, 64, 3), dtype=np.uint8)
    labels_out = np.zeros(shape=(len(y_train) * copies, len(y_train[0])), dtype=np.int64)
    for base in trange(0, total, copies):
        src = base // copies
        # The label vector is broadcast to all copies of this image.
        labels_out[base : base + copies] = y_train[src]
        images_out[base] = x_train[src]
        for offset, angle in enumerate(angles, start=1):
            images_out[base + offset] = rotate_img(x_train[src], angle)
    return np.array(images_out), np.array(labels_out)

def read_data(rows, prefix, root):
    """Load the images and encoded labels of one task.

    Args:
        rows: iterable of annotation rows; ``row[0]`` is the image path
            relative to ``root`` and ``row[1]`` is the label string.
        prefix: only rows whose filename starts with this string are used
            (e.g. ``"task1"``).
        root: directory the filenames are relative to.

    Returns:
        (filenames, images, labels):
            filenames - array of the selected image paths,
            images    - uint8 array of shape (N, 64, 64, 3),
            labels    - int32 array of shape (N, label_length) holding one
                        class index per character.

    Raises:
        ValueError: if no row matches ``prefix``.
        FileNotFoundError: if an image cannot be read from disk.
    """
    # Blank rows (empty lists) carry no filename and are ignored.
    selected = [sample for sample in rows if sample and sample[0].startswith(prefix)]
    if not selected:
        raise ValueError(f"no annotation rows start with prefix {prefix!r}")

    filenames = np.array(selected)
    imgs = filenames[:, 0]
    label = filenames[:, 1]
    images = np.zeros(shape=(len(filenames), 64, 64, 3), dtype=np.uint8)
    # The label width is taken from the first sample; all labels of a task are
    # expected to have that length.
    labels = np.zeros(shape=(len(filenames), len(label[0])), dtype=np.int32)
    for i in trange(len(filenames)):
        img_path = f"{root}/{imgs[i]}"
        curr_img = cv2.imread(img_path)
        if curr_img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cv2.resize.
            raise FileNotFoundError(f"could not read image: {img_path}")
        images[i] = cv2.resize(curr_img, (64, 64))
        if prefix == "task1":
            # Digits sit at indices 26-35 of class_index; shifting by 26 maps
            # '0'-'9' onto classes 0-9 (presumably task1 labels are digits only).
            curr_label = [class_index[x]-26 for x in label[i]]
        else:
            curr_label = [class_index[x] for x in label[i]]
        labels[i] = np.array(curr_label)
    return np.array(imgs), np.array(images), np.array(labels)

In [None]:
class ImgDataset(Dataset):
    """In-memory dataset of image tensors paired with labels or filenames.

    Args:
        x_data: channel-last image array of shape (N, H, W, C).
        y_data: label array with one row per image.
        root: accepted for call-site compatibility; not used.
        filenames: indexable collection returned instead of the label when
            ``return_filename`` is true.
        return_filename: selects what ``__getitem__`` pairs with the image.
        prefix: accepted for call-site compatibility; not used.
    """

    def __init__(self, x_data, y_data, root, filenames, return_filename=False, prefix="task1"):
        # Convert to float32 and move channels first: (N, H, W, C) -> (N, C, H, W).
        self.x_data = torch.from_numpy(x_data.astype('float32')).permute(0, 3, 1, 2)
        self.y_data = torch.from_numpy(y_data)
        self.filenames = filenames
        self.return_filename = return_filename

    def __getitem__(self, index):
        """Return ``(image, filename)`` or ``(image, label)`` for one sample."""
        companion = self.filenames if self.return_filename else self.y_data
        return self.x_data[index], companion[index]

    def __len__(self):
        """Number of images held by the dataset."""
        return self.x_data.shape[0]

In [None]:
# Load all data: for each task read the images, augment the training split and
# wrap both splits in a dataset plus loader.
#
# Changes relative to three hand-copied stanzas:
#   * one loop instead of three copies, so the tasks cannot drift apart;
#   * each dataset receives filenames that line up index-for-index with its
#     samples (previously the raw CSV rows of *all* tasks were passed, which
#     would return wrong names if return_filename were ever enabled);
#   * validation loaders are not shuffled - order does not affect accuracy.
BATCH_SIZE = 500
NUM_WORKERS = 2

task_loaders = {}
for task_id in (1, 2, 3):
    task_prefix = f"task{task_id}"

    # filename to image
    train_data_path, x_train, y_train = read_data(rows=train_data, prefix=task_prefix, root=TRAIN_PATH)
    val_data_path, x_val, y_val = read_data(rows=val_data, prefix=task_prefix, root=TRAIN_PATH)
    x_train_aug, y_train_aug = DataAugmentation(x_train, y_train)

    # DataAugmentation stores all copies of one source image consecutively, so
    # repeating each filename by the expansion factor keeps names aligned.
    copies_per_image = len(x_train_aug) // len(x_train)
    train_ds = ImgDataset(
        x_data=x_train_aug,
        y_data=y_train_aug,
        root=TRAIN_PATH,
        filenames=np.repeat(train_data_path, copies_per_image),
        return_filename=False,
        prefix=task_prefix,
    )
    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True)

    val_ds = ImgDataset(
        x_data=x_val,
        y_data=y_val,
        root=TRAIN_PATH,
        filenames=val_data_path,
        return_filename=False,
        prefix=task_prefix,
    )
    val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)

    task_loaders[task_prefix] = (train_ds, train_dl, val_ds, val_dl)
    print(f"Data{task_id} Done")

# Expose the per-task objects under the names the training cells use.
train_ds1, train_dl1, val_ds1, val_dl1 = task_loaders["task1"]
train_ds2, train_dl2, val_ds2, val_dl2 = task_loaders["task2"]
train_ds3, train_dl3, val_ds3, val_dl3 = task_loaders["task3"]

In [None]:

class Mymodel1(nn.Module):
    """Pretrained ResNet-18 whose classifier is replaced by a 10-way linear head."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # No backbone parameter is frozen; the whole network is trainable.
        # The stock classifier is swapped for a fresh 512 -> 10 layer.
        backbone.fc = nn.Linear(512, 10)
        self.model = backbone

    def forward(self, x):
        """Return the raw class scores (logits) the network produces for ``x``."""
        return self.model(x)
# Train the task-1 model and report validation accuracy after every epoch.
model1 = Mymodel1().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model1.parameters(), lr=1e-3)
# Defined up front (as final_acc2 / final_acc3 are) so the name exists even if
# the validation loader yields no batches.
final_acc1 = 0

for epoch in range(30):
    print(f"Epoch [{epoch}]")

    # ---- training pass ----
    model1.train()
    for images, label in train_dl1:
        images, label = images.to(device), label.to(device)
        output = model1(images)

        # Labels carry a trailing axis of size 1; CrossEntropyLoss wants it removed.
        loss = loss_fn(output, label.squeeze(1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- validation pass ----
    sample_count = 0
    correct_count = 0
    model1.eval()
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every validation batch and saves memory.
    with torch.no_grad():
        for image, label in val_dl1:
            image, label = image.to(device), label.to(device)
            # predict and get prediction for each char
            output = model1(image)
            pred = torch.argmax(output, dim=1)
            label = label.squeeze(1)
            sample_count += len(image)
            correct_count += (pred == label).sum()
            final_acc1 = correct_count / sample_count
    print("Model1 accuracy (validation):", final_acc1)
print("Task1 done")

In [None]:
class Mymodel2(nn.Module):
    """Pretrained ResNet-18 feature extractor feeding two separate 36-way linear heads."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        feature_dim = backbone.fc.in_features
        # Replace the stock classifier with a pass-through so the backbone
        # outputs its features directly; no backbone parameter is frozen.
        backbone.fc = nn.Identity()
        self.model = backbone
        self.fc1 = nn.Linear(feature_dim, 36)
        self.fc2 = nn.Linear(feature_dim, 36)

    def forward(self, x):
        """Return a tuple ``(out1, out2)`` with the logits of the two heads."""
        features = self.model(x)
        return self.fc1(features), self.fc2(features)
# Train the task-2 model (two characters per image) and report per-character
# validation accuracy after every epoch.
model2 = Mymodel2().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=1e-3)
final_acc2 = 0

for epoch in range(40):
    print(f"Epoch [{epoch}]")

    # ---- training pass ----
    model2.train()
    for images, label in train_dl2:
        images, label = images.to(device), label.to(device)
        output = model2(images)
        # One loss term per head; label column k is the target of head k.
        loss1 = loss_fn(output[0], label[:, 0])
        loss2 = loss_fn(output[1], label[:, 1])
        loss = loss1 + loss2

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- validation pass ----
    sample_count = 0
    correct_count = 0
    model2.eval()
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every validation batch and saves memory.
    with torch.no_grad():
        for image, label in val_dl2:
            image, label = image.to(device), label.to(device)
            # predict and get prediction for each char
            output = model2(image)
            pred1 = torch.argmax(output[0], dim=1)
            pred2 = torch.argmax(output[1], dim=1)
            # Accuracy is counted per character, hence two samples per image.
            sample_count += len(image)*2
            correct_count += (pred1 == label[:, 0]).sum()
            correct_count += (pred2 == label[:, 1]).sum()
            final_acc2 = correct_count / sample_count
    print("Model2 accuracy (validation):", final_acc2)
print("Task2 Done")

In [None]:
class Mymodel3(nn.Module):
    """Pretrained ResNet-18 feature extractor feeding four separate 36-way linear heads."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        feature_dim = backbone.fc.in_features
        # Replace the stock classifier with a pass-through so the backbone
        # outputs its features directly; no backbone parameter is frozen.
        backbone.fc = nn.Identity()
        self.model = backbone
        self.fc1 = nn.Linear(feature_dim, 36)
        self.fc2 = nn.Linear(feature_dim, 36)
        self.fc3 = nn.Linear(feature_dim, 36)
        self.fc4 = nn.Linear(feature_dim, 36)

    def forward(self, x):
        """Return a 4-tuple with the logits of heads fc1..fc4, in that order."""
        features = self.model(x)
        return (
            self.fc1(features),
            self.fc2(features),
            self.fc3(features),
            self.fc4(features),
        )
# Train the task-3 model (four characters per image) and report per-character
# validation accuracy after every epoch.
model3 = Mymodel3().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model3.parameters(), lr=4e-3)
final_acc3 = 0
for epoch in range(50):
    print(f"Epoch [{epoch}]")

    # ---- training pass ----
    model3.train()
    for images, label in train_dl3:
        images, label = images.to(device), label.to(device)
        output = model3(images)

        # One loss term per head; label column k is the target of head k.
        loss1 = loss_fn(output[0], label[:, 0])
        loss2 = loss_fn(output[1], label[:, 1])
        loss3 = loss_fn(output[2], label[:, 2])
        loss4 = loss_fn(output[3], label[:, 3])
        loss = loss1 + loss2 + loss3 + loss4

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- validation pass ----
    sample_count = 0
    correct_count = 0
    model3.eval()
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every validation batch and saves memory.
    with torch.no_grad():
        for image, label in val_dl3:
            image, label = image.to(device), label.to(device)
            # predict and get prediction for each char
            output = model3(image)
            pred1 = torch.argmax(output[0], dim=1)
            pred2 = torch.argmax(output[1], dim=1)
            pred3 = torch.argmax(output[2], dim=1)
            pred4 = torch.argmax(output[3], dim=1)
            # Accuracy is counted per character, hence four samples per image.
            sample_count += len(image)*4
            correct_count += (pred1 == label[:, 0]).sum()
            correct_count += (pred2 == label[:, 1]).sum()
            correct_count += (pred3 == label[:, 2]).sum()
            correct_count += (pred4 == label[:, 3]).sum()
            final_acc3 = correct_count / sample_count
    print("Model3 accuracy (validation):", final_acc3)
print("Task3 Done")