## Load dataset

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames[:3]:
        print(os.path.join(dirname, filename))
    if len(filenames) > 3:
        print("...")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/captcha-hacker-2023-spring/dataset/sample_submission.csv
/kaggle/input/captcha-hacker-2023-spring/dataset/test/task1/xsg8GMuoBtdBJees.png
/kaggle/input/captcha-hacker-2023-spring/dataset/test/task1/e68y2Vs6Fq5e4V81.png
/kaggle/input/captcha-hacker-2023-spring/dataset/test/task1/cv5iniI57EbCSc1Q.png
...
/kaggle/input/captcha-hacker-2023-spring/dataset/test/task2/uxDEbigOFS9rRHn5.png
/kaggle/input/captcha-hacker-2023-spring/dataset/test/task2/LMZ2Nkk6X6WYlJFt.png
/kaggle/input/captcha-hacker-2023-spring/dataset/test/task2/YY28AIyP6hHC18mq.png
...
/kaggle/input/captcha-hacker-2023-spring/dataset/test/task3/SistF7IUel8j7A0j.png
/kaggle/input/captcha-hacker-2023-spring/dataset/test/task3/nAt2UlgBK4wAmTfd.png
/kaggle/input/captcha-hacker-2023-spring/dataset/test/task3/2V3uDcFqx0yVbHU2.png
...
/kaggle/input/captcha-hacker-2023-spring/dataset/train/annotations.csv
/kaggle/input/captcha-hacker-2023-spring/dataset/train/task1/ikOVReqRDCyHYnkg.png
/kaggle/input/captcha-hacker-2023-s

In [2]:
import csv
import cv2
import numpy as np
import random
import os
import torchvision
import torchvision.transforms as transforms
from torchvision import io
from tqdm import tqdm
import torchvision.models as models
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [3]:
TRAIN_PATH = "/kaggle/input/captcha-hacker-2023-spring/dataset/train"
TEST_PATH = "/kaggle/input/captcha-hacker-2023-spring/dataset/test"
device = 'cuda'

# try device = "cuda" 
# and change your settings/accelerator to GPU if you want it to run faster if your using kaggle's enviroment

In [4]:
alphabets = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
alphabets2index = {alphabet:i for i, alphabet in enumerate(alphabets)}
print(alphabets2index)
index2alphabets = {i: alphabet for i, alphabet in enumerate(alphabets)}

{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25, 'A': 26, 'B': 27, 'C': 28, 'D': 29, 'E': 30, 'F': 31, 'G': 32, 'H': 33, 'I': 34, 'J': 35, 'K': 36, 'L': 37, 'M': 38, 'N': 39, 'O': 40, 'P': 41, 'Q': 42, 'R': 43, 'S': 44, 'T': 45, 'U': 46, 'V': 47, 'W': 48, 'X': 49, 'Y': 50, 'Z': 51, '0': 52, '1': 53, '2': 54, '3': 55, '4': 56, '5': 57, '6': 58, '7': 59, '8': 60, '9': 61}


In [5]:
from scipy.ndimage import gaussian_laplace
class LoGFilter(object):
    def __init__(self, sigma):
        self.sigma = sigma

    def __call__(self, img):
        img_np = np.array(img)
        img_filtered = gaussian_laplace(img_np, self.sigma)
        return img_filtered



## traning task1

In [6]:
transform_1= transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Grayscale(num_output_channels=3),  # 添加灰階轉換
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.Normalize(mean=[0.485], std=[0.229]),  # 只有一個通道的均值和標準差
])


In [7]:
class Task1Dataset(Dataset):
    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task1")]
        self.return_filename = return_filename
        self.root = root
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = cv2.imread(f"{self.root}/{filename}")
        # img = cv2.resize(img, (32, 32))
        # img = np.mean(img, axis=2)
        img = transform_1(img)
        if self.return_filename:
            return torch.FloatTensor(img ), filename
        else:
            return torch.FloatTensor(img ), alphabets2index[label]

    def __len__(self):
        return len(self.data)

In [8]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(1024, 512),
            nn.LeakyReLU(),
            nn.Linear(512, len(alphabets))
        )
        
        
    def forward(self, x):
        batch_size, h, w = x.shape
        x = x.view(batch_size, h*w)
        return self.layers(x)

In [9]:
class ResNet18_1(nn.Module):
    def __init__(self):
        super(ResNet18_1, self).__init__()
        self.model = models.resnet18(pretrained=True)
        self.model.fc = nn.Linear(512, 62)

    def forward(self, x):
        logits = self.model(x)
        return logits

In [10]:
train_data = []
val_data = []
PATH_1 = "model_1.pt"

with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if random.random() < 0.8:
            train_data.append(row)
        else:
            val_data.append(row)

train_ds = Task1Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=100, num_workers=4, drop_last=True, shuffle=True)

val_ds = Task1Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=100, num_workers=4, drop_last=False, shuffle=False)



In [11]:
model1 = ResNet18_1().to(device)
optimizer = torch.optim.Adam(model1.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()


for epoch in range(100):
    print(f"Epoch [{epoch}]")
    model1.train()
    for image, label in train_dl:
        image = image.to(device)
        label = label.to(device)
        
        pred = model1(image)
        loss = loss_fn(pred, label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    sample_count = 0
    correct_count = 0
    model1.eval()
    for image, label in val_dl:
        image = image.to(device)
        label = label.to(device)
        
        pred = model1(image)
        loss = loss_fn(pred, label)
        
        pred = torch.argmax(pred, dim=1)
        
        sample_count += len(image)
        correct_count += (label == pred).sum()
        
    print("accuracy (validation):", correct_count / sample_count)
torch.save(model1.state_dict(), PATH_1)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 257MB/s]


Epoch [0]
accuracy (validation): tensor(0.6286, device='cuda:0')
Epoch [1]
accuracy (validation): tensor(0.9072, device='cuda:0')
Epoch [2]
accuracy (validation): tensor(0.9655, device='cuda:0')
Epoch [3]
accuracy (validation): tensor(0.9151, device='cuda:0')
Epoch [4]
accuracy (validation): tensor(0.9019, device='cuda:0')
Epoch [5]
accuracy (validation): tensor(0.9337, device='cuda:0')
Epoch [6]
accuracy (validation): tensor(0.9310, device='cuda:0')
Epoch [7]
accuracy (validation): tensor(0.9523, device='cuda:0')
Epoch [8]
accuracy (validation): tensor(0.9682, device='cuda:0')
Epoch [9]
accuracy (validation): tensor(0.9602, device='cuda:0')
Epoch [10]
accuracy (validation): tensor(0.9629, device='cuda:0')
Epoch [11]
accuracy (validation): tensor(0.9682, device='cuda:0')
Epoch [12]
accuracy (validation): tensor(0.9682, device='cuda:0')
Epoch [13]
accuracy (validation): tensor(0.9655, device='cuda:0')
Epoch [14]
accuracy (validation): tensor(0.9682, device='cuda:0')
Epoch [15]
accuracy 

In [12]:
test_data = []
model = ResNet18_1().to(device)
PATH_1 = "model_1.pt"
with open(f'{TEST_PATH}/../sample_submission.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        test_data.append(row)

# !rm -f submission.csv
if os.path.exists('submission.csv'):
    file = open('submission.csv', 'w', newline='')
    csv_writer = csv.writer(file)
else:
    file = open('submission.csv', 'w', newline='')
    csv_writer = csv.writer(file)
    csv_writer.writerow(["filename", "label"])

test_ds = Task1Dataset(test_data, root=TEST_PATH, return_filename=True)
test_dl = DataLoader(test_ds, batch_size=64, num_workers=4, drop_last=False, shuffle=False)

model.load_state_dict(torch.load(PATH_1))

print( len(test_data) )

model.eval()
cnt = 0
for image, filenames in test_dl:
    image = image.to(device)
    
    pred = model(image)
    pred = torch.argmax(pred, dim=1)
    
    for i in range(len(filenames)):
      csv_writer.writerow([filenames[i], alphabets[pred[i].item()]])
      cnt += 1
print(cnt) # 6500



10001
6500


## traning task2


In [13]:
transform_2= transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Grayscale(num_output_channels=3),  # 添加灰階轉換
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.Normalize(mean=[0.485], std=[0.229]),  # 只有一個通道的均值和標準差
])

In [14]:
class Task2Dataset(Dataset):
    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task2")]
        self.return_filename = return_filename
        self.root = root
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = cv2.imread(f"{self.root}/{filename}")
        img = transform_2(img)
        if self.return_filename:
            return torch.FloatTensor( img ), filename
        else:
            char_label = np.zeros(124)
            j = 0
            for i in label :
               char_label[alphabets2index[i]+j] = 1
               j = j + 62
            return torch.FloatTensor(img ), char_label

    def __len__(self):
        return len(self.data)

In [15]:
class ResNet18_2(nn.Module):
    def __init__(self):
        super(ResNet18_2, self).__init__()
        self.model = models.resnet18(pretrained=True)
        self.model.fc = nn.Linear(512, 124)

    def forward(self, x):
        logits = self.model(x)
        return logits

In [16]:

PATH_2 = "model_2.pt"
with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if random.random() < 0.8:
            train_data.append(row)
        else:
            val_data.append(row)

train_ds = Task2Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=100, num_workers=4, drop_last=True, shuffle=True)
# for images, labels in train_dl:
#     # print('Images:', images)
#     print('Labels:', labels)
#     # Stop after the first batch

val_ds = Task2Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=100, num_workers=4, drop_last=False, shuffle=False)

In [17]:
model2 = ResNet18_2().to(device)
optimizer = torch.optim.Adam(model2.parameters(), lr=1e-3)
loss_fn = nn.MultiLabelSoftMarginLoss()

PATH_2 = "model_2.pt"
for epoch in range(150):
    print(f"Epoch [{epoch}]")
    model2.train()
    for image, label in train_dl:

        # label=torch.from_numpy(label)
        image = image.to(device)
        label = label.to(device)
        # print(label)
        pred = model2(image)
        loss = loss_fn(pred, label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    sample_count = 0
    correct_count = 0
    model2.eval()
    for image, label in val_dl:
        image = image.to(device)
        pred = model2(image)
        label1 = label.to(device)
        loss = loss_fn(pred, label1)
        pred = pred.data.cpu().numpy()
        ### 要將pred轉成one hot 形式
        pred_one_hot = list()
        for j in range(pred.shape[0]):
            temp = np.zeros(124)
            for k in range(2) :
              max = -10000
              index = 0
              for i in range(62):
                if pred[j][i+k*62] > max:
                    index=i+k*62
                    max=pred[j][i+k*62]
              temp[index] = 1
            pred_one_hot.append(temp)
        pred_one_hot = np.array(pred_one_hot)
        label = np.array(label)
        sample_count += len(image)
        # correct_count += (label == pred_one_hot).sum()
        for i in range( pred_one_hot.shape[0] ):
          if ((label[i]==pred_one_hot[i]).all()):
                correct_count += 1
        
    print("accuracy (validation):", correct_count / sample_count)
        
    
torch.save(model2.state_dict(), PATH_2)

Epoch [0]
accuracy (validation): 0.07535641547861507
Epoch [1]
accuracy (validation): 0.8533604887983707
Epoch [2]
accuracy (validation): 0.9429735234215886
Epoch [3]
accuracy (validation): 0.9643584521384929
Epoch [4]
accuracy (validation): 0.9714867617107943
Epoch [5]
accuracy (validation): 0.9837067209775967
Epoch [6]
accuracy (validation): 0.9887983706720977
Epoch [7]
accuracy (validation): 0.9867617107942973
Epoch [8]
accuracy (validation): 0.9918533604887984
Epoch [9]
accuracy (validation): 0.9938900203665988
Epoch [10]
accuracy (validation): 0.9938900203665988
Epoch [11]
accuracy (validation): 0.9918533604887984
Epoch [12]
accuracy (validation): 0.9918533604887984
Epoch [13]
accuracy (validation): 0.9938900203665988
Epoch [14]
accuracy (validation): 0.9918533604887984
Epoch [15]
accuracy (validation): 0.9938900203665988
Epoch [16]
accuracy (validation): 0.9918533604887984
Epoch [17]
accuracy (validation): 0.9918533604887984
Epoch [18]
accuracy (validation): 0.9918533604887984
Ep

In [18]:
# if os.path.exists('submission.csv'):
#     csv_writer = csv.writer(open('submission.csv', 'a', newline=''))
# else:
#     csv_writer = csv.writer(open('submission.csv', 'w', newline=''))
#     csv_writer.writerow(["filename", "label"])

test_ds = Task2Dataset(test_data, root=TEST_PATH, return_filename=True)
test_dl = DataLoader(test_ds, batch_size=64, num_workers=4, drop_last=False, shuffle=False)

model = ResNet18_2().to(device)
PATH_2 = "model_2.pt"
model.load_state_dict(torch.load(PATH_2))
model.eval()
for image, filenames in test_dl:
    image = image.to(device)
    pred = model(image)
    pred = pred.data.cpu().numpy()
    pred_chars = list()
    for j in range(pred.shape[0]):
      ans = ""
      for k in range(2) :
        max = -10000
        index = 0
        for i in range(62):
          if pred[j][i+k*62] > max:
            index=i
            max=pred[j][i+k*62]
        ans += alphabets[index]
      pred_chars.append(ans)
    for i in range(len(filenames)):
        csv_writer.writerow([filenames[i], pred_chars[i]])
        cnt += 1
print(cnt) # 6500


9000


### traning teak3

In [19]:
transform_3= transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Grayscale(num_output_channels=3),  # 添加灰階轉換
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.Normalize(mean=[0.485], std=[0.229]),  # 只有一個通道的均值和標準差
])

In [20]:
class Task3Dataset(Dataset):
    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task3")]
        self.return_filename = return_filename
        self.root = root
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = cv2.imread(f"{self.root}/{filename}")
        # img = cv2.resize(img, (32, 32))
        # img = np.mean(img, axis=2)
        img = transform_3(img)
        if self.return_filename:
            return torch.FloatTensor(img) , filename
        else:
            char_label = np.zeros(248)
            j = 0
            for i in label :
               char_label[alphabets2index[i]+j] = 1
               j = j + 62
            return torch.FloatTensor(img), char_label

    def __len__(self):
        return len(self.data)

In [21]:
class ResNet18_3(nn.Module):
    def __init__(self):
        super(ResNet18_3, self).__init__()
        self.model = models.resnet18(pretrained=True)
        self.model.fc = nn.Linear(512, 248)

    def forward(self, x):
        logits = self.model(x)
        return logits

In [22]:
PATH_3 = "model_3.pt"
with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
      if random.random() < 0.8:
        train_data.append(row)
      else:
        val_data.append(row)


train_ds = Task3Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=100, num_workers=4, drop_last=True, shuffle=True)

val_ds = Task3Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=100, num_workers=4, drop_last=False, shuffle=False)

In [23]:
model = ResNet18_3().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MultiLabelSoftMarginLoss()

PATH_3 = "model_3.pt"
for epoch in range(150):
    print(f"Epoch [{epoch}]")
    model.train()
    for image, label in train_dl:

        # label=torch.from_numpy(label)
        image = image.to(device)
        label = label.to(device)
        # print(label)
        pred = model(image)
        loss = loss_fn(pred, label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    sample_count = 0
    correct_count = 0
    model.eval()
    for image, label in val_dl:
        image = image.to(device)
        pred = model(image)
        label1 = label.to(device)
        loss = loss_fn(pred, label1)
        pred = pred.data.cpu().numpy()
        ### 要將pred轉成one hot 形式
        pred_one_hot = list()
        for j in range(pred.shape[0]):
            temp = np.zeros(248)
            for k in range(4) :
              max = -10000
              index = 0
              for i in range(62):
                if pred[j][i+k*62] > max:
                    index=i+k*62
                    max=pred[j][i+k*62]
              temp[index] = 1
            pred_one_hot.append(temp)
        pred_one_hot = np.array(pred_one_hot)
        label = np.array(label)
        sample_count += len(image)
        # correct_count += (label == pred_one_hot).sum()
        for i in range( pred_one_hot.shape[0] ):
          if ((label[i]==pred_one_hot[i]).all()):
                correct_count += 1
        
    print("accuracy (validation):", correct_count / sample_count)
        
    
torch.save(model.state_dict(), PATH_3)

Epoch [0]
accuracy (validation): 0.0
Epoch [1]
accuracy (validation): 0.0073559460563955865
Epoch [2]
accuracy (validation): 0.5104209235798938
Epoch [3]
accuracy (validation): 0.8598283612586841
Epoch [4]
accuracy (validation): 0.9636289333878219
Epoch [5]
accuracy (validation): 0.9914180629342052
Epoch [6]
accuracy (validation): 0.9934613812832039
Epoch [7]
accuracy (validation): 0.9975480179812015
Epoch [8]
accuracy (validation): 0.9963220269718022
Epoch [9]
accuracy (validation): 0.9950960359624029
Epoch [10]
accuracy (validation): 0.9950960359624029
Epoch [11]
accuracy (validation): 0.9963220269718022
Epoch [12]
accuracy (validation): 0.9963220269718022
Epoch [13]
accuracy (validation): 0.9950960359624029
Epoch [14]
accuracy (validation): 0.9942787086228034
Epoch [15]
accuracy (validation): 0.9963220269718022
Epoch [16]
accuracy (validation): 0.9963220269718022
Epoch [17]
accuracy (validation): 0.9963220269718022
Epoch [18]
accuracy (validation): 0.9950960359624029
Epoch [19]
accu

In [24]:
# if os.path.exists('submission.csv'):
#     csv_writer = csv.writer(open('submission.csv', 'a', newline=''))
# else:
#     csv_writer = csv.writer(open('submission.csv', 'w', newline=''))
#     csv_writer.writerow(["filename", "label"])
test_ds = Task3Dataset(test_data, root=TEST_PATH, return_filename=True)
test_dl = DataLoader(test_ds, batch_size=64, num_workers=4, drop_last=False, shuffle=False)
model = ResNet18_3().to(device)
PATH_3 = "model_3.pt"
model.load_state_dict(torch.load(PATH_3))

model.eval()
for image, filenames in test_dl:
    image = image.to(device)
    pred = model(image)
    pred = pred.data.cpu().numpy()
    pred_chars = list()
    for j in range(pred.shape[0]):
      ans = ""
      for k in range(4) :
        max = -10000
        index = 0
        for i in range(62):
          if pred[j][i+k*62] > max:
            index=i
            max=pred[j][i+k*62]
        ans += alphabets[index]
      pred_chars.append(ans)
    for i in range(len(filenames)):
      csv_writer.writerow([filenames[i], pred_chars[i]])
      cnt += 1


print(cnt) # 6500


file.close()

10000
