In [None]:
# Mount the user's Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Preview the input tree: for every directory print at most its first three
# files, followed by "..." when the directory holds more than that.
PREVIEW_LIMIT = 3
for folder, _subdirs, names in os.walk('/kaggle/input'):  # originally /kaggle/input
    shown, hidden = names[:PREVIEW_LIMIT], names[PREVIEW_LIMIT:]
    for name in shown:
        print(os.path.join(folder, name))
    if hidden:
        print("...")

/kaggle/input/captcha-hacker/sample_submission.csv
/kaggle/input/captcha-hacker/test/task1/x4LPcV5n6IvLj4vz.png
/kaggle/input/captcha-hacker/test/task1/W88fVMlAs5IpsXn4.png
/kaggle/input/captcha-hacker/test/task1/ZWDL6pUMfPu5c9jh.png
...
/kaggle/input/captcha-hacker/test/task2/jMalnsI5a5IWxYAi.png
/kaggle/input/captcha-hacker/test/task2/ihE9HHgyOINGEMcO.png
/kaggle/input/captcha-hacker/test/task2/ZATEVW3P5s0akZjd.png
...
/kaggle/input/captcha-hacker/test/task3/cXBlxYfvQWbiK7dn.png
/kaggle/input/captcha-hacker/test/task3/5gEp1jR9jNNfuqlk.png
/kaggle/input/captcha-hacker/test/task3/hEQ0WQtB9B7j8C2f.png
...
/kaggle/input/captcha-hacker/train/annotations.csv
/kaggle/input/captcha-hacker/train/task1/H85RQ6dbWUvLSIDV.png
/kaggle/input/captcha-hacker/train/task1/n2GC8uY1N4QfvVxe.png
/kaggle/input/captcha-hacker/train/task1/XOqfRx2R6SnoEjFr.png
...
/kaggle/input/captcha-hacker/train/task2/Mr4B2zxXk92hyzn9.png
/kaggle/input/captcha-hacker/train/task2/SIuRCnlK8VS91FhX.png
/kaggle/input/captcha-h

In [None]:
import csv
import cv2
import numpy as np
import random
import os
import torchvision
from tqdm import tqdm
from PIL import Image
import gc
from torchvision import transforms
import torchvision.models as models

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [None]:
# Dataset locations on the Kaggle input volume.
TRAIN_PATH = "/kaggle/input/captcha-hacker/train"
TEST_PATH = "/kaggle/input/captcha-hacker/test"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using {device} device")

Using cuda device


In [None]:
class Task1Dataset(Dataset):
    """Images and integer labels for the ``task1/`` rows of an annotation list.

    Args:
        data: Iterable of ``(filename, label)`` rows. Only rows whose filename
            starts with ``"task1"`` are kept; everything else is dropped.
        root: Directory the relative filenames are resolved against.
        return_filename: When true, ``__getitem__`` yields ``(image, filename)``
            instead of ``(image, int(label))``.
    """

    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task1")]
        self.return_filename = return_filename
        self.root = root

    def __getitem__(self, index):
        """Load sample ``index`` and return it as a normalised float tensor."""
        filename, label = self.data[index]
        img = torchvision.io.read_image(f"{self.root}/{filename}")  # switched to torchvision.io
        trans = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(100),  # shorter edge -> 100 px, aspect ratio kept
            transforms.ToTensor(),   # scales pixel values to [0, 1]
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # per-channel standardisation
        ])
        # The pipeline above already yields a standardised float tensor. The
        # former extra "(img - 128) / 128" step assumed raw 0-255 pixels and
        # squeezed every value to roughly -1, so it has been removed.
        img = trans(img)

        if self.return_filename:
            return img, filename
        return img, int(label)

    def __len__(self):
        return len(self.data)

In [None]:

# Randomly assign every annotation row to train (~70 %) or validation (~30 %).
# A dedicated, seeded generator keeps the split identical across kernel
# restarts, so validation accuracies are comparable between runs.
split_rng = random.Random(42)
train_data = []
val_data = []
with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if split_rng.random() < 0.7:
            train_data.append(row)
        else:
            val_data.append(row)

# Task1Dataset keeps only the task1 rows of each partition.
train_ds = Task1Dataset(train_data, root=TRAIN_PATH)
train_dl = DataLoader(train_ds, batch_size=200, num_workers=2, drop_last=True, shuffle=True)

val_ds = Task1Dataset(val_data, root=TRAIN_PATH)
val_dl = DataLoader(val_ds, batch_size=200, num_workers=2, drop_last=False, shuffle=False)


In [None]:
# Fine-tune an ImageNet-pretrained ResNet-18 on task 1 (10-way classification).
best1 = 0.0
model = models.resnet18(pretrained = True) # use resnet18 as model, with pretrained weight
model.fc = nn.Linear(model.fc.in_features, 10) # modify fc layer in resnet18 -> output = 10
model = model.to(device)
# The optimizer has to be created AFTER the head is swapped. Built earlier, it
# would track the discarded 1000-way layer and never update the new one.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
print(next(model.parameters()).device)

for epoch in range(30):
    print(f"Epoch [{epoch}]")
    model.train()
    for image, label in train_dl:
        image = image.to(device)
        label = label.to(device)

        pred = model(image)
        loss = loss_fn(pred, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation pass: top-1 accuracy over the held-out task1 rows.
    sample_count = 0
    correct_count = 0
    model.eval()
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)

            pred = torch.argmax(model(image), dim=1)

            sample_count += len(image)
            # .item() keeps the running count a plain Python int
            correct_count += (label == pred).sum().item()
    # Guard against an empty validation partition.
    acc = correct_count / sample_count if sample_count else 0.0
    # Checkpoints are only considered once the first 15 epochs have passed.
    if acc > best1 and epoch > 14:
        best1 = acc
        torch.save(model.state_dict(),'/kaggle/working/model_1.pt') # save model when best result
        print("update best:", best1)
    print("accuracy (validation):", acc)



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

cuda:0
Epoch [0]
accuracy (validation): tensor(0.0997, device='cuda:0')
Epoch [1]
accuracy (validation): tensor(0.1029, device='cuda:0')
Epoch [2]
accuracy (validation): tensor(0.1095, device='cuda:0')
Epoch [3]
accuracy (validation): tensor(0.0997, device='cuda:0')
Epoch [4]
accuracy (validation): tensor(0.1667, device='cuda:0')
Epoch [5]
accuracy (validation): tensor(0.7549, device='cuda:0')
Epoch [6]
accuracy (validation): tensor(0.1144, device='cuda:0')
Epoch [7]
accuracy (validation): tensor(0.0997, device='cuda:0')
Epoch [8]
accuracy (validation): tensor(0.1029, device='cuda:0')
Epoch [9]
accuracy (validation): tensor(0.1356, device='cuda:0')
Epoch [10]
accuracy (validation): tensor(0.9690, device='cuda:0')
Epoch [11]
accuracy (validation): tensor(0.8235, device='cuda:0')
Epoch [12]
accuracy (validation): tensor(0.6863, device='cuda:0')
Epoch [13]
accuracy (validation): tensor(0.7565, device='cuda:0')
Epoch [14]
accuracy (validation): tensor(0.5359, device='cuda:0')
Epoch [15]
up

In [None]:
def text_process(label): # one-hot encoding
    """Encode a captcha string as one flat one-hot vector.

    Every character owns a 36-wide segment of the result: digits ``0-9`` map
    to slots 0-9 and lowercase letters ``a-z`` to slots 10-35. The segment of
    the character at position ``p`` starts at offset ``36 * p``.

    Args:
        label: String made of the characters ``0-9`` and ``a-z``.

    Returns:
        A list of ``len(label) * 36`` ints holding exactly one ``1`` per
        character and ``0`` elsewhere (an empty list for an empty label).

    Raises:
        ValueError: If ``label`` contains a character outside ``[0-9a-z]``.
            Such characters used to produce a negative or shifted index and
            silently set a wrong slot.
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    width = len(alphabet)
    vector = [0] * (len(label) * width) # vector of zeros for one-hot
    for position, ch in enumerate(label):
        slot = alphabet.find(ch)
        if slot < 0:
            raise ValueError(
                f"unsupported character {ch!r} at position {position} in label {label!r}"
            )
        vector[position * width + slot] = 1 # set corresponding place in vector to 1
    return vector


In [None]:
def tag_num(label):
    """Map each character of a captcha string to its index in the flat one-hot layout.

    Digits ``0-9`` are classes 0-9 and lowercase letters ``a-z`` are classes
    10-35; the character at position ``p`` is offset by ``36 * p``.

    Args:
        label: String made of the characters ``0-9`` and ``a-z``.

    Returns:
        A list with one int per character (an empty list for an empty label).

    Raises:
        ValueError: If ``label`` contains a character outside ``[0-9a-z]``.
            Such characters used to yield a negative or shifted index.
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    width = len(alphabet)
    vector = []
    for position, ch in enumerate(label):
        slot = alphabet.find(ch)
        if slot < 0:
            raise ValueError(
                f"unsupported character {ch!r} at position {position} in label {label!r}"
            )
        vector.append(slot + width * position)
    return vector


In [None]:
class Task2Dataset(Dataset):
    """Images and one-hot labels for the ``task2/`` rows of an annotation list.

    Args:
        data: Iterable of ``(filename, label)`` rows. Only rows whose filename
            starts with ``"task2"`` are kept.
        root: Directory the relative filenames are resolved against.
        return_filename: When true, ``__getitem__`` yields ``(image, filename)``
            instead of ``(image, one_hot_label)``.
    """

    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task2")]
        self.return_filename = return_filename
        self.root = root

    def __getitem__(self, index):
        """Load sample ``index`` and return it as a normalised float tensor."""
        filename, label = self.data[index]
        img = torchvision.io.read_image(f"{self.root}/{filename}")  # switched to torchvision.io
        trans = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(250),  # shorter edge -> 250 px, aspect ratio kept
            transforms.ToTensor(),   # scales pixel values to [0, 1]
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        # The pipeline already yields a standardised float tensor. The former
        # extra "/ 255" assumed raw 0-255 pixels and shrank the input to
        # almost zero, so it has been removed.
        img = trans(img)
        if self.return_filename:
            # The label is not returned here, so it is not encoded either.
            return img, filename
        return img, torch.LongTensor(text_process(label))

    def __len__(self):
        return len(self.data)

In [None]:
# Randomly assign every annotation row to train (~80 %) or validation (~20 %).
# A dedicated, seeded generator keeps the split identical across kernel
# restarts, so validation accuracies are comparable between runs.
split2_rng = random.Random(42)
train2_data = []
val2_data = []

with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if split2_rng.random() < 0.8:
            train2_data.append(row)
        else:
            val2_data.append(row)

# Task2Dataset keeps only the task2 rows of each partition.
train2_ds = Task2Dataset(train2_data, root=TRAIN_PATH)
train2_dl = DataLoader(train2_ds, batch_size=100, num_workers=2, drop_last=True, shuffle=True)

val2_ds = Task2Dataset(val2_data, root=TRAIN_PATH)
val2_dl = DataLoader(val2_ds, batch_size=100, num_workers=2, drop_last=False, shuffle=False)


In [None]:
# Fine-tune an ImageNet-pretrained ResNet-18 on task 2: two characters, each
# predicted as one 36-wide segment of a 72-wide output.
model2 = models.resnet18(pretrained = True).to(device)
loss_fn2 = torch.nn.MultiLabelSoftMarginLoss().to(device) # loss function for one-hot encoding
model2.fc = nn.Linear(512, 72).to(device) # modify fc layer in resnet18 -> output = 72 (10 digits + 26 alphabets) * 2
optimizer2 = torch.optim.Adam(model2.parameters(), lr=1e-3)
print(next(model2.parameters()).device)

best2 = 0.0
for epoch in range(30):
    print(f"Epoch [{epoch}]")
    model2.train()
    for image, label in train2_dl:
        image = image.to(device)
        label = label.to(device).float()  # the loss mixes targets with float logits

        pred = model2(image)
        loss = loss_fn2(pred, label)

        optimizer2.zero_grad()
        loss.backward()
        optimizer2.step()

    # Validation pass: a sample counts as correct only if both characters match.
    sample_count = 0
    correct_count = 0
    model2.eval()
    with torch.no_grad():
        for image, label in val2_dl:
            image = image.to(device)
            label = label.to(device).float()
            pred = model2(image)

            # Decode prediction and one-hot label to per-character class
            # indices and compare those. This avoids a dtype-sensitive
            # Tensor.equal between a Long label and a float one-hot.
            pred_chars = pred.reshape(-1, 2, 36).argmax(dim=2)
            true_chars = label.reshape(-1, 2, 36).argmax(dim=2)
            correct_count += (pred_chars == true_chars).all(dim=1).sum().item()
            sample_count += len(image)
    # Guard against an empty validation partition.
    acc = correct_count / sample_count if sample_count else 0.0
    # Checkpoints are only considered once the first 15 epochs have passed.
    if acc > best2 and epoch > 14:
        best2 = acc
        torch.save(model2.state_dict(),'/kaggle/working/model_2.pt')
        print("update best:", best2)
    print("accuracy (validation):", acc, correct_count, sample_count)

cuda:0
Epoch [0]
accuracy (validation): 0.0 0 487
Epoch [1]
accuracy (validation): 0.0 0 487
Epoch [2]
accuracy (validation): 0.3326488706365503 162 487
Epoch [3]
accuracy (validation): 0.9240246406570842 450 487
Epoch [4]
accuracy (validation): 0.9691991786447639 472 487
Epoch [5]
accuracy (validation): 0.9938398357289527 484 487
Epoch [6]
accuracy (validation): 1.0 487 487
Epoch [7]
accuracy (validation): 0.9958932238193019 485 487
Epoch [8]
accuracy (validation): 0.997946611909651 486 487
Epoch [9]
accuracy (validation): 0.9958932238193019 485 487
Epoch [10]
accuracy (validation): 0.997946611909651 486 487
Epoch [11]
accuracy (validation): 1.0 487 487
Epoch [12]
accuracy (validation): 1.0 487 487
Epoch [13]
accuracy (validation): 0.9958932238193019 485 487
Epoch [14]
accuracy (validation): 1.0 487 487
Epoch [15]
update best: 1.0
accuracy (validation): 1.0 487 487
Epoch [16]
accuracy (validation): 0.9958932238193019 485 487
Epoch [17]
accuracy (validation): 0.9958932238193019 485 487

In [None]:
class Task3Dataset(Dataset):
    """Images and one-hot labels for the ``task3/`` rows of an annotation list.

    Args:
        data: Iterable of ``(filename, label)`` rows. Only rows whose filename
            starts with ``"task3"`` are kept.
        root: Directory the relative filenames are resolved against.
        return_filename: When true, ``__getitem__`` yields ``(image, filename)``
            instead of ``(image, one_hot_label)``.
    """

    def __init__(self, data, root, return_filename=False):
        self.data = [sample for sample in data if sample[0].startswith("task3")]
        self.return_filename = return_filename
        self.root = root

    def __getitem__(self, index):
        """Load sample ``index`` and return it as a normalised float tensor."""
        filename, label = self.data[index]
        img = torchvision.io.read_image(f"{self.root}/{filename}")  # switched to torchvision.io
        trans = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(275),  # shorter edge -> 275 px, aspect ratio kept
            transforms.ToTensor(),   # scales pixel values to [0, 1]
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # normalize
        ])
        # The pipeline already yields a standardised float tensor. The former
        # extra "/ 255" assumed raw 0-255 pixels and shrank the input to
        # almost zero, so it has been removed.
        img = trans(img)

        if self.return_filename:
            # The label is not returned here, so it is not encoded either.
            return img, filename
        return img, torch.LongTensor(text_process(label))

    def __len__(self):
        return len(self.data)

In [None]:
# Randomly assign every annotation row to train (~90 %) or validation (~10 %).
# A dedicated, seeded generator keeps the split identical across kernel
# restarts, so validation accuracies are comparable between runs.
split3_rng = random.Random(42)
train3_data = []
val3_data = []

with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if split3_rng.random() < 0.9:
            train3_data.append(row)
        else:
            val3_data.append(row)

# Task3Dataset keeps only the task3 rows of each partition.
train3_ds = Task3Dataset(train3_data, root=TRAIN_PATH)
train3_dl = DataLoader(train3_ds, batch_size=75, num_workers=2, drop_last=True, shuffle=True)

val3_ds = Task3Dataset(val3_data, root=TRAIN_PATH)
val3_dl = DataLoader(val3_ds, batch_size=75, num_workers=2, drop_last=False, shuffle=False)

In [None]:
# Fine-tune an ImageNet-pretrained ResNet-18 on task 3: four characters, each
# predicted as one 36-wide segment of a 144-wide output.
model3 = models.resnet18(pretrained = True).to(device)
loss_fn3 = torch.nn.MultiLabelSoftMarginLoss().to(device) # loss function for one-hot encoding
model3.fc = nn.Linear(512, 144).to(device) # modify fc layer in resnet18 -> output = 144 (10 digits + 26 alphabets) * 4
optimizer3 = torch.optim.Adam(model3.parameters(), lr=1e-3)
print(next(model3.parameters()).device)


best3 = 0.0
for epoch in range(30):
    print(f"Epoch [{epoch}]")
    model3.train()
    for image, label in train3_dl:
        image = image.to(device)
        label = label.to(device).float()  # the loss mixes targets with float logits

        pred = model3(image)
        loss = loss_fn3(pred, label)

        optimizer3.zero_grad()
        loss.backward()
        optimizer3.step()

    # Validation pass: a sample counts as correct only if all four characters match.
    sample_count = 0
    correct_count = 0
    model3.eval()
    with torch.no_grad():
        for image, label in val3_dl:
            image = image.to(device)
            label = label.to(device).float()

            pred = model3(image)

            # Decode prediction and one-hot label to per-character class
            # indices and compare those. This avoids a dtype-sensitive
            # Tensor.equal between a Long label and a float one-hot.
            pred_chars = pred.reshape(-1, 4, 36).argmax(dim=2)
            true_chars = label.reshape(-1, 4, 36).argmax(dim=2)
            correct_count += (pred_chars == true_chars).all(dim=1).sum().item()
            sample_count += len(image)
    # Guard against an empty validation partition.
    acc = correct_count / sample_count if sample_count else 0.0
    # Checkpoints are only considered once the first 15 epochs have passed.
    if acc > best3 and epoch > 14:
        best3 = acc
        torch.save(model3.state_dict(),'/kaggle/working/model_3.pt')
        print("update best:", best3)
    print("accuracy (validation):", acc, correct_count, sample_count)

cuda:0
Epoch [0]
accuracy (validation): 0.0 0 293
Epoch [1]
accuracy (validation): 0.0 0 293
Epoch [2]
accuracy (validation): 0.06143344709897611 18 293
Epoch [3]
accuracy (validation): 0.2627986348122867 77 293
Epoch [4]
accuracy (validation): 0.5767918088737202 169 293
Epoch [5]
accuracy (validation): 0.8532423208191127 250 293
Epoch [6]
accuracy (validation): 0.9419795221843004 276 293
Epoch [7]
accuracy (validation): 0.9590443686006825 281 293
Epoch [8]
accuracy (validation): 0.9658703071672355 283 293
Epoch [9]
accuracy (validation): 0.9829351535836177 288 293
Epoch [10]
accuracy (validation): 0.9863481228668942 289 293
Epoch [11]
accuracy (validation): 0.9795221843003413 287 293
Epoch [12]
accuracy (validation): 0.9863481228668942 289 293
Epoch [13]
accuracy (validation): 0.9829351535836177 288 293
Epoch [14]
accuracy (validation): 0.9761092150170648 286 293
Epoch [15]
update best: 0.9795221843003413
accuracy (validation): 0.9795221843003413 287 293
Epoch [16]
update best: 0.9829