In [None]:
import csv
import cv2
import numpy as np
import random
import os

import torchvision
import torch
import torch.nn as nn
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader

In [None]:
from google.colab import drive

# Mount Google Drive so the dataset folders below are reachable from this runtime.
drive.mount('/content/drive')

# Dataset locations (relative paths, resolved against the current working directory).
TRAIN_PATH = "drive/MyDrive/captcha-hacker/train"
TEST_PATH = "drive/MyDrive/captcha-hacker/test"

# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing on the first `.to(device)` call.
device = "cuda" if torch.cuda.is_available() else "cpu"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

class Task_Dataset(Dataset):
    """Captcha samples belonging to a single task.

    Args:
        data: iterable of ``[filename, label]`` rows; only rows whose filename
            starts with ``"task<task>"`` are kept.
        root: directory that the filenames are relative to.
        task: ``1``, ``2`` or ``3``.
        return_filename: when true, ``__getitem__`` yields ``(image, filename)``
            instead of ``(image, target)``.

    Raises:
        ValueError: if ``task`` is not 1, 2 or 3.

    ``__getitem__`` targets:
        * task 1 -> ``int(label)``
        * task 2 -> 72-element 0/1 tensor (2 characters x 36 classes)
        * task 3 -> 144-element 0/1 tensor (4 characters x 36 classes)
    """

    # Per-character alphabet: digits occupy slots 0-9, lowercase letters 10-35.
    _CHARSET = "0123456789abcdefghijklmnopqrstuvwxyz"
    # Number of label characters encoded for the multi-character tasks.
    _LABEL_LENGTH = {2: 2, 3: 4}

    def __init__(self, data, root, task, return_filename=False):
        if task not in (1, 2, 3):
            # Previously an unknown task left `self.data` unset and only failed
            # later with an AttributeError; fail early with a clear message.
            raise ValueError(f"task must be 1, 2 or 3, got {task!r}")
        prefix = f"task{int(task)}"
        self.data = [sample for sample in data if sample[0].startswith(prefix)]
        self.return_filename = return_filename
        self.root = root
        self.task = task

    @staticmethod
    def _encode_label(label, n_chars):
        """Encode the first ``n_chars`` characters of ``label`` as a multi-hot tensor.

        Character ``i`` sets index ``i * 36 + slot`` where ``slot`` is the
        character's position in ``_CHARSET``.

        Raises:
            ValueError: if a character is not a digit or a lowercase ASCII letter
                (such characters used to silently set an unrelated index).
        """
        charset = Task_Dataset._CHARSET
        n_classes = len(charset)
        table = [0] * (n_chars * n_classes)
        for position in range(n_chars):
            char = label[position]
            slot = charset.find(char)
            if slot < 0:
                raise ValueError(
                    f"unsupported character {char!r} in label {label!r}; "
                    "expected digits or lowercase letters"
                )
            table[position * n_classes + slot] = 1
        return torch.tensor(table)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, target)`` or ``(image, filename)``."""
        filename, label = self.data[index]
        img_origin = read_image(f"{self.root}/{filename}")
        # `transform` is the module-level torchvision pipeline defined in this file.
        img_trans = transform(img_origin)
        # NOTE(review): `transform` already applies ToTensor + Normalize, so this
        # extra division by 255 shrinks the inputs a second time. It is kept
        # because the saved checkpoints are trained with it; revisit only
        # together with retraining.
        image = torch.FloatTensor(img_trans / 255)

        if self.return_filename:
            return image, filename
        if self.task == 1:
            return image, int(label)
        return image, self._encode_label(label, self._LABEL_LENGTH[self.task])

    def __len__(self):
        """Number of samples kept for the selected task."""
        return len(self.data)
    
# Preprocessing applied to every image tensor loaded by the dataset:
# tensor -> PIL image -> resize (size=300) -> float tensor -> per-channel normalisation.
transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.Resize(size=300),
        torchvision.transforms.ToTensor(),
        # Mean / std look like the commonly used ImageNet channel statistics.
        torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [25]:

# Task 1: fine-tune a pretrained ResNet-18 with a 10-way classification head.
train_data = []
val_data = []

with open("drive/MyDrive/captcha-hacker/train/annotations.csv", newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        # random.random() is always < 2, so every row lands in train_data and
        # val_data stays empty (the whole annotation file is used for training).
        if random.random() < 2:
            train_data.append(row)
        else:
            val_data.append(row)

train_ds = Task_Dataset(train_data, root=TRAIN_PATH, task = 1)
train_dl = DataLoader(train_ds, batch_size=60, num_workers=4, drop_last=True, shuffle=True)

val_ds = Task_Dataset(val_data, root=TRAIN_PATH, task = 1)
val_dl = DataLoader(val_ds, batch_size=60, num_workers=4, drop_last=False, shuffle=False)

# Replace the final fully connected layer so the network emits 10 logits.
model = torchvision.models.resnet18(pretrained = True).to(device)
model.fc = nn.Linear(in_features = 512, out_features=10, bias=True).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()


for epoch in range(15):
    print(f"Epoch [{epoch}]")
    model.train()
    for image, label in train_dl:
        image = image.to(device)
        label = label.to(device)

        pred = model(image)
        loss = loss_fn(pred, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# No validation pass is run (val_data is empty), so the unused per-epoch
# counters were dropped; the model is still left in eval mode after training.
model.eval()

torch.save(model.state_dict(), "drive/MyDrive/model1")


Epoch [0]
Epoch [1]
Epoch [2]
Epoch [3]
Epoch [4]
Epoch [5]
Epoch [6]
Epoch [7]
Epoch [8]
Epoch [9]
Epoch [10]
Epoch [11]
Epoch [12]
Epoch [13]
Epoch [14]


In [26]:
## T2: fine-tune a pretrained ResNet-18 with a 72-logit head (2 characters x 36 classes).

train_data = []
val_data = []

with open("drive/MyDrive/captcha-hacker/train/annotations.csv", newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        # random.random() is always < 2, so every row lands in train_data and
        # val_data stays empty (the whole annotation file is used for training).
        if random.random() < 2:
            train_data.append(row)
        else:
            val_data.append(row)

train_ds = Task_Dataset(train_data, root=TRAIN_PATH, task = 2)
train_dl = DataLoader(train_ds, batch_size=60, num_workers=4, drop_last=True, shuffle=True)

val_ds = Task_Dataset(val_data, root=TRAIN_PATH, task = 2)
val_dl = DataLoader(val_ds, batch_size=60, num_workers=4, drop_last=False, shuffle=False)

# Replace the final fully connected layer so the network emits 72 logits.
model = torchvision.models.resnet18(pretrained = True).to(device)
model.fc = nn.Linear(in_features = 512, out_features=72, bias=True).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Multi-label loss: the dataset target is a 0/1 vector with one active slot per character.
loss_fn = nn.MultiLabelSoftMarginLoss().to(device)

# The former `max = 0` was never read and shadowed the builtin `max` for every
# later cell, so it has been removed.
for epoch in range(30):
    print(f"Epoch [{epoch}]")
    model.train()
    for image, label in train_dl:
        image = image.to(device)
        label = label.to(device)

        pred = model(image)
        loss = loss_fn(pred, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
torch.save(model.state_dict(), "drive/MyDrive/model2")



Epoch [0]
Epoch [1]
Epoch [2]
Epoch [3]
Epoch [4]
Epoch [5]
Epoch [6]
Epoch [7]
Epoch [8]
Epoch [9]
Epoch [10]
Epoch [11]
Epoch [12]
Epoch [13]
Epoch [14]
Epoch [15]
Epoch [16]
Epoch [17]
Epoch [18]
Epoch [19]
Epoch [20]
Epoch [21]
Epoch [22]
Epoch [23]
Epoch [24]
Epoch [25]
Epoch [26]
Epoch [27]
Epoch [28]
Epoch [29]


In [27]:
## T3: fine-tune a pretrained ResNet-18 with a 144-logit head (4 characters x 36 classes).

train_data, val_data = [], []

with open("drive/MyDrive/captcha-hacker/train/annotations.csv", newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        # The draw is always below 2, so each row is appended to train_data
        # and val_data remains empty.
        (train_data if random.random() < 2 else val_data).append(row)

train_ds = Task_Dataset(train_data, root=TRAIN_PATH, task=3)
train_dl = DataLoader(
    train_ds,
    batch_size=60,
    num_workers=4,
    drop_last=True,
    shuffle=True,
)

val_ds = Task_Dataset(val_data, root=TRAIN_PATH, task=3)
val_dl = DataLoader(
    val_ds,
    batch_size=60,
    num_workers=4,
    drop_last=False,
    shuffle=False,
)

# Swap the final fully connected layer for one that emits 144 logits.
model = torchvision.models.resnet18(pretrained=True).to(device)
model.fc = nn.Linear(in_features=512, out_features=144, bias=True).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MultiLabelSoftMarginLoss().to(device)

for epoch in range(50):
    print(f"Epoch [{epoch}]")
    model.train()
    for image, label in train_dl:
        image, label = image.to(device), label.to(device)

        # Clear stale gradients, then run one forward/backward/update step.
        optimizer.zero_grad()
        pred = model(image)
        loss = loss_fn(pred, label)
        loss.backward()
        optimizer.step()

torch.save(model.state_dict(), "/content/drive/MyDrive/model3")

Epoch [0]
Epoch [1]
Epoch [2]
Epoch [3]
Epoch [4]
Epoch [5]
Epoch [6]
Epoch [7]
Epoch [8]
Epoch [9]
Epoch [10]
Epoch [11]
Epoch [12]
Epoch [13]
Epoch [14]
Epoch [15]
Epoch [16]
Epoch [17]
Epoch [18]
Epoch [19]
Epoch [20]
Epoch [21]
Epoch [22]
Epoch [23]
Epoch [24]
Epoch [25]
Epoch [26]
Epoch [27]
Epoch [28]
Epoch [29]
Epoch [30]
Epoch [31]
Epoch [32]
Epoch [33]
Epoch [34]
Epoch [35]
Epoch [36]
Epoch [37]
Epoch [38]
Epoch [39]
Epoch [40]
Epoch [41]
Epoch [42]
Epoch [43]
Epoch [44]
Epoch [45]
Epoch [46]
Epoch [47]
Epoch [48]
Epoch [49]
