In [1]:
! mkdir ~/.kaggle

In [2]:
# Move the Kaggle API token into ~/.kaggle/ and restrict it to owner read/write (mode 600).
!mv ./kaggle.json  ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [3]:
# Download the competition archive with the Kaggle CLI.
!kaggle competitions download -c fathomnet-out-of-sample-detection

Downloading fathomnet-out-of-sample-detection.zip to /content
 77% 1.00M/1.30M [00:00<00:00, 1.99MB/s]
100% 1.30M/1.30M [00:00<00:00, 2.44MB/s]


In [8]:
!unzip fathomnet-out-of-sample-detection.zip

Archive:  fathomnet-out-of-sample-detection.zip
  inflating: category_key.csv        
  inflating: demo_download.ipynb     
  inflating: download_images.py      
  inflating: multilabel_classification/train.csv  
  inflating: object_detection/eval.json  
  inflating: object_detection/train.json  
  inflating: requirements.txt        
  inflating: sample_submission.csv   


# **Install libraries**

In [None]:
!pip install positional-encodings timm

# **Dataset download**

In [1]:
import os
import json
import requests
from shutil import copyfileobj
from multiprocessing.pool import ThreadPool

In [2]:
def read_json(filename):
    """Parse the JSON file at ``filename`` and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes instead of being left for the garbage collector.
    """
    with open(filename, "r") as f:
        return json.load(f)


def delete_empty_files(folder_path):
    """Delete every zero-byte regular file located directly inside ``folder_path``.

    Sub-directories and non-empty files are left untouched.
    """
    entries = (os.path.join(folder_path, name) for name in os.listdir(folder_path))
    for entry in entries:
        if not os.path.isfile(entry):
            continue
        if os.path.getsize(entry) != 0:
            continue
        os.remove(entry)


def download_image(image, timeout=30):
    """Download one image to disk unless the destination already exists.

    Args:
        image: Two-item sequence ``[destination_path, url]``.
        timeout: Seconds passed to ``requests.get`` so a stalled server cannot
            block a worker thread forever.

    Raises:
        requests.HTTPError: If the server answers with an error status; the
            error body is never written to disk as if it were an image.

    The payload is streamed into ``<destination>.part`` and renamed only after
    the transfer completes, so an interrupted download never leaves a
    truncated, non-empty file that later runs would skip as "already present".
    """
    dest_path, url = image[0], image[1]
    if os.path.exists(dest_path):
        return

    tmp_path = dest_path + ".part"
    try:
        # The context manager releases the connection even if an error occurs.
        with requests.get(url, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            resp.raw.decode_content = True
            with open(tmp_path, 'wb') as f:
                copyfileobj(resp.raw, f)
        os.replace(tmp_path, dest_path)
    finally:
        # Only present here if the transfer failed before the rename.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [12]:
def valid_download(num_threads=32):
    """Download every evaluation image that is not yet present in ``./imgs/valid/``.

    Zero-byte files in the folder are deleted first so they are fetched again.
    File names and URLs come from the ``file_name`` and ``coco_url`` fields of
    the ``images`` list in ``./object_detection/eval.json``.

    Args:
        num_threads: Upper bound on concurrent download threads. A bounded
            pool avoids spawning thousands of OS threads for an I/O-bound job.
    """
    delete_empty_files("./imgs/valid/")
    valid_json = read_json("./object_detection/eval.json")

    valid_imgs = []
    for info in valid_json["images"]:
        target = "./imgs/valid/" + info["file_name"]
        if not os.path.exists(target):
            valid_imgs.append([target, info["coco_url"]])

    if not valid_imgs:
        # Nothing to fetch; ThreadPool rejects a size of zero.
        return

    pool_size = max(1, min(num_threads, len(valid_imgs)))
    # The context manager tears the pool down even if a download raises.
    with ThreadPool(pool_size) as pool:
        pool.map(download_image, valid_imgs)

In [13]:
def train_download(num_threads=32):
    """Download every training image that is not yet present in ``./imgs/train/``.

    Zero-byte files in the folder are deleted first so they are fetched again.
    File names and URLs come from the ``file_name`` and ``coco_url`` fields of
    the ``images`` list in ``./object_detection/train.json``.

    Args:
        num_threads: Upper bound on concurrent download threads. A bounded
            pool avoids spawning thousands of OS threads for an I/O-bound job.
    """
    delete_empty_files("./imgs/train/")
    train_json = read_json("./object_detection/train.json")

    train_imgs = []
    for info in train_json["images"]:
        target = "./imgs/train/" + info["file_name"]
        if not os.path.exists(target):
            train_imgs.append([target, info["coco_url"]])

    if not train_imgs:
        # Nothing to fetch; ThreadPool rejects a size of zero.
        return

    pool_size = max(1, min(num_threads, len(train_imgs)))
    # The context manager tears the pool down even if a download raises.
    with ThreadPool(pool_size) as pool:
        pool.map(download_image, train_imgs)

In [14]:
# Make sure the image folders exist. makedirs creates the missing parent
# (./imgs) as needed and exist_ok=True makes the cell safe to re-run.
for folder in ("./imgs/train/", "./imgs/valid/"):
    os.makedirs(folder, exist_ok=True)

In [17]:
%%time
# Fetch the training images that are still missing from ./imgs/train/.
train_download()

CPU times: user 2.12 s, sys: 891 ms, total: 3.02 s
Wall time: 3.83 s


In [115]:
%%time 
# Fetch the evaluation images that are still missing from ./imgs/valid/.
valid_download()

CPU times: user 5min 42s, sys: 5min 14s, total: 10min 56s
Wall time: 7min 49s


In [3]:
# Load the training annotation file into memory.
data = read_json("./object_detection/train.json")

In [19]:
# List the top-level keys of the annotation file.
data.keys()

dict_keys(['info', 'images', 'licenses', 'annotations', 'categories'])

In [4]:
# Category records from the annotation file; their count is used to size NUM_CLASSES.
categories = data['categories']
categories

[{'id': 1, 'name': 'Actiniaria', 'supercategory': 'Anemone'},
 {'id': 2, 'name': 'Actinernus', 'supercategory': 'Anemone'},
 {'id': 3, 'name': 'Actiniidae', 'supercategory': 'Anemone'},
 {'id': 4, 'name': 'Actinoscyphia', 'supercategory': 'Anemone'},
 {'id': 5, 'name': 'Bolocera', 'supercategory': 'Anemone'},
 {'id': 6, 'name': 'Dofleinia', 'supercategory': 'Anemone'},
 {'id': 7, 'name': 'Hormathiidae', 'supercategory': 'Anemone'},
 {'id': 8, 'name': 'Isosicyonis', 'supercategory': 'Anemone'},
 {'id': 9, 'name': 'Liponema brevicorne', 'supercategory': 'Anemone'},
 {'id': 10, 'name': 'Metridium farcimen', 'supercategory': 'Anemone'},
 {'id': 11, 'name': 'Actinopterygii', 'supercategory': 'Fish'},
 {'id': 12, 'name': 'Agonidae', 'supercategory': 'Fish'},
 {'id': 13, 'name': 'Albatrossia pectoralis', 'supercategory': 'Fish'},
 {'id': 14, 'name': 'Alepocephalus tenebrosus', 'supercategory': 'Fish'},
 {'id': 15, 'name': 'Anarrhichthys ocellatus', 'supercategory': 'Fish'},
 {'id': 16, 'name'

# **Create Custom Dataset**

In [5]:
import os
import json 
import torch
from PIL import Image 
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

In [6]:
class FathomDataset(Dataset):
    """Multi-label training dataset built from an image folder and an annotation JSON.

    Each item is ``(image, label)``: ``image`` is the RGB image after resizing,
    tensor conversion and normalization; ``label`` is a float vector of length
    ``num_classes`` holding 1.0 at the index of every ``category_id``
    annotated for that image.

    Args:
        img_path: Folder containing the image files.
        json_path: Annotation file with ``images`` and ``annotations`` lists.
        num_classes: Length of the label vector.
        img_size: Images are resized to ``img_size`` x ``img_size``.
    """

    def __init__(self, img_path, json_path, num_classes, img_size):
        super().__init__()
        self.img_path = img_path
        self.num_classes = num_classes
        self._load_json_data(json_path)
        self.transform = transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            # Presumably the standard ImageNet channel statistics -- confirm
            # they match the pretrained backbone in use.
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )
        ])

    def _load_json_data(self, json_path):
        """Read the annotation file (closing it afterwards) and index its annotations."""
        with open(json_path, "rb") as f:
            self.json_data = json.load(f)
        self._index_annotations()

    def _index_annotations(self):
        """Group category ids by image id once, so label lookup does not rescan every annotation."""
        category_ids_by_image = {}
        for annotation in self.json_data["annotations"]:
            category_ids_by_image.setdefault(annotation["image_id"], []).append(
                int(annotation["category_id"])
            )
        self._category_ids_by_image = category_ids_by_image

    def __len__(self):
        return len(self.json_data["images"])

    def get_one_hot_label(self, image_id):
        """Return the multi-hot label vector for ``image_id``.

        Images without annotations get an all-zero vector.
        """
        if getattr(self, "_category_ids_by_image", None) is None:
            # json_data was assigned without going through _load_json_data.
            self._index_annotations()

        category_ids = self._category_ids_by_image.get(image_id, [])
        one_hot_vector = torch.zeros(self.num_classes)
        if category_ids:
            one_hot_vector[torch.tensor(category_ids, dtype=torch.long)] = 1.0
        return one_hot_vector

    def __getitem__(self, idx):
        image_info = self.json_data["images"][idx]
        filename = image_info["file_name"]
        img_id = image_info["id"]

        label = self.get_one_hot_label(img_id)
        # Close the image file as soon as the pixel data has been converted.
        with Image.open(os.path.join(self.img_path, filename)) as raw_img:
            img = raw_img.convert("RGB")
        img = self.transform(img)

        return img, label

In [7]:
class FathomValidDataset(Dataset):
    """Unlabelled evaluation dataset built from an image folder and an annotation JSON.

    Each item is ``(image, name)``: ``image`` is the RGB image after resizing,
    tensor conversion and normalization; ``name`` is the image file name with
    its extension removed.

    Args:
        img_path: Folder containing the image files.
        json_path: Annotation file with an ``images`` list.
        img_size: Images are resized to ``img_size`` x ``img_size``.
    """

    def __init__(self, img_path, json_path, img_size):
        super().__init__()
        self.img_path = img_path
        self._load_json_data(json_path)
        self.transform = transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            # Presumably the standard ImageNet channel statistics -- confirm
            # they match the pretrained backbone in use.
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )
        ])

    def _load_json_data(self, json_path):
        """Read the annotation file, closing it once parsed."""
        with open(json_path, "rb") as f:
            self.json_data = json.load(f)

    def __len__(self):
        return len(self.json_data["images"])

    def __getitem__(self, idx):
        image_info = self.json_data["images"][idx]
        filename = image_info["file_name"]
        # Close the image file as soon as the pixel data has been converted.
        with Image.open(os.path.join(self.img_path, filename)) as raw_img:
            img = raw_img.convert("RGB")
        img = self.transform(img)

        # splitext strips only the final extension, so names that contain
        # additional dots are not truncated at the first one.
        return img, os.path.splitext(filename)[0]

In [8]:
# Training-data locations and input-pipeline settings.
IMG_PATH = "./imgs/train/"  # folder holding the downloaded training images
JSON_PATH = "./object_detection/train.json"  # training annotation file
NUM_CLASSES = len(categories) + 1 # add 1 class for "out of sample"
IMG_SIZE = 256  # edge length passed to the datasets' Resize transform
BATCH_SIZE = 64  # batch size passed to the training DataLoader

In [9]:
# Build the evaluation dataset from ./imgs/valid/ and eval.json, then show its size.
valid_dataset = FathomValidDataset("./imgs/valid/", "./object_detection/eval.json", IMG_SIZE)
len(valid_dataset)

10744

In [10]:
# Build the labelled training dataset, then show its size.
dataset = FathomDataset(IMG_PATH, JSON_PATH, NUM_CLASSES, IMG_SIZE)
len(dataset)

5950

In [11]:
# Shuffled training loader using two worker processes; the last line shows the number of batches.
dataloader = DataLoader(
    dataset,
    batch_size = BATCH_SIZE,
    shuffle = True,
    num_workers = 2    
)
len(dataloader)

93

In [14]:
# Sanity check: print the tensor shapes of the first batch only.
# NOTE(review): the recorded output shows a batch of 16 at 512x512, which does
# not match BATCH_SIZE = 64 / IMG_SIZE = 256 -- it looks stale; re-run to confirm.
for idx, (X, y) in enumerate(dataloader):
    print(X.size(), y.size())
    break

torch.Size([16, 3, 512, 512]) torch.Size([16, 291])


# **Create Model**

In [12]:
import torch
import timm
import torch.nn as nn
from positional_encodings.torch_encodings import PositionalEncodingPermute2D, Summer

In [13]:
class Query2Label(nn.Module):
    """Multi-label classifier: image backbone + transformer with one learnable query per class.

    A timm backbone produces a feature map, a 1x1 convolution projects it to
    ``hidden_dim`` channels, and an ``nn.Transformer`` takes the flattened
    feature map as source and one learnable embedding per class as target.
    The per-class transformer outputs are concatenated and mapped by a linear
    layer to ``num_classes`` scores.

    Args:
        backbone: Model name passed to ``timm.create_model`` (created with
            ``pretrained=True``).
        conv_out: Input channel count of the 1x1 projection; it is applied to
            the backbone output, so it has to equal the backbone's channels.
        num_classes: Number of label embeddings and of output scores.
        hidden_dim: Channel width after the projection and transformer model
            dimension.
        n_heads: Second positional argument of ``nn.Transformer``.
        encoder_layers: Third positional argument of ``nn.Transformer``.
        decoder_layers: Fourth positional argument of ``nn.Transformer``.
        use_pos_encoding: When true, a 2D positional encoding is added to the
            projected feature map before it is flattened.
    """


    def __init__(self, 
                 backbone, conv_out, num_classes, 
                 hidden_dim=256, n_heads=4, 
                 encoder_layers=1, decoder_layers=2,
                 use_pos_encoding=True):
        super().__init__()
        self.num_classes = num_classes
        self.hidden_dim = hidden_dim
        self.use_pos_encoding = use_pos_encoding

        # num_classes=0 / global_pool='' presumably make timm return the
        # unpooled feature map (forward unpacks a 4-D shape after the
        # projection) -- confirm against the timm documentation.
        self.backbone = timm.create_model(
            backbone, 
            pretrained=True,
            num_classes=0,
            global_pool=''
        ) 

        # 1x1 convolution: conv_out channels -> hidden_dim channels.
        self.conv = nn.Conv2d(conv_out, hidden_dim, 1)
        self.transformer = nn.Transformer(
            hidden_dim, n_heads, encoder_layers, decoder_layers)

        if self.use_pos_encoding:
            # Summer wraps the encoder so that calling it adds the encoding to its input.
            self.pos_encoder = PositionalEncodingPermute2D(hidden_dim)
            self.encoding_adder = Summer(self.pos_encoder)

        # The classifier consumes all per-class outputs concatenated into one vector.
        self.classifier = nn.Linear(num_classes * hidden_dim, num_classes)
        # One learnable embedding per class, shared across the batch.
        self.label_emb = nn.Parameter(torch.rand(1, num_classes, hidden_dim))


    def forward(self, x):
        """Return a ``(B, num_classes)`` tensor of raw scores; no sigmoid is applied here."""
        out = self.backbone(x)
        h = self.conv(out)
        B, C, H, W = h.shape

        if self.use_pos_encoding:
            # Features are scaled by 0.1 before the positional encoding is added.
            h = self.encoding_adder(h*0.1)

        # (B, C, H, W) -> (H*W, B, C): sequence-first layout for the transformer source.
        h = h.flatten(2).permute(2, 0, 1)
        # (1, num_classes, hidden_dim) -> (B, num_classes, hidden_dim) -> (num_classes, B, hidden_dim).
        label_emb = self.label_emb.repeat(B, 1, 1)
        label_emb = label_emb.transpose(0, 1)
        
        # Feature sequence is the source, label embeddings are the target; back to batch-first afterwards.
        h = self.transformer(h, label_emb).transpose(0, 1)
        h = torch.reshape(h,(B, self.num_classes * self.hidden_dim))

        return self.classifier(h)

In [34]:
# Instantiate the classifier with a resnet18 backbone; 512 is the input channel count of the 1x1 projection.
model = Query2Label("resnet18", 512, NUM_CLASSES)

In [None]:
# Use the first CUDA device when available, otherwise the CPU, and move the model there.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [16]:
# Sanity check: run one batch through the model without gradients and compare
# the output shape with the label shape.
model.eval()
with torch.no_grad():
    for idx, (X, y) in enumerate(dataloader):
        X = X.to(device)
        y = y.to(device)
        out = model(X)
        print(out)
        print(out.size(), y.size())
        break

tensor([[-0.8148,  0.6778, -0.4798,  ..., -0.2236,  1.1426, -0.2874],
        [-0.8167,  0.7080, -0.4959,  ..., -0.1907,  1.1439, -0.3379],
        [-0.8200,  0.6894, -0.4542,  ..., -0.1829,  1.1446, -0.3175],
        ...,
        [-0.8633,  0.6708, -0.4681,  ..., -0.1885,  1.1067, -0.2975],
        [-0.8411,  0.7228, -0.4287,  ..., -0.2020,  1.0985, -0.3126],
        [-0.8228,  0.6820, -0.4717,  ..., -0.2069,  1.1262, -0.3112]],
       device='cuda:0')
torch.Size([64, 291]) torch.Size([64, 291])


# **Create Trainer**

In [17]:
import time
from datetime import timedelta

In [30]:
class Trainer:
    """Minimal training loop with per-epoch loss history and checkpointing.

    Args:
        model: Module to train; it is moved to CUDA device 0 when available,
            otherwise it stays on the CPU.
        optimizer: Optimizer built over ``model``'s parameters.
        criterion: Callable ``criterion(output, target)`` returning a scalar loss.
        scheduler: Optional LR scheduler; when given it is stepped after every
            optimizer step (once per batch).

    Attributes:
        train_loss / valid_loss: Mean loss per epoch.
        train_acc / valid_acc: Placeholders; a 0 is appended every epoch.
    """

    def __init__(self, model, optimizer, criterion, scheduler=None):
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model
        self.model = self.model.to(self.device)
        self.optimizer = optimizer
        self.criterion = criterion
        self.scheduler = scheduler
        self.train_loss = []
        self.valid_loss = []
        self.train_acc = []
        self.valid_acc = []

    def load_checkpoint(self, checkpoint):
        """Restore model, optimizer, scheduler and loss history from ``checkpoint``."""
        # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
        params = torch.load(checkpoint, map_location=self.device)
        self.model.load_state_dict(params["model"])
        self.optimizer.load_state_dict(params["optimizer"])
        # The checkpoint stores None when it was written without a scheduler.
        if self.scheduler is not None and params.get("scheduler") is not None:
            self.scheduler.load_state_dict(params["scheduler"])
        self.train_loss = params["train_loss"]
        self.valid_loss = params["valid_loss"]
        self.train_acc = params["train_acc"]
        self.valid_acc = params["valid_acc"]
        print("Load Model Successfully")

    def save_checkpoint(self, checkpoint):
        """Write model, optimizer, scheduler and loss history to ``checkpoint``.

        When ``checkpoint`` is a path, its parent directory is created if needed.
        """
        if isinstance(checkpoint, (str, os.PathLike)):
            folder = os.path.dirname(os.fspath(checkpoint))
            if folder:
                os.makedirs(folder, exist_ok=True)
        params = {
            "model": self.model.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "scheduler": None if self.scheduler is None else self.scheduler.state_dict(),
            "train_loss": self.train_loss,
            "valid_loss": self.valid_loss,
            "train_acc": self.train_acc,
            "valid_acc": self.valid_acc
        }
        torch.save(params, checkpoint)

    def save_checpoint(self, checkpoint):
        """Alias of ``save_checkpoint`` kept for callers using the original misspelled name."""
        self.save_checkpoint(checkpoint)

    def forward_step(self, dataloader, is_train=True):
        """Run one pass over ``dataloader`` and record its mean loss.

        Args:
            dataloader: Sized iterable yielding ``(X, y)`` batches.
            is_train: When true, weights are updated; otherwise the model is
                put in eval mode and gradients are disabled.

        Raises:
            ValueError: If ``dataloader`` yields no batches.
        """
        self.model.train(is_train)

        loss_his = []
        num_batches = len(dataloader)

        # No autograd graph is built during validation, saving memory and time.
        with torch.set_grad_enabled(is_train):
            for idx, (X, y) in enumerate(dataloader):
                st = time.time()
                X = X.to(self.device)
                y = y.to(self.device)
                if is_train:
                    self.optimizer.zero_grad()
                output = self.model(X)
                loss = self.criterion(output, y)
                if is_train:
                    loss.backward()
                    self.optimizer.step()
                    if self.scheduler is not None:
                        self.scheduler.step()

                loss_his.append(loss.item())
                calc_time = round(time.time() - st, 1)
                # "\r" rewrites the same console line for every batch.
                print(f"\rBatch {idx+1}/{num_batches} - Loss: {loss_his[-1]} - Time: {timedelta(seconds=int(calc_time))}/step", end="")
        print()

        if not loss_his:
            raise ValueError("dataloader yielded no batches")

        mean_loss = sum(loss_his) / len(loss_his)
        if is_train:
            self.train_loss.append(mean_loss)
            self.train_acc.append(0)
        else:
            self.valid_loss.append(mean_loss)
            self.valid_acc.append(0)

    def fit(self, epochs, train_loader, valid_loader=None, checkpoint_path="./checkpoint/model_v1.pt"):
        """Train for ``epochs`` epochs and save a checkpoint after each one.

        Args:
            epochs: Number of passes over ``train_loader``.
            train_loader: Training batches.
            valid_loader: Optional validation batches, evaluated after each
                training pass.
            checkpoint_path: File overwritten with the latest state after
                every epoch.
        """
        for epoch in range(epochs):
            st = time.time()
            print(f"Epoch {epoch+1} / {epochs}")

            self.forward_step(train_loader)

            if valid_loader is not None:
                self.forward_step(valid_loader, is_train=False)

            summary = f"\t=> Train loss: {round(self.train_loss[-1], 5)}"
            if valid_loader is not None:
                summary += f" - Valid loss: {round(self.valid_loss[-1], 5)}"
            calc_time = round(time.time() - st, 1)
            print(f"{summary} - Time: {timedelta(seconds=int(calc_time))}/epoch")
            self.save_checkpoint(checkpoint_path)

In [74]:
!mkdir ./checkpoint

In [19]:
LEARNING_RATE = 5e-5  # learning rate passed to the AdamW optimizer
WEIGHT_DECAY = 0.005  # weight decay passed to the AdamW optimizer
EPOCHS = 10  # number of epochs passed to trainer.fit

In [31]:
# The model's forward pass ends in a plain linear layer, so it emits unbounded
# logits (the sigmoid is only applied at inference time). nn.BCELoss expects
# probabilities in [0, 1]; BCEWithLogitsLoss applies the sigmoid inside the
# loss and is the matching criterion for raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    betas=(0.9, 0.999),
    weight_decay=WEIGHT_DECAY,
)
# scheduler = torch.optim.lr_scheduler.OneCycleLR(
#     optimizer,
#     LEARNING_RATE,
#     epochs=EPOCHS,
#     steps_per_epoch=len(dataloader) + 1,
#     anneal_strategy="cos",
# )

In [32]:
# Wire model, optimizer and loss together; no LR scheduler is passed.
trainer = Trainer(model, optimizer, criterion)

In [None]:
# Train on the training loader only; no validation loader is supplied.
trainer.fit(EPOCHS, dataloader)

In [86]:
# Smoke-test inference: print the per-class probabilities for the first
# evaluation image only (the loop stops after one iteration).
results = []
# Loop-invariant, so it is built once instead of on every iteration.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
trainer.model.eval()
with torch.no_grad():
    for sample_idx in range(len(valid_dataset)):
        # Separate names keep the image id from overwriting the loop counter.
        img, image_id = valid_dataset[sample_idx]
        img = img.unsqueeze(0).to(device)  # add a leading batch dimension
        # The model returns raw scores; the sigmoid maps them to probabilities.
        output = torch.sigmoid(trainer.model(img))
        print(output)
        break

tensor([[1.7287e-06, 5.3291e-08, 6.6239e-07, 1.8638e-06, 2.5507e-06, 1.8966e-06,
         1.6543e-06, 2.3695e-05, 6.7559e-10, 1.9826e-07, 7.2671e-07, 2.8783e-06,
         3.1366e-07, 7.1223e-05, 6.5049e-05, 1.0801e-04, 3.0235e-03, 9.6589e-07,
         1.4305e-06, 1.7218e-06, 6.0739e-05, 3.5432e-06, 1.7922e-04, 1.9563e-06,
         1.0555e-06, 3.8007e-06, 8.2067e-06, 1.2921e-03, 6.4806e-05, 2.1417e-07,
         1.7412e-04, 9.0976e-07, 9.1950e-07, 2.7992e-06, 2.5357e-06, 2.2001e-07,
         6.4538e-06, 9.8788e-05, 1.1027e-06, 9.9283e-06, 1.4752e-06, 1.3767e-06,
         2.2536e-06, 2.8475e-06, 4.1528e-05, 1.9369e-06, 4.9198e-05, 1.3035e-06,
         1.1294e-06, 1.6748e-06, 1.8185e-06, 1.2398e-06, 6.0605e-04, 9.1166e-07,
         3.7756e-06, 2.2454e-04, 7.6120e-05, 7.2596e-07, 1.6242e-06, 2.1294e-06,
         1.5966e-06, 9.9052e-04, 1.6396e-06, 8.3086e-06, 1.2578e-05, 5.4550e-06,
         1.8667e-04, 4.1728e-08, 6.2181e-06, 7.5864e-03, 1.1591e-05, 1.9221e-06,
         1.2602e-06, 5.2557e