In [None]:
import torch
import numpy as np
import pandas as pd
from turbojpeg import TurboJPEG
import tqdm
from os.path import join

from argus import load_model

from src.predictor import Predictor
from src.transforms import get_transforms
from src.metrics import MAPatK
from src import config

# Shared JPEG decoder used by both the validation and the test loops below.
# The shared-library location is machine specific; adjust it when running
# outside the original environment.
TURBOJPEG_LIB_PATH = '/usr/lib/x86_64-linux-gnu/libturbojpeg.so.0'
turbo_jpeg = TurboJPEG(TURBOJPEG_LIB_PATH)

In [None]:
# --- Inference settings -----------------------------------------------------
# Checkpoint that the predictor loads.
model_path = '/workdir/data/experiments/resnet50_001/model-413-0.761313.pth'

# Size handed to get_transforms().
# NOTE(review): axis order (height/width) is not visible here - confirm in src.transforms.
image_size = (208, 656)

# Number of (image, bbox) samples sent to the predictor per call.
batch_size = 32

# Number of top-ranked classes kept per image in the submission.
k = 5

# Constant score appended as an extra column for the 'new_whale' class.
new_prob = 0.71

# NOTE(review): the first positional argument is presumably a "train" flag
# (False here) - confirm against src.transforms.get_transforms.
transforms_kwargs = get_transforms(False, image_size)
predictor = Predictor(model_path, **transforms_kwargs)

# Validation

In [None]:
# Load the combined train/val table and keep only the rows flagged as validation.
train_val_csv_path = config.TRAIN_VAL_CSV_PATH
val_df = pd.read_csv(train_val_csv_path).loc[lambda frame: frame['val']]

In [None]:
# Run the predictor over the validation rows in mini-batches of `batch_size`
# and collect per-class probabilities in `preds` (one row per image, in the
# same order as `image_names` / `class_indexes`).
image_names = []
pred_lst = []
class_indexes = []

samples = []

for i, row in tqdm.tqdm(val_df.iterrows(), total=len(val_df)):
    # Read the raw JPEG bytes; the context manager closes the file handle
    # immediately instead of leaving it to the garbage collector.
    with open(row.image_path, 'rb') as image_file:
        image = turbo_jpeg.decode(image_file.read())
    bbox = (row.x0, row.y0, row.x1, row.y1)

    samples.append((image, bbox))
    image_names.append(row.Image)
    class_indexes.append(row.class_index)

    # A full batch has been accumulated: predict and start a new batch.
    if len(samples) >= batch_size:
        pred = predictor(samples)
        pred_lst.append(pred)
        samples = []

# Flush the last, partially filled batch. When the number of rows is an exact
# multiple of `batch_size` nothing is left over, so the predictor must not be
# called with an empty list.
if samples:
    pred = predictor(samples)
    pred_lst.append(pred)

# Stack the per-batch outputs along the sample axis and normalise each row
# with a softmax over the class axis.
preds = torch.cat(pred_lst, dim=0)
preds = torch.nn.functional.softmax(preds, dim=1)
preds = preds.numpy()

In [None]:
# Targets equal to -1 are remapped to the index of the extra column that is
# appended to the predictions later (one past the last model class).
new_index = preds.shape[1]
class_indexes = [new_index if ind == -1 else ind for ind in class_indexes]

In [None]:
# Append a constant-score column (value `new_prob`) at index `new_index`, so
# the extra class competes with the model's classes in the ranking.
new_whale_scores = np.full((preds.shape[0], 1), new_prob)
new_preds = np.concatenate([preds, new_whale_scores], axis=1)

# Use the notebook-level `k` rather than a literal so the validation metric
# is computed at the same cutoff as the submission's top-k.
metric = MAPatK(k=k)
metric.update({
    'prediction': torch.from_numpy(new_preds),
    'target': torch.from_numpy(np.array(class_indexes))
})

# Last expression of the cell: displays the validation score.
metric.compute()

# Test (submission)

In [None]:
# Tables needed for the test pass: the submission template (defines which
# images to predict) and the bounding boxes.
sample_subm = pd.read_csv(config.SAMPLE_SUBMISSION)
bboxes_df = pd.read_csv(config.BOUNDING_BOXES_CSV)

# Image file name -> (x0, y0, x1, y1) bounding box.
image_name2bbox = {
    bbox_row.Image: (bbox_row.x0, bbox_row.y0, bbox_row.x1, bbox_row.y1)
    for _, bbox_row in bboxes_df.iterrows()
}

# Training rows only (validation rows are filtered out).
train_val_csv_path = config.TRAIN_VAL_CSV_PATH
train_df = pd.read_csv(train_val_csv_path).loc[lambda frame: ~frame['val']]

# Class index -> Id string; when an index occurs on several rows the last
# row wins, as with plain dict assignment.
class_idx2id = {
    train_row.class_index: train_row.Id
    for _, train_row in tqdm.tqdm(train_df.iterrows(), total=len(train_df))
}

In [None]:
# Run the predictor over every image listed in the sample submission, in
# mini-batches of `batch_size`, and collect per-class probabilities in `preds`
# (one row per image, in the same order as `image_names`).
samples = []
image_names = []
pred_lst = []

for i, row in tqdm.tqdm(sample_subm.iterrows(), total=len(sample_subm)):
    image_path = join(config.TEST_DIR, row.Image)
    # Read the raw JPEG bytes; the context manager closes the file handle
    # immediately instead of leaving it to the garbage collector.
    with open(image_path, 'rb') as image_file:
        image = turbo_jpeg.decode(image_file.read())
    bbox = image_name2bbox[row.Image]

    samples.append((image, bbox))
    image_names.append(row.Image)

    # A full batch has been accumulated: predict and start a new batch.
    if len(samples) >= batch_size:
        pred = predictor(samples)
        pred_lst.append(pred)
        samples = []

# Flush the last, partially filled batch. When the number of rows is an exact
# multiple of `batch_size` nothing is left over, so the predictor must not be
# called with an empty list.
if samples:
    pred = predictor(samples)
    pred_lst.append(pred)

# Stack the per-batch outputs along the sample axis and normalise each row
# with a softmax over the class axis.
preds = torch.cat(pred_lst, dim=0)
preds = torch.nn.functional.softmax(preds, dim=1)
preds = preds.numpy()

In [None]:
# The extra class gets the first index after the model's own classes.
new_index = preds.shape[1]
class_idx2id[new_index] = 'new_whale'

# Append the constant 'new_whale' score as the last column.
new_whale_scores = np.full((preds.shape[0], 1), new_prob)
new_preds = np.concatenate([preds, new_whale_scores], axis=1)

# Ascending argsort per row, reversed to descending, truncated to the top k.
preds_idx = np.argsort(new_preds, axis=1)[:, ::-1][:, :k]

# One space-separated string of Ids per image, best guess first.
# NOTE(review): raises KeyError if a predicted index has no entry in
# class_idx2id - confirm every class index occurs in the training rows.
pred_class_ids = []
for pred_idx in preds_idx:
    pred_class_ids.append(' '.join(class_idx2id[idx] for idx in pred_idx))

In [None]:
# Assemble the submission table (image name + space-separated predicted Ids),
# write it to disk without the index column, and display it as the cell output.
submission_columns = {'Image': image_names, 'Id': pred_class_ids}
subm_df = pd.DataFrame(submission_columns)
subm_df.to_csv('/workdir/data/submissions/second_subm.csv', index=False)
subm_df