In [1]:
import os
import gc
import sys
import json
import glob
import random
from datetime import datetime
import time
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import itertools
from tqdm import tqdm
import imutils

from ax import optimize
from sklearn.model_selection import RepeatedKFold

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchsummary

from torchvision import models, transforms
import torchvision

from sklearn.metrics import precision_score, recall_score, average_precision_score, f1_score

In [2]:
IMAGE_DIR = Path('/home/ubuntu/efs/kaggle/imaterialist/train')
DATA_DIR = Path('/home/ubuntu/efs/kaggle/imaterialist/')
ROOT_DIR = Path('/home/ubuntu/efs/kaggle/imaterialist/maskrcnn/logs')

# For demonstration purpose, the classification ignores attributes (only categories),
# and the image size is set to 512, which is the same as the size of submission masks
NUM_CATS = 46
IMAGE_SIZE = 512

In [3]:
class AttribsDataset(Dataset):
    def __init__(self, df):
        self.n_attributes = 92
        '''
        # create dataset
        segment_df = pd.read_csv(DATA_DIR/"train.csv")
        idx_with_attribs = segment_df['ClassId'].str.contains('_')
        idx_without_attribs = ~idx_with_attribs

        segment_with_attribs = segment_df[idx_with_attribs]
        n_without_attribs = len(segment_with_attribs)//2
        segment_without_attribs = pd.DataFrame.sample(segment_df[idx_without_attribs], n=n_without_attribs) 
        
        self.df = pd.concat([segment_with_attribs, segment_without_attribs])
        '''
        self.df = df
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self,idx):
        row = self.df.iloc[idx]
        image = cv2.imread(os.path.join(IMAGE_DIR, row.ImageId))

        # mask out
        h, w = row.Height, row.Width
        mask = np.zeros((h*w), dtype=np.uint8)
        encodedPixels = list(map(int,row.EncodedPixels.split(' ')))
        for startPos, runLen in zip(encodedPixels[0::2], encodedPixels[1::2]):
            mask[startPos+1:startPos+1+runLen] = 1
        mask = np.transpose(np.reshape(mask,(w,h)))        
        #plt.imshow(mask)

        # crop
        bx,by,bw,bh = cv2.boundingRect(mask)
        maskedImage = image* np.repeat(np.expand_dims(mask,-1), 3, axis=2)
        cropped = maskedImage[by:by+bh, bx:bx+bw,:]

        # resize largest dim to 512
        resized_cropped = self.resize_apsect_ratio(cropped, 512)
        #plt.imshow(resized_cropped)
        #plt.show()
        class_id = int(row.ClassId.split('_')[0])
        #print("Class ID", class_id, label_names[class_id])

        attributes = row.ClassId.split('_')[1:]
        #for attrib in attributes:
        #    print(attrib, attributes_list[int(attrib)])

        labels = np.zeros((self.n_attributes), np.float32)
        labels[np.array(list(map(int,attributes)))] = 1
        
        resized_cropped = np.float32(resized_cropped.transpose((2, 0, 1)))/127.5 -1
        
        return resized_cropped, np.float32(class_id), labels
    
    def resize_apsect_ratio(self, img, dim=512):
        img_h, img_w, _ = img.shape
        if img_h>img_w:
            resized = imutils.resize(img, height=dim)
            total_pad = dim-resized.shape[1]
            l_pad = total_pad//2
            r_pad = total_pad -l_pad
            resized = cv2.copyMakeBorder(resized, 0, 0, l_pad, r_pad, cv2.BORDER_CONSTANT, None, 0.0)
        else:
            resized = imutils.resize(img, width=dim)
            total_pad = dim-resized.shape[0]
            top_pad = total_pad//2
            bottom_pad = total_pad -top_pad
            resized = cv2.copyMakeBorder(resized,top_pad, bottom_pad,  0, 0, cv2.BORDER_CONSTANT, None, 0.0)
        assert resized.shape[:2]==(dim, dim)
        return resized
    


In [4]:
segment_df = pd.read_csv(DATA_DIR/"train.csv")
idx_with_attribs = segment_df['ClassId'].str.contains('_')
idx_without_attribs = ~idx_with_attribs

df = segment_df[idx_with_attribs]

dataset = AttribsDataset(df)


# Model

In [5]:
class Resnet(nn.Module):
    def __init__(self):
        super(Resnet, self).__init__()
        base_model = models.resnet101(pretrained=True)
        
        self.base = nn.Sequential(*list(base_model.children())[:-1])
        self.fc = nn.Sequential(
                        nn.Linear(2048,1024),
                        nn.ReLU(),
                        nn.Linear(1024,512),
                        nn.ReLU(),
                        nn.Linear(512,92),
                        nn.Sigmoid())

    def forward(self, images, class_id):
        features = self.base(images)
        #class_id_repeat = class_id.repeat(12,1,1,1)
        #class_id_repeat = class_id.view(-1,1, 1,1).repeat(1,12,1,1)
        ##print(features.shape, class_id.shape, class_id_repeat.shape)
        #x = torch.cat((features, class_id_repeat), dim=1)
        #x = x.view((-1,2060))

        y = self.fc(features.view((-1,2048)))
        return y
    
model = Resnet().to("cuda")
model = nn.DataParallel(model)

#model_path = "/home/ubuntu/efs/kaggle/imaterialist/checkpoints/attribs/20190604-1603/model_step_11653.pth"
model_path = "/home/ubuntu/efs/kaggle/imaterialist/checkpoints/attribs/20190605-1548/model_step_12864.pth"
model.load_state_dict(torch.load(model_path))
model.eval()


DataParallel(
  (module): Resnet(
    (base): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace)
          (downsampl


# Evaluate

In [6]:
groundtruth = []
predictions = []

In [7]:
batch_size = 8
dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=8)

In [8]:
for images, class_ids, labels in tqdm(dataloader):
    pred_labels = model(images, class_ids)
    pred_labels = pred_labels.cpu().detach().numpy().astype(np.float16)
    gt_labels = labels.numpy().astype(np.uint8)

    groundtruth.extend(gt_labels)
    predictions.extend(pred_labels)

100%|██████████| 1443/1443 [11:52<00:00,  3.00it/s]


In [9]:
def calculate_scores(d):
    threshold = d['threshold']
    f1_scores = []
    for gt, pred in zip(groundtruth, predictions):
        idx = pred>=threshold
        temp_pred=np.zeros_like(pred, dtype=np.uint8)
        temp_pred[pred>=threshold]=1
        if np.sum(temp_pred)>0:
            score = f1_score(gt, temp_pred)
            f1_scores.append(score)
    return np.array(f1_scores).mean()

best_parameters, best_values, experiment, model = optimize(
    parameters=[
      {
        "name": "threshold",
        "type": "range",
        "bounds": [0.05, 0.95],
      },
    ],
    # Booth function
    evaluation_function=calculate_scores,
    minimize=False,
)

[INFO 06-05 18:25:05] ax.service.utils.dispatch: Using Bayesian Optimization generation strategy. Iterations after 5 will take longer to generate due to model-fitting.
[INFO 06-05 18:25:05] ax.service.managed_loop: Started full optimization with 20 steps.
[INFO 06-05 18:25:05] ax.service.managed_loop: Running optimization trial 1...
[INFO 06-05 18:25:13] ax.service.managed_loop: Running optimization trial 2...
[INFO 06-05 18:25:21] ax.service.managed_loop: Running optimization trial 3...
[INFO 06-05 18:25:29] ax.service.managed_loop: Running optimization trial 4...
[INFO 06-05 18:25:36] ax.service.managed_loop: Running optimization trial 5...
[INFO 06-05 18:25:44] ax.service.managed_loop: Running optimization trial 6...
[INFO 06-05 18:25:53] ax.service.managed_loop: Running optimization trial 7...
[INFO 06-05 18:26:02] ax.service.managed_loop: Running optimization trial 8...
[INFO 06-05 18:26:11] ax.service.managed_loop: Running optimization trial 9...
[INFO 06-05 18:26:20] ax.service.

In [10]:
print(best_parameters, best_values, experiment, model)

{'threshold': 0.42658067635687696} ({'objective': 0.7446657226876094}, {'objective': {'objective': 1.9594141282134466e-11}}) SimpleExperiment(None) <ax.modelbridge.torch.TorchModelBridge object at 0x7fb4fb7adf98>


# Accuracy

In [6]:
dataloader = DataLoader(dataset, batch_size=8, num_workers=8)
threshold = 0.42658067635687696
hit = 0
miss = 0
for images, class_ids, labels in tqdm(dataloader):
    pred_labels = model(images.to("cuda"), class_ids.to("cuda"))
    pred_labels = pred_labels.cpu().detach().numpy().astype(np.float16)
    gt_labels = labels.numpy().astype(np.uint8)

    for gt_label, pred_label in zip(gt_labels, pred_labels):
        gt = np.where(gt_label)[0]
        pred = np.where(pred_label>=threshold)[0]
        #print("ground truth", gt)
        #print("prediction  ", pred,'\n')
        match = np.prod(gt==pred)
        #print(match)
        if match:
            #print("Hoorrayyy")
            hit+=1
        else:
            miss+=1
accuracy = hit/(hit+miss)
print("accuracy = %.3f hit %d miss %d"%(accuracy, hit, miss))

100%|██████████| 1443/1443 [13:28<00:00,  2.90it/s]

accuracy = 0.300 hit 3465 miss 8075





In [16]:
threshold = best_parameters['threshold']
print("threshold",threshold)
threshold = 0.42658067635687696
for i in range(20):
    images, class_ids, labels = next(data_iter)
    pred_labels = model(images, class_ids)
    pred_labels = pred_labels.cpu().detach().numpy().astype(np.float16)
    gt_labels = labels.numpy().astype(np.uint8)

    gt = np.where(gt_labels)[1]
    pred = np.where(pred_labels>=threshold)[1]
    print("ground truth", gt)
    print("prediction  ", pred,'\n')
    match = np.prod(gt==pred)
    #print(match)
    if match:
        print("Hoorrayyy")

threshold 0.42658067635687696
ground truth [ 3 14 20 32 60 69 86]
prediction   [ 3 14 20 32 60 69 86] 

Hoorrayyy
ground truth [ 0 14 20 40 60 61 88]
prediction   [ 8 20 41 61 88] 

ground truth [ 8 14 20 36 42 52 53 61 87]
prediction   [20 22 35 61] 

ground truth [ 0 20 40 60 61 88]
prediction   [ 0 20 40 60 61 88] 

Hoorrayyy




ground truth [ 1 20 41 60 69 88]
prediction   [ 1 20 41 60 69 88] 

Hoorrayyy
ground truth [ 3 14 20 33 60 69 88]
prediction   [ 3 14 20 33 60 69 88] 

Hoorrayyy
ground truth [ 0 10 20 41 60 72 88]
prediction   [18 20 53 61 72 91] 

ground truth [ 8 15 20 30 42 60 72]
prediction   [20 40 50 53 61 72 85] 

ground truth [ 6 14 20 24 53 69 87]
prediction   [14 20 24 53 69 87] 

ground truth [ 9 20 24 61]
prediction   [ 9 20 22 24 61] 

ground truth [ 3 14 20 34 61 85]
prediction   [ 3 14 20 34 61 85] 

Hoorrayyy
ground truth [ 8 20 35 60 69]
prediction   [ 7 20 35 60 69 85 91] 

ground truth [ 0 20 40 60 72 91]
prediction   [ 0 20 40 60 72 91] 

Hoorrayyy
ground truth [14 20 34 60 69 72 88]
prediction   [14 20 34 60 72 88] 

ground truth [ 3 14 20 24 53 69 87]
prediction   [ 3 14 20 24 53 69 87] 

Hoorrayyy
ground truth [ 0 20 41 61]
prediction   [ 0 20 41 61] 

Hoorrayyy
ground truth [ 0 20 40 60 61 88]
prediction   [10 20 41 60 61 88] 

ground truth [ 8 20 33 41 60 62 87]
prediction   [

prediction   [41 42 61 91]
