In [1]:
import os
import json
import time
import copy
from copy import deepcopy
from collections import defaultdict

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models

from skimage import io

import matplotlib.pyplot as plt
from matplotlib import patches, patheffects

import imgaug as ia
from imgaug import augmenters as iaa

from sklearn.model_selection import train_test_split

from tqdm import tqdm

In [2]:
import torch.nn.functional as F

In [3]:
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single axis.

    The first dimension is kept as-is, so an input of shape ``(d0, d1, ..., dk)``
    comes back as ``(d0, d1 * ... * dk)``.
    """

    def forward(self, x):
        # ``view`` (not a copy) is used, so the input must be viewable in this shape.
        leading = x.size(0)
        return x.view(leading, -1)

class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands its input back unchanged."""

    def forward(self, x):
        # No computation and no copy: the very same object is returned.
        return x
    
class RNetCustom(nn.Module):
    """Frozen pretrained ResNet-34 trunk with a joint bounding-box / category head.

    ``forward`` returns one row per input sample with
    ``num_bbox_param + num_cat`` columns: the first ``num_bbox_param`` (4)
    columns are the bounding-box outputs and the remaining ``num_cat`` columns
    are the per-category scores.

    Args:
        num_cat: number of category scores emitted after the 4 box values.
    """

    def __init__(self, num_cat):
        super().__init__()
        self.num_bbox_param = 4
        self.num_cat = num_cat

        self.backbone = self._prep_backbone()
        self.flatten = Flatten()
        self.relu_1 = nn.ReLU()
        self.dropout_1 = nn.Dropout(p=0.5)
        # The head is sized for a flattened backbone output of 512 * 7 * 7
        # features per sample; other feature-map sizes will not fit fc_2.
        self.fc_2 = nn.Linear(512 * 7 * 7, 256)
        self.relu_2 = nn.ReLU()
        self.batchnorm_2 = nn.BatchNorm1d(256)
        self.dropout_2 = nn.Dropout(p=0.5)
        self.fc_3 = nn.Linear(256, self.num_bbox_param + self.num_cat)

    def _prep_backbone(self):
        """Build the feature extractor: ResNet-34 minus its last two children, frozen.

        Returns:
            An ``nn.Sequential`` whose parameters all have ``requires_grad=False``.
        """
        base_model = models.resnet34(pretrained=True)
        # Keep everything except the final two child modules of the base model.
        kept_layers = list(base_model.children())[:-2]
        backbone = nn.Sequential(*kept_layers)
        for param in backbone.parameters():
            # The autograd flag is spelled ``requires_grad``. Assigning to any
            # other attribute name is silently accepted by Python and would
            # leave the pretrained weights trainable.
            param.requires_grad = False
        return backbone

    def forward(self, x):
        """Run the backbone and the head; returns the concatenated box + category outputs."""
        x = self.backbone(x)
        x = self.flatten(x)
        x = self.relu_1(x)
        x = self.dropout_1(x)
        x = self.fc_2(x)
        x = self.relu_2(x)
        x = self.batchnorm_2(x)
        x = self.dropout_2(x)
        return self.fc_3(x)

In [4]:
model_ft = RNetCustom(num_cat=20)

In [5]:
# Dimensions of the dummy batch used to smoke-test the model.
IMG_SIZE = 224    # height and width of each square input
CHANNEL = 3       # channels per input
BATCH_SIZE = 5    # samples per batch
BBOX_PPTS = 4     # values per bounding box

# Random stand-in batch laid out as (batch, channel, height, width).
X = torch.randn(BATCH_SIZE, CHANNEL, IMG_SIZE, IMG_SIZE, requires_grad=True)

In [6]:
X.shape

torch.Size([5, 3, 224, 224])

In [7]:
y_pred = model_ft(X)

In [8]:
y_pred

tensor([[-0.0365,  0.6733, -0.2702, -0.0959,  0.4068,  0.1084,  0.5923,
         -0.6223, -0.2043,  0.2570,  0.5473, -0.4048,  0.2241,  0.1597,
          0.2962, -0.5246,  0.0277,  0.0504, -0.0784, -0.0658, -0.7119,
          0.2685,  0.6639,  0.2893],
        [ 0.2176, -0.5140, -0.4534, -1.1669, -0.2613,  0.2348, -0.9991,
         -0.1556, -0.5176, -0.3121, -0.9183,  0.8390, -0.4946,  1.2685,
         -0.9822,  1.1606, -0.1499, -0.5758,  0.7510,  0.0293,  0.7246,
          0.1157,  0.2777, -1.0950],
        [ 0.5698, -0.1406,  0.3188, -0.2991,  0.4894,  0.2005, -0.2464,
          0.1024,  0.9853, -0.6283, -0.3543,  0.3930, -0.5451, -0.3930,
          0.4608, -0.3100,  0.3366,  0.3390, -0.2148, -0.1253, -0.4046,
          0.8575, -0.2015,  0.9056],
        [ 0.0851, -0.3475, -0.3155, -0.2202,  0.8900, -0.4248,  0.6112,
         -0.5368,  0.3978,  0.0730,  0.1026,  0.3698,  0.1792, -0.1702,
          0.3014,  0.6212,  0.3096,  0.1816, -0.7029,  0.1568,  0.7637,
          0.0114, -0.9216

In [9]:
y_pred_bb, y_pred_cat = y_pred[:, :4], y_pred[:, 4:]

In [10]:
y_pred_bb.shape

torch.Size([5, 4])

In [11]:
y_pred_cat.shape

torch.Size([5, 20])

In [20]:
y_true_cat = torch.empty((BATCH_SIZE, 2), dtype=torch.long).random_(5)
print(y_true_cat)

tensor([[ 1,  2],
        [ 3,  1],
        [ 1,  0],
        [ 2,  3],
        [ 3,  2]])


In [14]:
y_true_bbox = torch.randn((BATCH_SIZE, BBOX_PPTS))

In [15]:
y_true_bbox

tensor([[ 0.0473, -0.1424, -0.4245, -0.6142],
        [-0.4192,  0.0221,  2.1934, -2.0416],
        [-0.1122, -1.5886, -1.3284,  0.0842],
        [-1.4363,  0.5997,  1.6715,  0.7176],
        [ 0.8619, -1.8748, -0.6465, -0.4697]])

In [16]:
y_true = (y_true_bbox, y_true_cat)

In [17]:
y_true

(tensor([[ 0.0473, -0.1424, -0.4245, -0.6142],
         [-0.4192,  0.0221,  2.1934, -2.0416],
         [-0.1122, -1.5886, -1.3284,  0.0842],
         [-1.4363,  0.5997,  1.6715,  0.7176],
         [ 0.8619, -1.8748, -0.6465, -0.4697]]), tensor([ 2,  0,  1,  2,  2]))

In [18]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [35]:
input = torch.randn(3, requires_grad=True)

In [36]:
target = torch.empty(3).random_(2)

In [37]:
input

tensor([ 1.2914, -1.8268,  0.0940])

In [38]:
target

tensor([ 1.,  0.,  0.])

In [39]:
loss = F.binary_cross_entropy_with_logits(input, target)

In [40]:
loss

tensor(0.3778)

In [89]:
# mlb = MultiLabelBinarizer(classes=range(20))
# y_true_cate = mlb.fit_transform([[1, 2], [4], [6], [0, 12, 18]])

In [134]:
y_true_cate = [[1,2,0,0,0], [4,0,0,0,0], [6,0,0,0,0], [0, 12, 18,0,0]]

In [135]:
y_true_cate

[[1, 2, 0, 0, 0], [4, 0, 0, 0, 0], [6, 0, 0, 0, 0], [0, 12, 18, 0, 0]]

In [91]:
# NOTE(review): torch.from_numpy() needs a NumPy ndarray, but the preceding
# cell (In [134]) binds y_true_cate to a plain Python list of 4 x 5 label
# indices, so this line fails when the notebook is run top to bottom.
# The recorded shape below ([4, 20]) presumably came from the commented-out
# MultiLabelBinarizer cell (In [89]) that was run earlier - TODO confirm and
# restore a (4, 20) multi-hot ndarray before this conversion.
y_true_cate = torch.from_numpy(y_true_cate).float()

In [92]:
y_true_cate.shape

torch.Size([4, 20])

In [93]:
y_pred_cate = np.random.random((4,20))

In [94]:
y_pred_cate = torch.from_numpy(y_pred_cate).float()

In [95]:
y_pred_cate.shape

torch.Size([4, 20])

In [96]:
F.binary_cross_entropy_with_logits(y_pred_cate, y_true_cate)

tensor(0.9491)

In [107]:
def pad(original_seq, size, padding):
    """Return a new list of ``size`` items built from ``original_seq``.

    Shorter inputs are extended on the right with ``padding``; longer inputs
    are cut down to their first ``size`` items. ``original_seq`` itself is not
    modified.

    Args:
        original_seq: list to pad or truncate.
        size: target length.
        padding: filler element; every padded slot refers to this same object.

    Returns:
        The padded/truncated list.
    """
    gap = abs(len(original_seq) - size)
    filler = [padding] * gap
    extended = original_seq + filler
    # Slicing enforces the final length whether the input was short or long.
    return extended[:size]

In [109]:
pad([[1,2,3], [1,2,3], [1,2,3]], 5, [0,0,0])

[[1, 2, 3], [1, 2, 3], [1, 2, 3], [0, 0, 0], [0, 0, 0]]

In [110]:
def one_hot_embedding(labels, num_classes):
    """One-hot encode a tensor of class indices.

    Args:
        labels: integer tensor of class indices; it is moved to the CPU
            before being used as an index.
        num_classes: length of each one-hot vector.

    Returns:
        Float tensor of shape ``labels.shape + (num_classes,)``.
    """
    # Row i of the identity matrix is the one-hot vector for class i.
    identity = torch.eye(num_classes)
    indices = labels.data.cpu()
    return identity[indices]

In [115]:
one_hot_embedding(y_true_cat, 21).shape

torch.Size([5, 2, 21])

In [119]:
a = one_hot_embedding(y_true_cat, 4)

In [120]:
a

tensor([[[ 0.,  1.,  0.,  0.],
         [ 0.,  0.,  1.,  0.]],

        [[ 0.,  0.,  0.,  1.],
         [ 0.,  1.,  0.,  0.]],

        [[ 0.,  1.,  0.,  0.],
         [ 1.,  0.,  0.,  0.]],

        [[ 0.,  0.,  1.,  0.],
         [ 0.,  0.,  0.,  1.]],

        [[ 0.,  0.,  0.,  1.],
         [ 0.,  0.,  1.,  0.]]])

In [127]:
a = a.sum(1)

In [129]:
a.clamp(0,1)

tensor([[ 0.,  1.,  1.,  0.],
        [ 0.,  1.,  0.,  1.],
        [ 1.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  1.],
        [ 0.,  0.,  1.,  1.]])

In [145]:
def k_hot_embedding(labels, num_classes):
    """Encode several class indices per sample as one binary (k-hot) vector.

    Args:
        labels: integer tensor of class indices; the notebook passes a
            ``(batch, k)`` tensor. It is moved to the CPU before indexing.
        num_classes: length of each encoded vector.

    Returns:
        Float tensor in which dimension 1 of the one-hot encoding has been
        summed out; an entry is 1 if that class occurs at least once for the
        sample and 0 otherwise.
    """
    one_hot = torch.eye(num_classes)[labels.data.cpu()]
    # Summing over dimension 1 counts occurrences of each class per sample.
    counts = one_hot.sum(1)
    # Repeated labels give counts above 1; clamp them back to a 0/1 indicator.
    return counts.clamp(0, 1)

In [171]:
# Four samples, each a row of five label indices over five classes.
y_true_cate = torch.from_numpy(
    np.array([[1,2,0,0,0], [1,0,0,0,0], [1,1,1,3,4], [0, 1,1,4,0]])
)
y_true_khot = k_hot_embedding(y_true_cate, 5)
# Class 0 is a dummy background entry in y_true, so its column is zeroed out.
y_true_khot[:, 0] = 0
# Random stand-in predictions with the same (4, 5) shape as the target.
y_pred_cate = torch.rand((4, 5))

In [172]:
F.binary_cross_entropy_with_logits(y_pred_cate, y_true_khot)

tensor(0.7988)