In [31]:
import os
import torch
from tqdm import tqdm_notebook
from matplotlib import pyplot as plt
from itertools import product
import json
from collections import defaultdict

from torch import nn
from torch.autograd import Variable
from torch.functional import F
from torchvision import models
import torchvision
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np

# from fastai import transforms, model, dataset, conv_learner

from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from augmentation import SSDAugmentation

from Config import Config
from SSD_model import get_SSD_model, lr_find
from VOC_data import VOC_dataset
from SSDloss import *

In [32]:
def detection_collate_fn(batch):
    """Collate detection samples into one batch.

    Each element of ``batch`` is unpacked as an ``(image, boxes, labels)``
    triple. The images are combined into a single tensor with
    ``torch.stack``; the per-sample boxes and labels are returned as plain
    Python lists in batch order and are not stacked (presumably because the
    number of objects differs between images -- confirm against the dataset).

    Returns:
        tuple: ``(stacked_images, boxes_per_sample, labels_per_sample)``.
    """
    # Unpacking into three names enforces that every sample is a triple.
    samples = [(image, boxes, classes) for image, boxes, classes in batch]
    images = [sample[0] for sample in samples]
    boxes_per_sample = [sample[1] for sample in samples]
    labels_per_sample = [sample[2] for sample in samples]
    return torch.stack(images), boxes_per_sample, labels_per_sample

In [33]:
# Build the SSD model and the VOC2007 training loader from the project Config.
# NOTE(review): the meaning of the 'local' profile and of the leading `1`
# argument to get_SSD_model is defined in Config / SSD_model, not here -- confirm.
config = Config('local')
ssd_model = get_SSD_model(1, config.vgg_weight_path, config.vgg_reduced_weight_path)
# ssd_model.freeze_basenet()

print('success build ssd model')

# Dataset is built from the VOC2007 root directory and training annotation path.
train_dataset = VOC_dataset(config.voc2007_root, config.voc2007_trn_anno)

# img, bbox, label = train_dataset[0]
# img = img.unsqueeze(0)

# conf_pred, loc_pred = ssd_model(img)
# print(conf_pred.shape, loc_pred.shape)
# shuffle=False keeps batches in dataset order; detection_collate_fn stacks the
# images and leaves boxes/labels as per-sample lists.
trn_dataloader = DataLoader(train_dataset, config.batch_size, shuffle=False, collate_fn=detection_collate_fn)
# lr_array, loss_array = lr_find(ssd_model, 1e-1, 1e-4, trn_dataloader, linear=False)

  nn.init.kaiming_uniform(m.weight.data)


success build ssd model


In [4]:
# Port a pre-trained SSD checkpoint into this notebook's model by renaming the
# checkpoint's parameter keys to the names used by `ssd_model`.
original_state_dict = ssd_model.state_dict()
# map_location='cpu' lets a checkpoint that was saved from a GPU be read on a
# CPU-only machine; load_state_dict below copies the values onto whatever
# device the model's parameters live on.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
trained_state_dict = torch.load(config.trained_path, map_location='cpu')
# Checkpoint top-level module name -> module name in this model.
trained2original = {'vgg':'base_net',
                    'L2Norm':'l2_norm', 'extras': 'extra',
                    'loc': 'loc_layers',
                    'conf': 'conf_layers'
                   }

for trained_k in trained_state_dict.keys():
    # Keys look like '<module>.<index>.<param>'; split once and reuse the parts.
    key_parts = trained_k.split('.')
    layer_name = key_parts[0]
    layer_num = key_parts[1]
    layer_other = '.'.join(key_parts[2:])
    if layer_name == 'L2Norm':
        # The L2Norm entry has no numeric index, so it maps to a fixed key.
        original_key = 'l2_norm.weight'
    elif layer_name == 'vgg' and int(layer_num) > 28:
        # Checkpoint 'vgg' layers past index 28 live in this model's separate
        # 'reduced_fc' module, whose indices are offset by 30.
        num = str(int(layer_num) - 30)
        original_key = '.'.join(['reduced_fc', num, layer_other])
    else:
        original_key = '.'.join([trained2original[layer_name], layer_num, layer_other])
    original_state_dict[original_key] = trained_state_dict[trained_k]

ssd_model.load_state_dict(original_state_dict)

In [5]:
# Run on the first CUDA GPU when one is available, otherwise fall back to CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Rebind the name to the model living on the selected device.
ssd_model = ssd_model.to(device)

In [34]:
# Build a two-image input batch by hand from two VOC JPEGs.
img1_path, img2_path = 'C:\\datasets\\pascal\\JPEGImages\\000012.jpg', 'C:\\datasets\\pascal\\JPEGImages\\000017.jpg'
img1, img2 = cv2.imread(img1_path), cv2.imread(img2_path)
# cv2.imread returns None (it does not raise) when a file is missing or
# unreadable; fail here with the offending path instead of inside cv2.resize.
_unreadable = [p for p, im in ((img1_path, img1), (img2_path, img2)) if im is None]
if _unreadable:
    raise FileNotFoundError('cv2.imread could not read: {}'.format(', '.join(_unreadable)))
img1, img2 = cv2.resize(img1, (300, 300)), cv2.resize(img2, (300, 300))
# Raw pixel values are kept as floats; no mean subtraction or scaling is applied here.
img1, img2 = torch.FloatTensor(img1), torch.FloatTensor(img2)
# HWC -> CHW so each image has the channel-first layout used by conv layers.
img1, img2 = img1.permute(2, 0, 1), img2.permute(2, 0, 1)

imgs = torch.stack([img1, img2])
# Follow the device chosen for the model rather than hard-coding CUDA, so the
# cell also runs on CPU-only machines.
imgs = imgs.to(device)

In [7]:
# Forward pass on the hand-built two-image batch; the model's two outputs are
# unpacked as confidence and localisation predictions.
# NOTE(review): the numbered tensor lines printed below presumably come from
# debug prints inside the model -- confirm in SSD_model.
conf, loc = ssd_model(imgs)

0 tensor(228668.625, device='cuda:0', grad_fn=<SumBackward0>)
1 tensor(165391.703, device='cuda:0', grad_fn=<SumBackward0>)
2 tensor(34533.184, device='cuda:0', grad_fn=<SumBackward0>)
3 tensor(10194.548, device='cuda:0', grad_fn=<SumBackward0>)
4 tensor(6547.459, device='cuda:0', grad_fn=<SumBackward0>)
5 tensor(1612.559, device='cuda:0', grad_fn=<SumBackward0>)


In [10]:
# Sanity check: total of the raw (un-normalised) pixel values fed to the model.
torch.sum(imgs)

tensor(57216460., device='cuda:0')

In [8]:
# Sanity check: scalar summary of the confidence output of the forward pass.
torch.sum(conf)

tensor(-155.327, device='cuda:0', grad_fn=<SumBackward0>)

In [9]:
# Sanity check: scalar summary of the localisation output of the forward pass.
torch.sum(loc)

tensor(-19960.617, device='cuda:0', grad_fn=<SumBackward0>)

In [13]:
# Sanity check on the l2_norm weight, which the checkpoint-remapping cell
# fills from the checkpoint's 'L2Norm' entry.
torch.sum(ssd_model.l2_norm.weight)

tensor(6785.321, device='cuda:0', grad_fn=<SumBackward0>)

In [6]:
# Evaluate the SSD loss batch by batch over the training loader.
# The optimisation calls (zero_grad / backward / step) are commented out, so
# this loop only measures the loss; it never updates the model's weights.
# NOTE(review): get_prior_box, get_iou, get_target and loss are not defined in
# this notebook -- presumably they come from `from SSDloss import *`; confirm.
prior_box = get_prior_box()
loss_array = []

for i, batch in enumerate(trn_dataloader):

    # imgs is a stacked tensor; bboxes and labels are per-sample lists
    # (see detection_collate_fn).
    imgs, bboxes, labels = batch
    imgs = imgs.to(device)
    cls_preds, loc_preds = ssd_model(imgs)

#     model.zero_grad()

    # Running sums over the images of this batch.
    total_loss = 0
    total_loc_loss, total_cls_loss = 0, 0

    # Targets and loss are computed one image at a time.
    for idx in range(imgs.shape[0]):

        img, bbox, label = imgs[idx], bboxes[idx], labels[idx]
        cls_pred, loc_pred = cls_preds[idx], loc_preds[idx]
        iou = get_iou(bbox, prior_box)

        # get_target's outputs are moved to the model's device before the loss call.
        pos_mask, cls_target, bbox_target = get_target(iou, prior_box, img, bbox, label)
        pos_mask, cls_target, bbox_target = pos_mask.to(device), cls_target.to(device), bbox_target.to(device)

        loss_loc, loss_cls = loss(cls_pred, loc_pred, pos_mask, cls_target, bbox_target)
        total_loc_loss += loss_loc; total_cls_loss += loss_cls

        total_loss += (loss_cls + loss_loc)

    # Turn the per-batch sums into per-image means.
    total_loss /= float(imgs.shape[0])
    total_cls_loss /= float(imgs.shape[0])
    total_loc_loss /= float(imgs.shape[0])

#     total_loss.backward()

#     optimizer.step()
    # Convert to plain Python floats rounded to 3 decimals for logging.
    cls_loss = round(float(total_cls_loss), 3)
    loc_loss = round(float(total_loc_loss), 3)
    t_loss = round(float(total_loss), 3)

    print(i, 'cls_loss: {}, loc_loss: {}, loss: {}'.format(cls_loss, loc_loss, t_loss))
    loss_array.append(t_loss)



0 cls_loss: 9.103, loc_loss: 4.608, loss: 13.711
1 cls_loss: 9.528, loc_loss: 3.767, loss: 13.295
2 cls_loss: 9.228, loc_loss: 3.088, loss: 12.316
3 cls_loss: 9.208, loc_loss: 7.935, loss: 17.142
4 cls_loss: 8.811, loc_loss: 4.791, loss: 13.602
5 cls_loss: 8.592, loc_loss: 4.327, loss: 12.92
6 cls_loss: 8.326, loc_loss: 3.546, loss: 11.872
7 cls_loss: 9.201, loc_loss: 9.054, loss: 18.254
8 cls_loss: 8.948, loc_loss: 10.308, loss: 19.256
9 cls_loss: 10.047, loc_loss: 5.539, loss: 15.586
10 cls_loss: 10.427, loc_loss: 4.072, loss: 14.499
11 cls_loss: 6.396, loc_loss: 5.328, loss: 11.724
12 cls_loss: 9.155, loc_loss: 4.246, loss: 13.401
13 cls_loss: 6.205, loc_loss: 7.109, loss: 13.314
14 cls_loss: 9.29, loc_loss: 5.714, loss: 15.004
15 cls_loss: 7.516, loc_loss: 4.717, loss: 12.234
16 cls_loss: 8.497, loc_loss: 6.005, loss: 14.503
17 cls_loss: 8.481, loc_loss: 6.423, loss: 14.904
18 cls_loss: 10.271, loc_loss: 4.646, loss: 14.917
19 cls_loss: 9.056, loc_loss: 6.009, loss: 15.066
20 cls_l

323 cls_loss: 7.187, loc_loss: 5.416, loss: 12.602
324 cls_loss: 9.172, loc_loss: 5.714, loss: 14.886
325 cls_loss: 8.605, loc_loss: 7.542, loss: 16.147
326 cls_loss: 10.065, loc_loss: 6.727, loss: 16.792
327 cls_loss: 8.126, loc_loss: 5.625, loss: 13.752
328 cls_loss: 8.215, loc_loss: 7.198, loss: 15.412
329 cls_loss: 6.03, loc_loss: 4.316, loss: 10.346
330 cls_loss: 10.816, loc_loss: 7.623, loss: 18.44
331 cls_loss: 10.078, loc_loss: 6.571, loss: 16.649
332 cls_loss: 10.149, loc_loss: 4.471, loss: 14.62
333 cls_loss: 9.258, loc_loss: 9.792, loss: 19.051
334 cls_loss: 11.693, loc_loss: 3.753, loss: 15.446
335 cls_loss: 5.477, loc_loss: 12.576, loss: 18.053
336 cls_loss: 7.871, loc_loss: 4.874, loss: 12.745
337 cls_loss: 8.877, loc_loss: 4.493, loss: 13.37
338 cls_loss: 7.909, loc_loss: 5.69, loss: 13.599
339 cls_loss: 8.332, loc_loss: 7.047, loss: 15.379
340 cls_loss: 8.8, loc_loss: 8.016, loss: 16.816
341 cls_loss: 9.133, loc_loss: 7.441, loss: 16.574
342 cls_loss: 9.842, loc_loss: 3

KeyboardInterrupt: 

In [13]:
# Inspect the class targets selected by pos_mask; both variables are leftovers
# from the last image processed before the loss loop was interrupted.
cls_target[pos_mask]

tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3], device='cuda:0', dtype=torch.int32)

In [20]:
# Shape of the class predictions selected by the same mask (last image of the loop).
(cls_pred[pos_mask].shape)

torch.Size([78, 21])

In [2]:
import torch
from torch import nn

In [21]:
# Toy experiment: a 1->1 channel 3x3 convolution with all-ones weights, so each
# output value is the sum of a 3x3 input window plus the layer's bias.
l = nn.Conv2d(1, 1, kernel_size=(3, 3), stride=1, padding=0)
l.weight = nn.Parameter(torch.ones(1, 1, 3, 3).float())
# Values 0..24 laid out row-major as a single one-channel 5x5 image
# (batch, channel, height, width).
x = torch.arange(25, dtype=torch.float32).reshape(1, 1, 5, 5)
x.shape

torch.Size([1, 1, 5, 5])

In [22]:
# Display the toy layer's kernel to confirm it is all ones.
l.weight

Parameter containing:
tensor([[[[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]], requires_grad=True)

In [26]:
# Replace the layer's bias with zero so the output is exactly the window sum.
l.bias = nn.Parameter(torch.FloatTensor([0]))

In [27]:
# Display the 5x5 toy input.
x

tensor([[[[ 0.,  1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.,  9.],
          [10., 11., 12., 13., 14.],
          [15., 16., 17., 18., 19.],
          [20., 21., 22., 23., 24.]]]])

In [28]:
# Apply the convolution: no padding with a 3x3 kernel turns the 5x5 input into
# a 3x3 output of window sums (e.g. top-left = 0+1+2+5+6+7+10+11+12 = 54).
l(x)

tensor([[[[ 54.,  63.,  72.],
          [ 99., 108., 117.],
          [144., 153., 162.]]]], grad_fn=<ThnnConv2DBackward>)

In [None]:
# Reload the two sample images and resize them to 600x300 for a visual check.
# cv2.resize takes the target size as (width, height).
img1_path, img2_path = 'C:\\datasets\\pascal\\JPEGImages\\000012.jpg', 'C:\\datasets\\pascal\\JPEGImages\\000017.jpg'
img1, img2 = cv2.imread(img1_path), cv2.imread(img2_path)
img1, img2 = cv2.resize(img1, (600, 300)), cv2.resize(img2, (600, 300))

# cv2.imread returns channels in BGR order while matplotlib interprets an
# (H, W, 3) array as RGB; convert for display only so the colours are not
# swapped. img2 itself is left in BGR.
plt.imshow(cv2.cvtColor(img2, cv2.COLOR_BGR2RGB));