In [1]:
import numpy as np
import torch

from config.config import Configs

import json
import cv2
from ssd.utils_ssd300 import dboxes300_coco
from ssd.train_loader import  CocoDataReader
from ssd.utils_ssd300 import calc_iou_tensor
from ssd.create_model import nvidia_ssd
from ssd.utils_ssd300 import Encoder
from ssd.model import Loss
from ssd.SSD_Transformers import SSDTransformer

from PIL import ImageDraw, Image


from torch.autograd import Variable
from torch.utils.data import DataLoader


from torchmetrics.detection.mean_ap import MeanAveragePrecision
from tqdm import tqdm

from ssd.train_one_loop import train_one_loop
from ssd.model_eval import model_evaluate

import time
from utils.utils import save_model
from train_model import train_model


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
batch_size = 32
device = 'cuda'
model = nvidia_ssd(device=device, pretrainded_custom=False, pretrained_default=True)

for param in model.parameters():
    param.requires_grad = True
    
for param in model.feature_extractor.parameters():
    param.requires_grad = False

dboxes = dboxes300_coco()
encoder = Encoder(dboxes)


loss_func = Loss(dboxes, device=device)

transformers_ssd_train = SSDTransformer(dboxes, size=(300, 300), val=False)

transformers_ssd_val= SSDTransformer(dboxes,  size=(300, 300), val=True)


annotate_file = 'COCOdata\\annotations\\instances_val2017.json'
data_train = CocoDataReader(
                    img_folder='COCOdata\\val2017\\',
                    annotate_file=annotate_file,
                    transform=transformers_ssd_train )

train_dataloader = DataLoader(data_train ,
                                batch_size=batch_size,
                                
                                shuffle=True,  # Note: distributed sampler is shuffled :(
                             )

data_val = CocoDataReader(
                    img_folder='COCOdata\\val2017\\',
                    annotate_file=annotate_file,
                    transform=transformers_ssd_val )

val_dataloader = DataLoader(data_val ,
                                batch_size=batch_size,
                                shuffle=False,  # Note: distributed sampler is shuffled :(
                             )
   # https://www.kaggle.com/code/billiemage/understand-lr-scheduler-with-simple-examples
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
        params, lr=0.0005, momentum=0.9, nesterov=True
    )

scheduler = torch.optim.lr_scheduler.StepLR  (
        optimizer=optimizer, step_size=15, gamma=0.1, verbose=True
    )

cuda
load cuda
Adjusting learning rate of group 0 to 5.0000e-04.


In [3]:
from train_model import train_model

In [3]:
train_model(
    model=model,
    encoder=encoder,
    num_epoch=2,
    optimizer=optimizer,
    loss_func=loss_func,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    device=device,
    scheduler=scheduler
)


EPOCH 1 of 2
Training


100%|██████████| 155/155 [03:49<00:00,  1.48s/it]


Evaluating


  6%|▋         | 10/155 [00:30<07:20,  3.03s/it]


Epoch #1 train loss: 3.716
Epoch #1 mAP: 0.31395190954208374
Epoch #1 mAP_50: 0.5039829015731812
Epoch #1 beset mAP: 0.31395190954208374
Took 4.373 minutes for epoch 0
Adjusting learning rate of group 0 to 5.0000e-04.

EPOCH 2 of 2
Training


 17%|█▋        | 26/155 [00:36<03:03,  1.42s/it]


KeyboardInterrupt: 

In [5]:
num_epoch = 2
loss_mas = []
map50 = []
map = []
bes_map = 0
for epoch in range(num_epoch):
    print(f"\nEPOCH {epoch+1} of {num_epoch}")
    start = time.time()
    losses = train_one_loop(model=model, optimizer=optimizer,loss_func=loss_func, train_dataloader=train_dataloader, device=device)
    metrics_map = model_evaluate(model=model,encoder=encoder,val_dataloader=val_dataloader,device=device)
    
    print(f"Epoch #{epoch+1} train loss: {losses:.3f}")   
    print(f"Epoch #{epoch+1} mAP: {metrics_map['map']}") 
    print(f"Epoch #{epoch+1} mAP_50: {metrics_map['map_50']}")
    cur_map = metrics_map['map']
    
    if cur_map > bes_map:
        bes_map = cur_map
        save_model(
                model = model, optimizer=None, model_name="best_model_at_" + str(epoch), path="weight",
            lr_scheduler=None)
        
    print(f"Epoch #{epoch+1} train loss: {losses:.3f}")   
    print(f"Epoch #{epoch+1} mAP: {metrics_map['map']}") 
    print(f"Epoch #{epoch+1} mAP_50: {metrics_map['map_50']}")  
    print(f"Epoch #{epoch+1} beset mAP: {bes_map}")    
        
    
    loss_mas.append(losses)
    map50.append(metrics_map['map_50'])
    map.append(metrics_map['map'])
    end = time.time()
    print(f"Took {((end - start) / 60):.3f} minutes for epoch {epoch}")  
    
    if scheduler is not None:
        scheduler.step()
    
save_model(model = model, optimizer=optimizer, model_name="end_state" , path="weight",
            lr_scheduler=None) 
    
    
with open('train_results.txt', 'w') as file_handler:
    file_handler.write("loss\n")
    for item in loss_mas:
        file_handler.write("{}\t".format(item))
        
    file_handler.write("\nmap\n")
    for item in map50:
        file_handler.write("{}\t".format(item))
        
    file_handler.write("\nmap_50\n")
    for item in map:
        file_handler.write("{}\t".format(item))


EPOCH 1 of 2
Training


  6%|▌         | 9/155 [00:15<04:07,  1.69s/it]


Evaluating


 10%|▉         | 15/155 [00:42<06:38,  2.85s/it]


Epoch #1 train loss: 0.236
Epoch #1 mAP: 0.29772624373435974
Epoch #1 mAP_50: 0.4810028672218323
Epoch #1 train loss: 0.236
Epoch #1 mAP: 0.29772624373435974
Epoch #1 mAP_50: 0.4810028672218323
Epoch #1 beset mAP: 0.29772624373435974
Took 1.020 minutes for epoch 0
Adjusting learning rate of group 0 to 5.0000e-04.

EPOCH 2 of 2
Training


  6%|▌         | 9/155 [00:15<04:11,  1.72s/it]


Evaluating


 10%|▉         | 15/155 [00:43<06:49,  2.93s/it]


Epoch #2 train loss: 0.237
Epoch #2 mAP: 0.2973070442676544
Epoch #2 mAP_50: 0.4827316999435425
Epoch #2 train loss: 0.237
Epoch #2 mAP: 0.2973070442676544
Epoch #2 mAP_50: 0.4827316999435425
Epoch #2 beset mAP: 0.29772624373435974
Took 1.042 minutes for epoch 1
Adjusting learning rate of group 0 to 5.0000e-04.


In [4]:
#losses = train_one_loop(model=model, optimizer=optimizer,loss_func=loss_func, train_dataloader=train_dataloader, device=device)

metrics_map = model_evaluate(model=model,encoder=encoder,val_dataloader=val_dataloader,device=device)

Training


  1%|▏         | 2/155 [00:16<21:29,  8.43s/it]


KeyboardInterrupt: 

In [7]:
#metrics_map = model_evaluate(model=model,encoder=encoder,val_dataloader=val_dataloader,device=device)

In [4]:
metrics_map

{'map': tensor(0.3213),
 'map_50': tensor(0.5015),
 'map_75': tensor(0.3389),
 'map_small': tensor(0.0736),
 'map_medium': tensor(0.3580),
 'map_large': tensor(0.4853),
 'mar_1': tensor(0.2878),
 'mar_10': tensor(0.3897),
 'mar_100': tensor(0.3977),
 'mar_small': tensor(0.1122),
 'mar_medium': tensor(0.4475),
 'mar_large': tensor(0.5705),
 'map_per_class': tensor(-1.),
 'mar_100_per_class': tensor(-1.),
 'classes': tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
         37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55,
         56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
         74, 75, 76, 77, 78, 80], dtype=torch.int32)}

In [None]:
#prog_bar = tqdm(train_dataloader, total=len(train_dataloader))
print('Training')
loss_sum = 0
for i, data in enumerate(tqdm(train_dataloader, total=len(train_dataloader))):
#for nbatch, data in enumerate(train_dataloader):
    
    optimizer.zero_grad()
    
    img, img_id, images_sizes,  bbox_data, bbox_labels = data
    
    
    gloc = Variable(bbox_data.transpose(1, 2).contiguous(), requires_grad=False).to('cuda')
    glabel = Variable(bbox_labels, requires_grad=False).to('cuda')
   # 
    ploc, plabel = model(img.to('cuda'))
    #print(ploc.device, plabel.device, gloc.device, glabel.device)
    loss = loss_func(ploc=ploc, plabel=plabel, gloc=gloc, glabel=glabel )
    loss_sum += loss.item()
    loss.backward()
    optimizer.step()
    
    #prog_bar.update(i)
    #(desc=f"Loss: {1:.4f}"
#print('loss =', loss_sum / len(train_dataloader))

Training


100%|██████████| 155/155 [03:54<00:00,  1.51s/it]

loss = 3.7094279981428575





In [None]:
target = []
preds = []

for nbatch, data in enumerate(tqdm(val_dataloader, total=len(val_dataloader))):
    model.eval()
    img, img_id, images_sizes,  bbox_data, bbox_labels = data
    with torch.no_grad():
        ploc, plabel = model(img.to('cuda'))
        detections = encoder.decode_batch(ploc, plabel, 0.5, 200)
        
   
    for idx in range(ploc.shape[0]):
        true_dict = dict()
        preds_dict = dict()
        htot, wtot = images_sizes[0][idx].item(), images_sizes[1][idx].item()   
        pred_bbx = detections[idx][0]
        prob = detections[idx][2]
        tr_bbx = bbox_labels[idx] > 0
        bbx_target = []
        for j in range(len(bbox_data[idx][tr_bbx ])):
            l,t,r,b = bbox_data[idx][tr_bbx ][j].detach().cpu().numpy()
            bbx_target.append([l* wtot,t* htot,r* wtot,b* htot])

        bbx_pred = []  
        for j in range(pred_bbx.shape[0]):
            l,t,r,b = pred_bbx[j].detach().cpu().numpy()
            bbx_pred.append([l* wtot,t* htot,r* wtot,b* htot])

        true_dict['boxes'] = torch.tensor(bbx_target).detach().cpu()
        true_dict['labels'] = bbox_labels[idx][tr_bbx].detach().cpu()
        preds_dict['boxes'] = torch.tensor(bbx_pred).detach().cpu()
        preds_dict['scores'] = detections[idx][2].detach().cpu()
        preds_dict['labels'] = detections[idx][1].detach().cpu()
        preds.append(preds_dict)
        target.append(true_dict)
        
        if nbatch == 15:
            break

metric = MeanAveragePrecision()
metric.update(preds, target)
metric_summary = metric.compute();
 

  0%|          | 0/155 [00:00<?, ?it/s]

 70%|██████▉   | 108/155 [05:11<02:15,  2.88s/it]


KeyboardInterrupt: 

In [None]:
print(f"Epoch #{0} train loss: {loss_sum / len(train_dataloader):.3f}")   
#print(f"Epoch #{0} mAP: {metric_summary['map']}") 
#print('loss =', loss_sum / len(train_dataloader))

Epoch #0 train loss: 3.709


In [None]:
target = []
preds = []

for nbatch, data in enumerate(tqdm(val_dataloader, total=len(val_dataloader))):
    model.eval()
    img, img_id, images_sizes,  bbox_data, bbox_labels = data
    with torch.no_grad():
        ploc, plabel = model(img.to('cuda')))
        detections = encoder.decode_batch(ploc, plabel, 0.5, 200)   
    
 

    
    import torchvision.transforms as T
    
    for idx in range(ploc.shape[0]):
        true_dict = dict()
        preds_dict = dict()
        
        htot, wtot = images_sizes[0][idx].item(), images_sizes[1][idx].item()
        
        pred_bbx = detections[idx][0]
       # print(detections[idx][1])
        prob = detections[idx][2]
        #pred_bbx = pred_bbx[prob > 0.1]

        # true bbx
        tr_bbx = bbox_labels[idx] > 0
        
        
        bbx_target = []
        for j in range(len(bbox_data[idx][tr_bbx ])):
            l,t,r,b = bbox_data[idx][tr_bbx ][j].detach().cpu().numpy()
            bbx_target.append([l* wtot,t* htot,r* wtot,b* htot])
          
        #ped bbx  
        bbx_pred = []  
        for j in range(pred_bbx.shape[0]):
            l,t,r,b = pred_bbx[j].detach().cpu().numpy()
            bbx_pred.append([l* wtot,t* htot,r* wtot,b* htot])
            
        
        true_dict['boxes'] = torch.tensor(bbx_target).detach().cpu()
        true_dict['labels'] = bbox_labels[idx][tr_bbx].detach().cpu()
        
       
        preds_dict['boxes'] = torch.tensor(bbx_pred).detach().cpu()
        preds_dict['scores'] = detections[idx][2].detach().cpu()
        preds_dict['labels'] = detections[idx][1].detach().cpu()
        preds.append(preds_dict)
        target.append(true_dict)

    if nbatch == 300:
        break
        
            
            
            
      
        
    

SyntaxError: unmatched ')' (4047458937.py, line 8)

In [None]:
nbatch

300

In [None]:
metric = MeanAveragePrecision()
metric.update(preds, target)
metric_summary = metric.compute();
metric_summary



{'map': tensor(0.2627),
 'map_50': tensor(0.4435),
 'map_75': tensor(0.2725),
 'map_small': tensor(0.0437),
 'map_medium': tensor(0.2429),
 'map_large': tensor(0.3938),
 'mar_1': tensor(0.2423),
 'mar_10': tensor(0.3550),
 'mar_100': tensor(0.3740),
 'mar_small': tensor(0.0841),
 'mar_medium': tensor(0.3699),
 'mar_large': tensor(0.5276),
 'map_per_class': tensor(-1.),
 'mar_100_per_class': tensor(-1.),
 'classes': tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
         37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
         55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
         73, 74, 75, 76, 77, 78, 79, 80], dtype=torch.int32)}

In [None]:
{"ID": "284193,1c88700092bff1e9", "gtboxes": [{"fbox": [-4, 331, 35, 114], "tag": "person", "hbox": [1, 334, 12, 17], "extra": {"box_id": 0, "occ": 0}, "vbox": [0, 331, 31, 114], "head_attr": {"ignore": 0, "occ": 0, "unsure": 0}}, {"fbox": [45, 336, 25, 81], "tag": "person", "hbox": [53, 340, 11, 13], "extra": {"box_id": 1, "occ": 1}, "vbox": [48, 336, 22, 28], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [64, 348, 28, 52], "tag": "person", "hbox": [80, 350, 6, 7], "extra": {"box_id": 2, "occ": 1}, "vbox": [65, 349, 28, 50], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [86, 351, 21, 47], "tag": "person", "hbox": [86, 351, 21, 47], "extra": {"box_id": 3, "ignore": 1}, "vbox": [86, 351, 21, 47], "head_attr": {}}, {"fbox": [95, 344, 21, 54], "tag": "person", "hbox": [105, 345, 6, 7], "extra": {"box_id": 4, "occ": 1}, "vbox": [95, 344, 18, 53], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [263, 339, 117, 200], "tag": "person", "hbox": [287, 351, 33, 28], "extra": {"box_id": 5, "occ": 0}, "vbox": [263, 339, 117, 141], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [165, 343, 91, 192], "tag": "person", "hbox": [187, 353, 29, 31], "extra": {"box_id": 6, "occ": 0}, "vbox": [165, 343, 91, 137], "head_attr": {"ignore": 0, "occ": 0, "unsure": 0}}, {"fbox": [129, 343, 90, 175], "tag": "person", "hbox": [149, 348, 24, 28], "extra": {"box_id": 7, "occ": 1}, "vbox": [132, 343, 50, 107], "head_attr": {"ignore": 0, "occ": 0, "unsure": 0}}, {"fbox": [102, 341, 77, 136], "tag": "person", "hbox": [120, 343, 23, 24], "extra": {"box_id": 8, "occ": 1}, "vbox": [105, 341, 37, 91], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [335, 352, 17, 29], "tag": "person", "hbox": [338, 354, 5, 5], "extra": {"box_id": 9, "occ": 0}, "vbox": [335, 352, 17, 29], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [346, 353, 13, 26], "tag": "person", "hbox": [353, 355, 4, 4], "extra": {"box_id": 10, "occ": 1}, "vbox": [348, 353, 11, 25], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [357, 354, 10, 26], "tag": "person", "hbox": [361, 356, 5, 5], "extra": {"box_id": 11, "occ": 0}, "vbox": [357, 354, 10, 26], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [408, 351, 12, 27], "tag": "person", "hbox": [408, 351, 12, 27], "extra": {"box_id": 12, "ignore": 1}, "vbox": [408, 351, 12, 27], "head_attr": {}}, {"fbox": [482, 338, 30, 68], "tag": "person", "hbox": [494, 339, 10, 10], "extra": {"box_id": 13, "occ": 1}, "vbox": [485, 340, 26, 66], "head_attr": {"ignore": 0, "occ": 0, "unsure": 0}}, {"fbox": [527, 343, 16, 35], "tag": "person", "hbox": [532, 345, 5, 6], "extra": {"box_id": 14, "occ": 1}, "vbox": [528, 344, 15, 34], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [635, 327, 45, 133], "tag": "person", "hbox": [651, 329, 15, 17], "extra": {"box_id": 15, "occ": 1}, "vbox": [636, 327, 44, 131], "head_attr": {"ignore": 0, "occ": 0, "unsure": 0}}, {"fbox": [668, 335, 42, 123], "tag": "person", "hbox": [678, 337, 17, 17], "extra": {"box_id": 16, "occ": 0}, "vbox": [668, 335, 42, 123], "head_attr": {"ignore": 0, "occ": 0, "unsure": 0}}, {"fbox": [661, 336, 23, 40], "tag": "person", "hbox": [666, 339, 7, 7], "extra": {"box_id": 17, "occ": 1}, "vbox": [664, 337, 21, 40], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [832, 332, 24, 50], "tag": "person", "hbox": [842, 334, 7, 7], "extra": {"box_id": 18, "occ": 1}, "vbox": [835, 332, 17, 49], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [788, 332, 17, 54], "tag": "person", "hbox": [792, 333, 7, 7], "extra": {"box_id": 19, "occ": 1}, "vbox": [789, 333, 16, 53], "head_attr": {"ignore": 0, "occ": 0, "unsure": 0}}, {"fbox": [773, 342, 22, 42], "tag": "person", "hbox": [778, 344, 7, 7], "extra": {"box_id": 20, "occ": 1}, "vbox": [776, 343, 19, 41], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [759, 337, 32, 48], "tag": "person", "hbox": [770, 339, 8, 8], "extra": {"box_id": 21, "occ": 0}, "vbox": [759, 337, 32, 48], "head_attr": {"ignore": 0, "occ": 1, "unsure": 0}}, {"fbox": [19, 335, 35, 68], "tag": "mask", "hbox": [19, 335, 35, 68], "extra": {"ignore": 1}, "vbox": [19, 335, 35, 68], "head_attr": {}}, {"fbox": [63, 343, 15, 48], "tag": "mask", "hbox": [63, 343, 15, 48], "extra": {"ignore": 1}, "vbox": [63, 343, 15, 48], "head_attr": {}}, {"fbox": [86, 348, 8, 29], "tag": "mask", "hbox": [86, 348, 8, 29], "extra": {"ignore": 1}, "vbox": [86, 348, 8, 29], "head_attr": {}}, {"fbox": [110, 339, 17, 32], "tag": "mask", "hbox": [110, 339, 17, 32], "extra": {"ignore": 1}, "vbox": [110, 339, 17, 32], "head_attr": {}}, {"fbox": [129, 343, 19, 39], "tag": "mask", "hbox": [129, 343, 19, 39], "extra": {"ignore": 1}, "vbox": [129, 343, 19, 39], "head_attr": {}}, {"fbox": [170, 352, 13, 27], "tag": "mask", "hbox": [170, 352, 13, 27], "extra": {"ignore": 1}, "vbox": [170, 352, 13, 27], "head_attr": {}}, {"fbox": [211, 345, 12, 39], "tag": "mask", "hbox": [211, 345, 12, 39], "extra": {"ignore": 1}, "vbox": [211, 345, 12, 39], "head_attr": {}}, {"fbox": [211, 216, 48, 64], "tag": "mask", "hbox": [211, 216, 48, 64], "extra": {"ignore": 1}, "vbox": [211, 216, 48, 64], "head_attr": {}}, {"fbox": [287, 251, 36, 49], "tag": "mask", "hbox": [287, 251, 36, 49], "extra": {"ignore": 1}, "vbox": [287, 251, 36, 49], "head_attr": {}}, {"fbox": [233, 346, 52, 36], "tag": "mask", "hbox": [233, 346, 52, 36], "extra": {"ignore": 1}, "vbox": [233, 346, 52, 36], "head_attr": {}}, {"fbox": [386, 352, 15, 28], "tag": "mask", "hbox": [386, 352, 15, 28], "extra": {"ignore": 1}, "vbox": [386, 352, 15, 28], "head_attr": {}}, {"fbox": [451, 350, 15, 28], "tag": "mask", "hbox": [451, 350, 15, 28], "extra": {"ignore": 1}, "vbox": [451, 350, 15, 28], "head_attr": {}}, {"fbox": [536, 344, 7, 10], "tag": "mask", "hbox": [536, 344, 7, 10], "extra": {"ignore": 1}, "vbox": [536, 344, 7, 10], "head_attr": {}}, {"fbox": [546, 342, 96, 33], "tag": "mask", "hbox": [546, 342, 96, 33], "extra": {"ignore": 1}, "vbox": [546, 342, 96, 33], "head_attr": {}}, {"fbox": [694, 330, 75, 46], "tag": "mask", "hbox": [694, 330, 75, 46], "extra": {"ignore": 1}, "vbox": [694, 330, 75, 46], "head_attr": {}}, {"fbox": [768, 333, 23, 18], "tag": "mask", "hbox": [768, 333, 23, 18], "extra": {"ignore": 1}, "vbox": [768, 333, 23, 18], "head_attr": {}}, {"fbox": [796, 324, 36, 57], "tag": "mask", "hbox": [796, 324, 36, 57], "extra": {"ignore": 1}, "vbox": [796, 324, 36, 57], "head_attr": {}}, {"fbox": [845, 320, 9, 25], "tag": "mask", "hbox": [845, 320, 9, 25], "extra": {"ignore": 1}, "vbox": [845, 320, 9, 25], "head_attr": {}}]}

In [None]:
true_dict

{'boxes': tensor([[289.1390, 252.3710, 308.5259, 270.9455],
         [211.2121, 206.1612, 241.8972, 219.8664],
         [403.1069, 238.3774, 423.8143, 259.7459],
         [172.2977, 174.4394, 183.6966, 192.7408],
         [152.9007, 191.4473, 176.8827, 209.5964],
         [274.2797, 165.6423, 285.4447, 181.8887],
         [105.1875, 280.3208, 114.4473, 286.3444],
         [174.7280, 185.4683, 187.7749, 207.2405],
         [189.3316, 260.3939, 239.2760, 281.5376],
         [342.8775, 151.9134, 487.2823, 256.2133],
         [314.1092, 204.0212, 340.9726, 218.0405],
         [448.2860, 295.9073, 607.7550, 332.0896],
         [333.9937, 314.9082, 362.3904, 350.2684],
         [158.6172, 175.4697, 182.1319, 195.7374],
         [360.7279, 313.5639, 378.2633, 343.2836],
         [262.6660, 192.0515, 298.5731, 206.7117],
         [416.4047, 276.2730, 618.3924, 366.9688],
         [227.0838, 220.0301, 243.8979, 231.2897],
         [100.1689, 171.2339, 143.9916, 210.6805],
         [305.9978, 19

In [None]:
plabel

tensor([[[ 7.8789e+00,  7.9900e+00,  7.9046e+00,  ...,  6.2718e+00,
           6.5717e+00,  6.3975e+00],
         [ 1.9557e+00,  2.2101e+00,  2.0473e+00,  ...,  2.0530e+00,
           2.0681e+00,  2.1723e+00],
         [-1.8214e-01,  3.9990e-02,  1.8266e-01,  ..., -1.5935e-01,
          -1.4367e-01, -1.5316e-01],
         ...,
         [-4.3708e-01, -4.2389e-01, -4.1271e-01,  ...,  1.5653e-01,
           1.0740e-02,  1.3684e-01],
         [-8.9916e-01, -9.0540e-01, -6.4122e-01,  ..., -7.8573e-01,
          -7.8231e-01, -7.8420e-01],
         [-3.2039e-01, -5.3829e-01, -5.9477e-01,  ..., -3.5390e-01,
          -3.8509e-01, -2.8818e-01]],

        [[ 7.9637e+00,  8.2926e+00,  8.0334e+00,  ...,  7.0202e+00,
           7.0389e+00,  6.8604e+00],
         [ 9.4671e-01,  1.0599e+00,  1.2081e+00,  ...,  2.6190e+00,
           2.7694e+00,  2.7887e+00],
         [-4.8606e-01, -4.0981e-01, -2.6360e-01,  ..., -6.6245e-01,
          -6.1814e-01, -5.7342e-01],
         ...,
         [-3.6136e-01, -5

In [None]:
detections[0][3]

IndexError: tuple index out of range

In [None]:
with torch.no_grad():
        ploc, plabel = model(img)

In [None]:
plabel

tensor([[[ 7.8789e+00,  7.9900e+00,  7.9046e+00,  ...,  6.2718e+00,
           6.5717e+00,  6.3975e+00],
         [ 1.9557e+00,  2.2101e+00,  2.0473e+00,  ...,  2.0530e+00,
           2.0681e+00,  2.1723e+00],
         [-1.8214e-01,  3.9990e-02,  1.8266e-01,  ..., -1.5935e-01,
          -1.4367e-01, -1.5316e-01],
         ...,
         [-4.3708e-01, -4.2389e-01, -4.1271e-01,  ...,  1.5653e-01,
           1.0740e-02,  1.3684e-01],
         [-8.9916e-01, -9.0540e-01, -6.4122e-01,  ..., -7.8573e-01,
          -7.8231e-01, -7.8420e-01],
         [-3.2039e-01, -5.3829e-01, -5.9477e-01,  ..., -3.5390e-01,
          -3.8509e-01, -2.8818e-01]],

        [[ 7.9637e+00,  8.2926e+00,  8.0334e+00,  ...,  7.0202e+00,
           7.0389e+00,  6.8604e+00],
         [ 9.4671e-01,  1.0599e+00,  1.2081e+00,  ...,  2.6190e+00,
           2.7694e+00,  2.7887e+00],
         [-4.8606e-01, -4.0981e-01, -2.6360e-01,  ..., -6.6245e-01,
          -6.1814e-01, -5.7342e-01],
         ...,
         [-3.6136e-01, -5

In [None]:
detections[0][1]

tensor([57, 57, 42, 27, 57, 46, 70, 57, 74, 42, 42, 42, 61, 46, 45, 46, 42, 46,
        29, 57, 10, 42, 42, 57, 57, 77, 74, 29, 72, 40, 61, 42, 57, 45, 72, 54,
        70, 74, 47, 27, 40, 42, 46, 57, 42, 46, 42, 46, 61, 45, 40, 57, 40, 61,
        57, 46, 28, 45, 57, 57, 42, 42, 61,  1, 70, 29, 27,  1,  1,  1, 42,  1,
         1,  1,  1,  1,  1,  1, 57,  1, 61,  1, 40, 45,  1, 42,  1,  1,  1, 29,
         1, 40,  1, 45,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 54,
         1,  1,  1,  1,  1,  1, 70,  1, 72, 40,  1, 57, 72, 25,  1, 42,  1,  1,
        57,  1,  1, 70, 42, 45, 40, 61,  1,  1, 42,  1,  1, 46, 42, 42,  1,  1,
         1,  1, 61,  1, 46, 46, 46,  1])

In [None]:
            true_dict = dict()
            preds_dict = dict()
            true_dict['boxes'] = targets[i]['boxes'].detach().cpu()
            true_dict['labels'] = targets[i]['labels'].detach().cpu()
            preds_dict['boxes'] = outputs[i]['boxes'].detach().cpu()
            preds_dict['scores'] = outputs[i]['scores'].detach().cpu()
            preds_dict['labels'] = outputs[i]['labels'].detach().cpu()
            preds.append(preds_dict)
            target.append(true_dict)

In [None]:
val_

<torch.utils.data.dataloader.DataLoader at 0x2c2f0fd5c10>

In [None]:
val_dataloader.ima

<torch.utils.data.dataloader.DataLoader at 0x2c2f0fd5c10>

In [None]:
bbx

tensor([[ 0.4518,  0.5257,  0.4822,  0.5644],
        [ 0.5574,  0.6089,  0.5941,  0.6754],
        [ 0.6297,  0.4966,  0.6623,  0.5411],
        [ 0.3299,  0.4295,  0.3780,  0.4580],
        [ 0.2389,  0.3988,  0.2765,  0.4366],
        [ 0.2958,  0.5425,  0.3740,  0.5865],
        [ 0.2692,  0.3634,  0.2870,  0.4015],
        [ 0.5660,  0.3491,  0.6670,  0.4734],
        [ 0.2730,  0.3864,  0.2934,  0.4317],
        [ 0.7001,  0.6165,  0.9498,  0.6920],
        [ 0.4286,  0.3451,  0.4460,  0.3789],
        [ 0.1644,  0.5840,  0.1788,  0.5965],
        [ 0.4907,  0.4250,  0.5328,  0.4542],
        [ 0.5360,  0.3169,  0.7613,  0.5337],
        [ 0.2477,  0.3656,  0.2846,  0.4077],
        [ 0.4103,  0.4001,  0.4666,  0.4306],
        [ 0.5219,  0.6560,  0.5662,  0.7297],
        [ 0.3547,  0.4584,  0.3811,  0.4818],
        [ 0.5637,  0.6532,  0.5910,  0.7151],
        [ 0.1565,  0.3567,  0.2251,  0.4388],
        [ 0.6510,  0.5756,  0.9662,  0.7645],
        [ 0.1213,  0.5566,  0.1827

In [None]:
from ssd.decode_results import Processing as processing
with torch.no_grad():
    detections = model(img)

results_per_inputs = processing.decode_results(
        predictions=detections,
        criteria=.5,
        max_output=100,
    )


In [None]:
detections

[(tensor([[ 6.1276e-01,  4.2599e-01,  8.0127e-01,  7.5199e-01],
          [ 9.6533e-01,  8.3229e-01,  1.0001e+00,  9.9713e-01],
          [ 3.3259e-01,  6.6834e-01,  3.9315e-01,  7.5043e-01],
          [ 7.7006e-01,  4.5981e-01,  9.0801e-01,  7.2300e-01],
          [ 3.8036e-01,  4.4255e-01,  5.6315e-01,  9.1908e-01],
          [ 1.2822e-01,  5.6296e-01,  2.2196e-01,  6.8246e-01],
          [ 1.8790e-01,  6.0991e-01,  2.7853e-01,  7.0757e-01],
          [ 5.5640e-01,  4.9235e-01,  6.2572e-01,  6.3830e-01],
          [ 8.8190e-01,  5.5262e-01,  9.8001e-01,  6.5568e-01],
          [ 7.7535e-01,  5.2045e-01,  8.8139e-01,  7.9882e-01],
          [ 5.5577e-01,  4.6483e-01,  6.2474e-01,  6.3195e-01],
          [ 5.7574e-01,  4.9926e-01,  6.2265e-01,  5.3800e-01],
          [ 5.8470e-02,  7.5788e-01,  1.7593e-01,  9.1012e-01],
          [ 4.7279e-01,  4.4053e-01,  5.1677e-01,  4.8856e-01],
          [ 5.0676e-01,  4.0332e-01,  5.7436e-01,  4.7156e-01],
          [ 1.6843e-01,  7.2205e-01,  2.

In [None]:
a = np.array([[1,2,4], [5,6,7]])

In [None]:
np.argwhere(a>5)[:,0]

array([1, 1], dtype=int64)

In [None]:
ploc_i = ploc[0, :, :].unsqueeze(0)
plabel_i = plabel[0, :, :].unsqueeze(0)
r1 = encoder.decode_batch(ploc_i, plabel_i, 0.50, 200)[0]

In [None]:
x = torch.abs(encoder.decode_batch(ploc, plabel, 0.50, 200)[0][0] - r1[0])

In [None]:
encoder.decode_batch(ploc, plabel, 0.50, 200)[0][2] == r1[2]

tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, Tr

In [None]:
r1[1]

tensor([74, 61, 46, 40, 27, 42, 46, 42, 57, 72, 42, 46, 46, 70, 25, 42, 72, 45,
        61, 42, 57, 40, 45, 46, 46, 45, 40, 29, 40, 28, 57, 29, 42, 42,  1, 45,
        27,  1, 29, 70,  1, 42,  1, 45, 61, 46,  1,  1,  1,  1,  1, 57, 70, 61,
        42,  1,  1,  1, 70, 42, 29, 70, 45, 61,  1, 40, 57, 40, 42, 29,  1, 70,
         1,  1,  1, 61,  1, 40, 46,  1,  1,  1,  1,  1,  1,  1,  1,  1, 70,  1,
         1,  1, 57,  1, 40,  1, 46, 42,  1,  1,  1, 46,  1,  1,  1, 46,  1,  1,
        57, 61,  1,  1,  1, 61,  1, 72,  1, 54,  1,  1,  1,  1, 46, 70, 57, 61,
        46,  1,  1, 46,  1,  1, 72,  1, 40, 72,  1, 57,  1, 25,  1, 70, 46,  1,
        46, 42,  1,  1, 61, 42, 42,  1, 42, 45, 46, 46, 70, 46, 40,  1,  1, 61,
         1, 42, 46,  1,  1,  1,  1, 46, 46,  1, 42,  1,  1,  1,  1, 46,  1,  1,
         1,  1, 46,  1, 42,  1, 46,  1,  1,  1, 46, 61,  1, 46, 46, 46, 46, 46,
        46,  1])

In [None]:
r1[0]

tensor([[ 0.5748,  0.4690,  0.6309,  0.5802],
        [ 0.2536,  0.4641,  0.7277,  0.9609],
        [ 0.3244,  0.6973,  0.4002,  0.7791],
        [ 0.3349,  0.6022,  0.3884,  0.7164],
        [ 0.6753,  0.2618,  0.7764,  0.5519],
        [ 0.2016,  0.6022,  0.2537,  0.7164],
        [ 0.0676,  0.6802,  0.1737,  0.8004],
        [ 0.3093,  0.6547,  0.4169,  0.7118],
        [ 0.8837,  0.5908,  0.9687,  0.6708],
        [ 0.8398,  0.4672,  0.9641,  0.5824],
        [ 0.4842,  0.4309,  0.5626,  0.5084],
        [ 0.0312,  0.8038,  0.1026,  0.8874],
        [ 0.0943,  0.7866,  0.2010,  0.9095],
        [ 0.1232,  0.4420,  0.3623,  0.6977],
        [ 0.6536,  0.2542,  0.7732,  0.3644],
        [ 0.2816,  0.6288,  0.3345,  0.7437],
        [ 0.8132,  0.4406,  0.9368,  0.5551],
        [ 0.1958, -0.0039,  0.2876,  0.2704],
        [ 0.1477,  0.3586,  0.6112,  0.8446],
        [ 0.1761,  0.6281,  0.2808,  0.6848],
        [ 0.1476,  0.5471,  0.2554,  0.6641],
        [ 0.2922,  0.5008,  0.4073

In [None]:
r1[0]

tensor([[ 0.5748,  0.4690,  0.6309,  0.5802],
        [ 0.2536,  0.4641,  0.7277,  0.9609],
        [ 0.3244,  0.6973,  0.4002,  0.7791],
        [ 0.3349,  0.6022,  0.3884,  0.7164],
        [ 0.6753,  0.2618,  0.7764,  0.5519],
        [ 0.2016,  0.6022,  0.2537,  0.7164],
        [ 0.0676,  0.6802,  0.1737,  0.8004],
        [ 0.3093,  0.6547,  0.4169,  0.7118],
        [ 0.8837,  0.5908,  0.9687,  0.6708],
        [ 0.8398,  0.4672,  0.9641,  0.5824],
        [ 0.4842,  0.4309,  0.5626,  0.5084],
        [ 0.0312,  0.8038,  0.1026,  0.8874],
        [ 0.0943,  0.7866,  0.2010,  0.9095],
        [ 0.1232,  0.4420,  0.3623,  0.6977],
        [ 0.6536,  0.2542,  0.7732,  0.3644],
        [ 0.2816,  0.6288,  0.3345,  0.7437],
        [ 0.8132,  0.4406,  0.9368,  0.5551],
        [ 0.1958, -0.0039,  0.2876,  0.2704],
        [ 0.1477,  0.3586,  0.6112,  0.8446],
        [ 0.1761,  0.6281,  0.2808,  0.6848],
        [ 0.1476,  0.5471,  0.2554,  0.6641],
        [ 0.2922,  0.5008,  0.4073

In [None]:
detections[0][0].shape

torch.Size([182, 4])

In [None]:
res[0][0].shape

torch.Size([200, 4])

In [None]:
detections[0][0].shape

torch.Size([182, 4])

AttributeError: 'list' object has no attribute 'shape'

In [None]:
encoder.decode_batch(ploc, plabel)[0][2]

tensor([0.0525, 0.0527, 0.0531, 0.0531, 0.0538, 0.0539, 0.0542, 0.0544, 0.0545,
        0.0550, 0.0552, 0.0552, 0.0552, 0.0554, 0.0555, 0.0556, 0.0564, 0.0565,
        0.0566, 0.0566, 0.0567, 0.0568, 0.0571, 0.0572, 0.0584, 0.0587, 0.0589,
        0.0589, 0.0591, 0.0592, 0.0592, 0.0593, 0.0593, 0.0599, 0.0608, 0.0615,
        0.0615, 0.0621, 0.0624, 0.0627, 0.0628, 0.0639, 0.0645, 0.0652, 0.0654,
        0.0662, 0.0664, 0.0668, 0.0669, 0.0672, 0.0678, 0.0692, 0.0693, 0.0694,
        0.0696, 0.0700, 0.0703, 0.0705, 0.0706, 0.0716, 0.0716, 0.0720, 0.0723,
        0.0729, 0.0729, 0.0732, 0.0732, 0.0734, 0.0735, 0.0737, 0.0740, 0.0742,
        0.0748, 0.0748, 0.0748, 0.0749, 0.0753, 0.0757, 0.0759, 0.0760, 0.0762,
        0.0764, 0.0771, 0.0772, 0.0775, 0.0777, 0.0782, 0.0785, 0.0787, 0.0792,
        0.0794, 0.0797, 0.0801, 0.0803, 0.0808, 0.0812, 0.0817, 0.0818, 0.0825,
        0.0832, 0.0836, 0.0838, 0.0842, 0.0852, 0.0855, 0.0856, 0.0857, 0.0867,
        0.0876, 0.0883, 0.0885, 0.0886, 

In [None]:
ploc.shape

torch.Size([4, 4, 8732])

In [None]:
detections[1].shape

torch.Size([4, 81, 8732])

In [None]:
for nbatch, data in enumerate(train_dataloader):
    

In [None]:
data_val[0][4]

tensor([40, 61,  1, 44, 46, 46, 70,  1, 42, 42, 46, 46, 51, 45, 51, 51, 70, 52,
        72,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0])

In [None]:
next(iter(val_dataloader))[4]

tensor([[40, 61,  1,  ...,  0,  0,  0],
        [59, 57, 57,  ...,  0,  0,  0],
        [ 1,  1,  1,  ...,  0,  0,  0],
        ...,
        [ 3,  4,  6,  ...,  0,  0,  0],
        [ 3,  3,  3,  ...,  0,  0,  0],
        [ 3,  3,  3,  ...,  0,  0,  0]])

In [None]:
label_out = torch.zeros(1000, dtype=torch.long)
label = torch.tensor([1,2,3])
label_out[:label.size(0)] = label
label_out 

tensor([1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [None]:
a[4]

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])

In [None]:
b= next(iter(train_dataloader2))

In [None]:
b[4]

(tensor([0, 0, 0,  ..., 0, 0, 0]),
 tensor([0, 0, 0,  ..., 0, 0, 0]),
 tensor([0, 0, 0,  ..., 0, 0, 0]))

In [None]:
for nbatch, data in enumerate(train_dataloader):
    
    #optimizer.zero_grad()
    
    img, img_id, images_sizes,  bbox_data, bbox_labels = data
    
    gloc = Variable(bbox_data.transpose(1, 2).contiguous(), requires_grad=False)
    glabel = Variable(bbox_labels, requires_grad=False)
    
    ploc, plabel = model(img)
    loss = loss_func(ploc=ploc, plabel=plabel, gloc=gloc, glabel=glabel ) 
    
    break

In [None]:
loss

tensor(3.9616, grad_fn=<MeanBackward1>)

In [None]:
ploc.shape

torch.Size([4, 4, 8732])

In [None]:
plabel.shape

torch.Size([4, 81, 8732])

In [None]:
images_sizes

[tensor([189, 600, 426, 437]), tensor([197, 227, 640, 183])]

In [None]:
img_id

tensor([404484, 263644,  43816, 412286])

In [None]:
bbox_sizes

tensor([[[0.0133, 0.0133, 0.0700, 0.0700],
         [0.0400, 0.0133, 0.0700, 0.0700],
         [0.0667, 0.0133, 0.0700, 0.0700],
         ...,
         [0.5000, 0.5000, 0.9558, 0.9558],
         [0.5000, 0.5000, 1.0000, 0.6152],
         [0.5000, 0.5000, 0.6152, 1.0000]],

        [[0.0133, 0.0133, 0.0700, 0.0700],
         [0.0400, 0.0133, 0.0700, 0.0700],
         [0.0667, 0.0133, 0.0700, 0.0700],
         ...,
         [0.5000, 0.5000, 0.9558, 0.9558],
         [0.5000, 0.5000, 1.0000, 0.6152],
         [0.5000, 0.5000, 0.6152, 1.0000]],

        [[0.0133, 0.0133, 0.0700, 0.0700],
         [0.0400, 0.0133, 0.0700, 0.0700],
         [0.0667, 0.0133, 0.0700, 0.0700],
         ...,
         [0.5000, 0.5000, 0.9558, 0.9558],
         [0.5000, 0.5000, 1.0000, 0.6152],
         [0.5000, 0.5000, 0.6152, 1.0000]],

        [[0.0133, 0.0133, 0.0700, 0.0700],
         [0.0400, 0.0133, 0.0700, 0.0700],
         [0.0667, 0.0133, 0.0700, 0.0700],
         ...,
         [0.5000, 0.5000, 0.9558, 0

In [None]:
images_sizes

[tensor([189, 600, 426, 437]), tensor([197, 227, 640, 183])]

In [None]:
data[0].shape

torch.Size([4, 3, 300, 300])

In [None]:
i[0][0][0]

tensor([[2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
        [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
        [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
        ...,
        [1.7180, 1.8037, 1.8722,  ..., 2.0777, 2.0948, 2.0948],
        [1.6324, 1.8208, 1.9064,  ..., 2.0777, 2.0948, 2.0948],
        [1.5982, 1.8379, 1.8722,  ..., 2.0948, 2.0777, 2.0777]])

In [None]:
i[0][2][0]

tensor([[ 0.7591,  0.7933,  0.8789,  ...,  1.6153,  1.5639,  1.6495],
        [ 0.7762,  0.7762,  0.8276,  ...,  1.5125,  1.4783,  1.4783],
        [ 0.1597,  0.2282,  0.2796,  ...,  1.4098,  1.3755,  1.3070],
        ...,
        [-1.7754, -1.6213, -1.7240,  ..., -1.1589, -1.1247, -1.0048],
        [-1.8782, -1.6727, -1.6384,  ..., -1.2788, -1.1075, -0.9705],
        [-1.9124, -1.8782, -1.6555,  ..., -1.2788, -1.1075, -1.1075]])

In [None]:
model = nvidia_ssd()

---loading model weights and saving to ssd folder--- 
---loading model complete--- 


In [None]:
for param in model.parameters():
    param.requires_grad = True
    
for param in model.feature_extractor.parameters():
    param.requires_grad = False

In [None]:
model.feature_extractor.parameters()

<generator object Module.parameters at 0x000002097D8A37B0>

In [None]:
datareader.transform

<utils.utils.SSDTransformer at 0x269ad2b5ca0>

In [None]:
transformers_ssd_train.img_trans

Compose(
    Resize(size=(300, 300), interpolation=bilinear, max_size=None, antialias=warn)
    ColorJitter(brightness=(0.875, 1.125), contrast=(0.5, 1.5), saturation=(0.5, 1.5), hue=(-0.05, 0.05))
    ToTensor()
)

In [None]:
torch.manual_seed(0)
np.random.seed(seed=0)


dboxes = dboxes300_coco()

encoder = Encoder(dboxes)
    #print(dboxes.dboxes.shape) # создал дефолтные bbx [8732, 4]
annotate_file = 'COCOdata\\annotations\\instances_val2017.json'
datareader = CocoDataReader(
        img_folder='COCOdata\\val2017\\',
        annotate_file=annotate_file)


In [None]:
model = nvidia_ssd(
        pretrained_default=True,
        pretrainded_custom=False,
    )

---loading model weights and saving to ssd folder--- 
---loading model complete--- 


In [None]:
configs = Configs()
torch.manual_seed(configs.random_seed)
np.random.seed(seed=configs.random_seed)
    #cocoGt =  get_coco_ground_truth()
    #print(cocoGt) # coco аннотации, пока решил сделать все как в исходнике
    
dboxes_class = dboxes300_coco()
    #print(dboxes.dboxes.shape) # создал дефолтные bbx [8732, 4]
annotate_file = 'COCOdata\\annotations\\instances_val2017.json'
datareader = CocoDataReader(
img_folder='COCOdata\\val2017\\',
annotate_file=annotate_file)
    
    #print(datareader.img_keys)
    
     #input ltrb format, output xywh format
img, img_id, size_image, bbox_sizes, bbox_labels = datareader[0]


dboxes_xywh = dboxes_class(order="xywh").unsqueeze(dim=0)
dboxes = dboxes_class("ltrb")
nboxes = dboxes.size(0)
scale_xy = dboxes_class.scale_xy
scale_wh = dboxes_class.scale_wh

72


In [None]:
ious = calc_iou_tensor(bbox_sizes, dboxes) # [n_boxes, 8732]



In [None]:
 criteria = 0.5

best_dbox_ious, best_dbox_idx  = ious.max(dim =0) # выдает номера (строк) ббох, которые набилее юлизки к якорным, массив размера 8732

best_bbox_ious, best_bbox_idx = ious.max(dim = 1) # номер (столбцов) якорных ббх, близких к переданным размер массива 19 

#best_bbox_idx = номера столбцов, в которых максимум для каждой из строк!!!! т.е. первое число - максимум в первой строке

# помещаем максимум ious =2  на те позиции строк, для которых axis =1 имеет максимум ()
best_dbox_ious.index_fill_(0, best_bbox_idx, 2.0)

idx = torch.arange(0, best_bbox_idx.size(0), dtype=torch.int64)# т.к. для каждой строки мы нашли максимум, то необходимо положить номера строк в правильном порядке
best_dbox_idx[best_bbox_idx[idx]] = idx #  максимумы для каждой из строк!!!! т.е. первое число - максимум в первой строке
masks = best_dbox_ious > criteria
labels_out = torch.zeros(nboxes, dtype=torch.long)




best_dbox_idx.shape


torch.Size([8732])

In [None]:
labelsbest_dbox_idx[best_bbox_idx[idx]]

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,  5,  3,  5,  7, 16,  5,
        18])

In [None]:
best_bbox_idx 

tensor([5219, 8558, 8488, 5214, 2663, 2475, 6310, 2357, 5289, 5291,  579,  389,
        3881, 5177, 3881, 3841, 6348, 3881, 6685])

In [None]:
best_dbox_ious[best_bbox_idx]

tensor([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2.])

In [None]:
best_dbox_idx[best_bbox_idx]

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 17, 13, 17, 15, 16, 17,
        18])

In [None]:
a = best_dbox_idx.clone().detach()

In [None]:
best_dbox_ious.shape

torch.Size([8732])

In [None]:
best_bbox_idx.unique()

tensor([ 389,  579, 2357, 2475, 2663, 3841, 3881, 5177, 5214, 5219, 5289, 5291,
        6310, 6348, 6685, 8488, 8558])

In [None]:
best_dbox_idx.shape

torch.Size([8732])