<a href="https://colab.research.google.com/github/khaledlarbi/MVA_DL_TrashDetection/blob/main/anchor_boxes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Preparation-of-the-proposed-region-to-train-the-Fast-RCNN" data-toc-modified-id="Preparation-of-the-proposed-region-to-train-the-Fast-RCNN-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Preparation of the proposed region to train the Fast RCNN</a></span></li><li><span><a href="#Region-proposal-network" data-toc-modified-id="Region-proposal-network-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Region proposal network</a></span></li><li><span><a href="#Training-RPN-network" data-toc-modified-id="Training-RPN-network-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Training RPN network</a></span></li><li><span><a href="#Coco-dataset" data-toc-modified-id="Coco-dataset-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Coco dataset</a></span></li></ul></div>

This notebook aims to provide functions that produce anchor boxes as decribed in the paper.

A box will be describe either as a numpy array $[y^-, x^-, y^+, x^+]$  or as a numpy array $[c_y, c_x, h,w]$

TODO : CHECK +1

# Anchors

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
import matplotlib.pyplot as plt
import matplotlib.patches as patches #In order to draw the box ! (je sais pas pourquoi j'écris en anglais)
from torchvision import models
import torch.utils.data as data
from PIL import Image
import os
import os.path

In [2]:
def vertice_to_yxhw(anchor):
    res = (np.mean((anchor[0],anchor[2])),np.mean((anchor[1],anchor[3])), anchor[2] - anchor[0] + 1, anchor[3] - anchor[1]+1)
    return np.array(res)

In [3]:
def yxhw_to_vertice(anchor):
    res = (anchor[0] - anchor[2]/2, anchor[1] - anchor[3]/2, anchor[0] + anchor[2]/2, anchor[1] + anchor[3]/2)
    return np.array(res)

In [159]:
def xywh_to_vertice(anchor):
  anchor_perm = (anchor[1] + 0.5*anchor[3] ,anchor[0] + 0.5*anchor[2],anchor[3],anchor[2])
  return yxhw_to_vertice(anchor_perm)

In [4]:
def anchor_box(center, ratio, scale, shape_initial, shape_featured):
    sub_width = shape_initial[0]/shape_featured[0]
    sub_height = shape_initial[1]/shape_featured[1]
    anchor_width = sub_width*scale*np.sqrt(ratio)
    anchor_height = sub_height*scale/np.sqrt(ratio)
    
    ym = center[1] - anchor_height/2
    yp = center[1] + anchor_height/2
    xm = center[0] - anchor_width/2
    xp = center[0] + anchor_width/2
    
    anchor = np.array((ym,xm,yp,xp))
    return(anchor)

In [5]:
def list_centers(shape_initial, shape_featured):
    ratio_h = shape_initial[1]/shape_featured[1]
    ratio_w = shape_initial[0]/shape_featured[0]
    #intiail center is the center at the left top corner
    all_centers = [np.array((ratio_w/2, ratio_h/2),dtype=float) + np.array((ratio_w*i, ratio_h*j),dtype=float) for i in range(int(shape_featured[0])) for j in range(int(shape_featured[1]))]
    return(all_centers)

In [6]:
def anchor_boxes(list_ratios, list_scales, shape_initial, shape_featured):
    list_center = list_centers(shape_initial, shape_featured)
    all_anchors = [anchor_box(center, ratio, scale,shape_initial,shape_featured) for center in list_center for ratio in list_ratios
                   for scale in list_scales]
    return(all_anchors)

In [7]:
def check_anchor_inside(anchor_box, shape_initial):
    ym = anchor_box[0]
    yp = anchor_box[2]
    xm = anchor_box[1]
    xp = anchor_box[3]
    is_inside = (min(xm,xp)>0) & (max(xm,xp)<shape_initial[0]) & (max(yp,ym) < shape_initial[1]) & (min(ym,yp) > 0) 
    return(is_inside)

In [8]:
def iou(box1,box2):
    xm = max(box1[1], box2[1])
    xp = min(box1[3], box2[3])
    ym = max(box1[0], box2[0])
    yp = min(box1[2], box2[2])
    
    intersection = 0
    
    if((xm < xp) &(ym < yp)):
        intersection = (xp - xm)*(yp-ym)
    
    union = (box1[3]-box1[1])*(box1[2] - box1[0]) + (box2[3]-box2[1])*(box2[2] - box2[0]) - intersection
    return(intersection/union)

In [9]:
def iou_anchors_vs_gtbox(list_anchors, list_gt_box):
    res = np.transpose([[iou(anchor, gt_box) for anchor in list_anchors] for gt_box in list_gt_box])
    return(np.array(res))

**TODO** : changer la forme de cette fonction en utilisant que des *arrays*.

In [10]:
#Return an array with :
#for all ground truth box, the anchors which maximize the IOU with it
#for all anchor, the max of the IOU

#the first column of the array is the index and the last the IOU 
def best_anchors_from_iou(dt_anchors_vs_gtbox):
    #index highest by gtbox (cond a)
    dt_anchors_vs_gtbox.argmax(axis = 0)
    ind_argmax = np.where(dt_anchors_vs_gtbox == dt_anchors_vs_gtbox.max(axis = 0))[0]
    cond_a = dt_anchors_vs_gtbox[ind_argmax,:].max(axis = 1)
    
    #highest by anchors box (cond b)
    index = dt_anchors_vs_gtbox.argmax(axis = 1)
    iou_max = dt_anchors_vs_gtbox.max(axis = 1)
    cond_b = dt_anchors_vs_gtbox[np.arange(dt_anchors_vs_gtbox.shape[0]),index]
    
    index_res = np.concatenate((ind_argmax,np.arange(dt_anchors_vs_gtbox.shape[0])))
    res = np.concatenate((cond_a, cond_b), axis=0)
    res = np.column_stack((index_res,res))
    return(res)

In [105]:
#label_from_iou returns a np.array containing for each anchor its label. (+1 if foreground, 0 if background and -1 if not used
#during the learning phase)
#The default thresholds are defined according the original paper about Fatest RCNN.

def label_from_iou(dt_anchors_vs_gtbox,pos_threshold = 0.7, neg_threshold = 0.3):
    label = np.full(dt_anchors_vs_gtbox.shape[0],-1)
    iou_max = dt_anchors_vs_gtbox.max(axis = 1)
    #positive labels : 1
    label[iou_max > pos_threshold] = 1
    print(len(iou_max > pos_threshold))
    #negative labels : 0
    label[iou_max < neg_threshold] = 0
    sum((iou_max < neg_threshold))

    #for anchors whose maximize IOU for a given object : +1
    dt_anchors_vs_gtbox.argmax(axis = 0)
    ind_argmax = np.where(dt_anchors_vs_gtbox == dt_anchors_vs_gtbox.max(axis = 0))[0]
    label[ind_argmax] = 1
    return(label)

In [12]:
def loc(anchor_box, gt_box):
    anchor_box = vertice_to_yxhw(anchor_box)
    gt_box = vertice_to_yxhw(gt_box)
    
    y = (gt_box[0] - anchor_box[0])/anchor_box[2]
    x = (gt_box[1] - anchor_box[1])/anchor_box[3]
    w = np.log(gt_box[3]/anchor_box[3])
    h = np.log(gt_box[2]/anchor_box[2])
    
    return np.array((y,x,h,w))

In [13]:
def deloc(anchor_box, reparam_box):
    anchor_box = vertice_to_yxhw(anchor_box)
    y = anchor_box[0] + (reparam_box[0] * anchor_box[2])
    x = anchor_box[1] + (reparam_box[1] * anchor_box[3])
    h = np.exp(reparam_box[2])*anchor_box[2]
    w = np.exp(reparam_box[3])*anchor_box[3]
    return np.array((y,x,h,w))

In [205]:
def reparam_all_anchors(list_anchors, list_gt_box,pos_threshold = 0.7, neg_threshold = 0.3):
    iou = iou_anchors_vs_gtbox(list_anchors, list_gt_box)
    index_max_gtbox = iou.argmax(axis = 1)
    gt_box_by_anchors = [list_gt_box[i] for i in index_max_gtbox]
    #suboptimal
    res = [loc(anchor, gt_box) for anchor,gt_box in zip(list_anchors, gt_box_by_anchors)] 
    #compute labels
    labels = label_from_iou(iou, pos_threshold, neg_threshold)
    return res,labels

In [15]:
def deparam_all_anchors(list_anchors, list_box_param):
    res = [(deloc(anchor, box_param)) for anchor,box_param in zip(list_anchors, list_box_param)]
    return res

In [110]:
#TODO : heck how to fill when

def index_training_proposal(dt_anchors_vs_gtbox, nsize = 256, pos_ratio = 0.5,pos_threshold = 0.7, neg_threshold = 0.3):
    #number of positive units we need to reach in the training sample (we want a balanced sample)
    nb_pos_to_drawn = round(nsize*pos_ratio)
    lab = label_from_iou(dt_anchors_vs_gtbox, pos_threshold, neg_threshold)
    pos_lab = np.where(lab == 1)[0]
    neg_lab = np.where(lab == 0)[0]
    pos = len(pos_lab)
    neg = len(neg_lab)
    
    if (pos > nb_pos_to_drawn):
        disabled_index_pos = np.random.choice(pos_lab, size=(pos - nb_pos_to_drawn), replace = False)
        lab[disabled_index_pos] = -1
    
    if (neg > nsize - nb_pos_to_drawn):
        if(pos < nb_pos_to_drawn):
            disabled_index_neg = np.random.choice(neg_lab, size=(neg - nsize + pos), replace = False)
        else:
            disabled_index_neg = np.random.choice(neg_lab, size=(neg + nb_pos_to_drawn - nsize), replace = False)
        
        lab[disabled_index_neg] = -1    
    
    res = np.where((lab == 0) | (lab == 1))[0]
    print(dt_anchors_vs_gtbox.max(axis = 1))
    return res

In [90]:
#TODO : heck how to fill when

def batch_training_proposal_RPN(image, feature_shape, ratios, scales,gt_box,nsize = 256, pos_ratio = 0.5, pos_threshold = 0.7, neg_threshold = 0.3):
    #define all anchors using the feature map and the initial picture shapes.
    anchors_boxes = anchor_boxes(ratios, scales, tuple(image.shape[2:]), tuple(feature_shape[2:]))
    #check if each box is inside the initial image
    anchors_boxes = [box for box in anchors_boxes if check_anchor_inside(box, image_torch.shape[2:])]
    #IOU anchors vs gt box
    iou_anc_gt_box = iou_anchors_vs_gtbox(anchors_boxes, gt_box)
    #Index of the units we keep
    ind_for_sample = index_training_proposal(iou_anc_gt_box,nsize, pos_ratio)
    anchors_boxes_reparam,lab_anchors = reparam_all_anchors(anchors_boxes,gt_box,pos_threshold, neg_threshold)
    return ({"image" : image, "boxes" : torch.from_numpy(np.array(anchors_boxes_reparam)[ind_for_sample,:]),
             "labels" : torch.from_numpy(lab_anchors[ind_for_sample])})

In [18]:
image_torch = 800*torch.rand((1,3,800,800))
feature_torch = torch.rand([1,512,50,50])

ratio = [0.5, 1, 2]
anchor_scales = [8, 16, 32]

gt_box = [np.array([20, 30, 400, 500]), np.array([300, 400, 500, 600])]
labels_gt_box = np.array(("chien","chat"))

In [19]:
batch_rpn = batch_training_proposal_RPN(image_torch, feature_torch, ratio, anchor_scales, gt_box,256,0.5,0.7,0.3)

# Preparation of the proposed region to train the Fast RCNN

In [20]:
def clip_predicted_boxes(list_box, th_min, th_max):
    list_box = np.array(list_box)
    return list(np.clip(list_box,th_min,th_max))

In [21]:
#remove all boxes with at least the width or the height less that 16
def boxes_hw_min(list_box, list_score, min_size = 16):
    boxes = np.array(list_box)
    height = boxes[:, 2] - boxes[:, 0]
    width = boxes[:, 3] - boxes[:, 1]
    box_kept = np.where((height > min_size) & (width > min_size))[0]
    list_box_kept = [list_box[j] for j in box_kept]
    list_score = [list_score[j] for j in box_kept]
    return list_box_kept, list_score

In [22]:
def nms(list_box, list_score, top_pre, top_post, thresold):
    list_score = np.array(list_score)
    order = list_score.argsort()[::-1]
    order = order[:top_pre]
    keep = []
    list_box = np.array(list_box)
    
    ym = list_box[:,0]
    xm = list_box[:,1]
    yp = list_box[:,2]
    xp = list_box[:,3]
    areas = (xp - xm + 1) * (yp - ym + 1)

    while len(order)>0:
        i = order[0]
        yym = np.maximum(ym[i], ym[order[1:]])
        xxm = np.maximum(xm[i], xm[order[1:]])
        yyp = np.minimum(yp[i], yp[order[1:]])
        xxp = np.minimum(xp[i], xp[order[1:]])
        
        width = np.maximum(0.0, xxp - xxm + 1)
        height = np.maximum(0.0, yyp - yym + 1)
        intersection = width*height
        ovr = intersection/(areas[i] + areas[order[1:]] - intersection)
        
        ind_to_keep = np.where(ovr <= thresold)[0]
        order = order[ind_to_keep + 1]
        keep.append(i)
    
    keep = keep[:top_post]
    return(list_box[keep,:])

# Region proposal network

In [23]:
#boxes as tensor [N, 5]
def roi_pooling(boxes, feature_map,scale,adaptative_max_pool):
    boxes_coord = boxes[:,1:].mul(scale).long() #scale + round
    res = [feature_map.narrow(0, boxes[i,0].int(),1)[..., boxes_coord[i,1]:(boxes_coord[i,3]+1), boxes_coord[i,0]:(boxes_coord[i,2]+1)] for i in range(boxes_coord.shape[0])]
    res = [adaptative_max_pool(element) for element in res]
    res = torch.cat(res, axis = 0)
    return(res)

In [24]:
#0 in labels_gt_box must be the background
def batch_training_proposal_FastRCNN(feature_map,list_box,list_gt_box,labels_gt_box, nsize = 128, pos_ratio = 0.25, pos_iou_threshold = 0.5,
                                    neg_iou_threshold_p = 0.5, neg_iou_threshold_n = 0.0, adaptative_max_pool = torch.nn.AdaptiveMaxPool2d((7,7),return_indices=False),scale = 1):
    #compute iou between each pair
    dt_anchors_vs_gtbox = iou_anchors_vs_gtbox(list_box,list_gt_box)
    
    #number of positive units we need to reach in the training sample (we want a balanced sample)
    nb_pos_to_drawn = round(nsize*pos_ratio)
    iou = iou_anchors_vs_gtbox(roi_pred, gt_box)
    #compute the maximum for each anchor
    gt_roi_label = np.argmax(iou, axis = 1)
    gt_roi_max = np.max(iou, axis = 1)
    labels = labels_gt_box[gt_roi_label]
    
    #assign the label if greater that pos_iou_threshold
    #assign background if between the two negative thresholds
    gt_pos = np.where((gt_roi_max > pos_iou_threshold))[0]
    gt_neg = np.where((gt_roi_max < neg_iou_threshold_p) & (gt_roi_max > neg_iou_threshold_n))[0] #background -- 0

    #Nb of positives and negatives boxes get using the thresholds
    pos = len(gt_pos)
    neg = len(gt_neg)
    
    #Subsampling from it
    if (pos > nb_pos_to_drawn):
        disabled_index_pos = np.random.choice(range(len(gt_pos)), size=(pos - nb_pos_to_drawn), replace = False)
        gt_pos = np.delete(gt_pos, disabled_index_pos)
    
    if (neg > nsize - nb_pos_to_drawn):
        if(pos < nb_pos_to_drawn):
            disabled_index_neg = np.random.choice(range(len(gt_neg)), size=(neg - nsize + pos), replace = False)
            gt_neg = np.delete(gt_neg, disabled_index_neg)
        else:
            disabled_index_neg = np.random.choice(range(len(gt_neg)), size=(neg + nb_pos_to_drawn - nsize), replace = False)
            gt_neg = np.delete(gt_neg, disabled_index_neg)
        
    
    #if negative : assign background labels with it's "0"
    labels[gt_neg] = "0"
    final_index = np.append(gt_pos,gt_neg)
    
    #Non reparams
    non_reparam = np.array(list_box)[final_index,:]
    #Need to transform from yxhw to xywh
    non_reparam = non_reparam[:,(1,0,3,2)]
    non_reparam = np.hstack((np.zeros((non_reparam.shape[0],1)), non_reparam))
    non_reparam = torch.from_numpy(non_reparam)
    data_for_training =roi_pooling(non_reparam, feature_map,scale, adaptive_max_pool)
    #Reparams
    reparam = [loc(box,gt_box) for box,gt_box in zip(non_reparam, list(np.array(list_gt_box)[gt_roi_label,:]))]
    
    return  data_for_training, reparam,labels[final_index]

In [25]:
#data_x, box_y, label_y = batch_training_proposal_FastRCNN(test,roi_pred,gt_box,labels_gt_box, nsize = 128, pos_ratio = 0.25, pos_iou_threshold = 0.5,
#                                    neg_iou_threshold_p = 0.5, neg_iou_threshold_n = 0.0,adaptative_max_pool = torch.nn.AdaptiveMaxPool2d((7,7),return_indices=False),
#                                 scale = 1/16.)

# Training RPN network

In [26]:
from torchvision import models

resnet50 = models.resnet50(pretrained=True)
image_torch = 800*torch.rand((1,3,800,800))
#We choose the place where we extracted the feature map in order to get H_feature * W_feature around 2400 (papers)
resnet50_features = nn.Sequential(*(list(resnet50.children())[:-5]))
resnet50_features(image_torch).shape

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

https://stackoverflow.com/questions/69480764/what-is-the-difference-between-resnet50-vgg16-etc-and-rcnn-faster-rcnn


In [30]:
class RPN(nn.Module):
    #TODO : remove embedding_dim using wv.shape[1]
    #define all the layers used in model
    def __init__(self,mid_channels, in_channels,nb_anchors,pre_trained_model):
        
        #Constructor
        super().__init__()        
        
        #embedding layer
        self.pre_trained_model = pre_trained_model
        self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv1.bias.data.zero_()
        
        self.reg_layer = nn.Conv2d(mid_channels, nb_anchors *4, 1, 1, 0)
        self.reg_layer.weight.data.normal_(0, 0.01)
        self.reg_layer.bias.data.zero_()

        self.cls_layer = nn.Conv2d(mid_channels, nb_anchors *2, 1, 1, 0)
        # classification layer
        self.cls_layer.weight.data.normal_(0, 0.01)
        self.cls_layer.bias.data.zero_()
       

    def forward(self,img):
        x = self.pre_trained_model(img)
        x = self.conv1(x)
        pred_anchor = self.reg_layer(x)
        pred_anchor = pred_anchor.permute(0, 2, 3, 1).contiguous().view(1, -1, 4)
        
        pred_cls = self.cls_layer(x)
        pred_cls = pred_cls.permute(0, 2, 3, 1).contiguous()
        pred_cls = pred_cls.view(1, -1, 2)

        return pred_anchor, pred_cls

In [31]:
model = RPN(256,256,9,resnet50_features)
image_torch = 800*torch.rand((1,3,224,224)) #scale picture ? mean ? 
res = model(image_torch)

print(res[0].shape)
print(res[1].shape)

torch.Size([1, 28224, 4])
torch.Size([1, 28224, 2])


# Coco dataset

### Load train dataset 

J'ai réussi à utiliser l'API Coco via `torchvision.datasets.CocoDetection` (https://pytorch.org/vision/stable/datasets.html)


In [None]:
#Permet d'utiliser la co des serveurs Google (rip la mienne) et assure une meilleure reproductibilité
!wget https://conservancy.umn.edu/bitstream/handle/11299/214865/dataset.zip?sequence=12&isAllowed=y
!unzip /content/dataset.zip?sequence=12

In [33]:
from torchvision import datasets
from torch.utils.data import DataLoader

In [34]:
# The directory containing the source images
data_path = "dataset/instance_version/train"

# The path to the COCO labels JSON file
labels_path = "dataset/instance_version/instances_train_trashcan.json"

#### Version 4 - resize des images, en ne gardant que les bbox et category_id des targets normalisées, dans un array de dictionnaires (targets de taille variables)

In [50]:
#Attention "bbox": [x,y,width,height]
class CocoDetection_diy_bis(data.Dataset) :
    """`MS Coco Detection <http://mscoco.org/dataset/#detections-challenge2016>`_ Dataset.

    Args:
        root (string): Root directory where images are downloaded to.
        annFile (string): Path to json annotation file.
        resize : (int,int) size of the images wanted 
    """

    def __init__(self, root, annFile, size):
        from pycocotools.coco import COCO
        self.root = root
        self.coco = COCO(annFile)
        self.ids = list(self.coco.imgs.keys())
        self.size = size
        self.transform = transforms.Compose([transforms.Resize(size), transforms.ToTensor()])

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``.
        """
        coco = self.coco
        img_id = self.ids[index]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        target = coco.loadAnns(ann_ids)

        path = coco.loadImgs(img_id)[0]['file_name']

        # Resize des images :
        img = Image.open(os.path.join(self.root, path)).convert('RGB')
        original_size = img.size
        img = self.transform(img)

        # Targets dict :
        targets = {'labels':[],'boxes':[]}

        for elem in target :  
          box = elem['bbox']
          box[0] *= self.size[0] / original_size[0]
          box[1] *= self.size[1] / original_size[1]
          box[2] *= self.size[0] /original_size[0]
          box[3] *= self.size[1] /  original_size[1]
          targets['boxes'].append(box)
          targets['labels'].append(elem['category_id'])

        return img, targets
        


    def __len__(self):
        return len(self.ids)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        fmt_str += '    Root Location: {}\n'.format(self.root)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        tmp = '    Target Transforms (if any): '
        fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        return fmt_str

In [126]:
def collate_fn_diy (batch) : 
    """
    Parameters : 
    -----------
    batch : list of tuples (img,targets)

    Return : 
    -------
    images : tensor of dim batch_size x 3 x 224 x 224
    targets : list of dict containing : 
        - "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth objects in the target) containing the class labels
        - "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
    """
    imgs, trgts = list(zip(*batch)) # imgs et trgts sont désormais des batch_size-tuples 

    imgs = [img.unsqueeze(0) for img in list(imgs)] #ajout d'une dimension supplémentaire à tous les tenseurs
    images = torch.cat(imgs) # concaténation en un seul tenseur

    targets = []
    for t in list(trgts) : 
      targets.append({'labels' : torch.from_numpy(np.array(t["labels"])), 
                      'boxes' : torch.from_numpy(np.array(t["boxes"]))})
    
    return images, targets

In [127]:
instances_train_4 = CocoDetection_diy_bis(root = data_path, annFile = labels_path, size=(224,224))

loading annotations into memory...
Done (t=0.31s)
creating index...
index created!


In [128]:
# Format DataLoader :
instances_train_dataloader_4 = DataLoader(instances_train_4, batch_size=1, shuffle=True, collate_fn = collate_fn_diy)

In [129]:
test_img_dataload = next(iter(instances_train_dataloader_4))

In [130]:
print(test_img_dataload)

(tensor([[[[0.1647, 0.1647, 0.1725,  ..., 0.0745, 0.0745, 0.0745],
          [0.1765, 0.1765, 0.1804,  ..., 0.0784, 0.0784, 0.0784],
          [0.1882, 0.1843, 0.1725,  ..., 0.0784, 0.0784, 0.0784],
          ...,
          [0.1922, 0.1765, 0.1922,  ..., 0.8000, 0.6980, 0.7137],
          [0.1725, 0.1882, 0.2118,  ..., 0.7647, 0.6353, 0.7137],
          [0.1490, 0.1961, 0.2314,  ..., 0.6941, 0.6588, 0.7098]],

         [[0.1765, 0.1765, 0.1843,  ..., 0.0863, 0.0863, 0.0863],
          [0.1882, 0.1882, 0.1922,  ..., 0.0902, 0.0902, 0.0902],
          [0.2000, 0.1961, 0.1843,  ..., 0.0902, 0.0902, 0.0902],
          ...,
          [0.2471, 0.2196, 0.2196,  ..., 0.8196, 0.7176, 0.7333],
          [0.2039, 0.2118, 0.2196,  ..., 0.7608, 0.6314, 0.7137],
          [0.1569, 0.1961, 0.2157,  ..., 0.6627, 0.6235, 0.6745]],

         [[0.1961, 0.1961, 0.2039,  ..., 0.1059, 0.1059, 0.1059],
          [0.2078, 0.2078, 0.2118,  ..., 0.1098, 0.1098, 0.1098],
          [0.2196, 0.2157, 0.2039,  ..., 

In [64]:
resnet50_features = nn.Sequential(*(list(resnet50.children())[:-5]))
feature_shape = resnet50_features(image_torch).shape

In [111]:
batch_rpn = batch_training_proposal_RPN(image_torch, feature_shape, ratio, anchor_scales, gt_box,256,0.5,1,0.3)

12552
[0.         0.         0.         ... 0.00573348 0.00573348 0.00573348]
12552


In [131]:
image_torch, dict_label = test_img_dataload

In [142]:
gt_box = list(np.array(dict_label[0]['boxes']))
gt_box

[array([9.15032680e-04, 1.34401220e+02, 4.10648366e+01, 9.02197821e+01]),
 array([156.33481013,  77.78235294,  25.66398712,  26.12638328])]

In [160]:
gt_box_good_format = [xywh_to_vertice(element) for element in gt_box]

In [134]:
image_torch

tensor([[[[0.1647, 0.1647, 0.1725,  ..., 0.0745, 0.0745, 0.0745],
          [0.1765, 0.1765, 0.1804,  ..., 0.0784, 0.0784, 0.0784],
          [0.1882, 0.1843, 0.1725,  ..., 0.0784, 0.0784, 0.0784],
          ...,
          [0.1922, 0.1765, 0.1922,  ..., 0.8000, 0.6980, 0.7137],
          [0.1725, 0.1882, 0.2118,  ..., 0.7647, 0.6353, 0.7137],
          [0.1490, 0.1961, 0.2314,  ..., 0.6941, 0.6588, 0.7098]],

         [[0.1765, 0.1765, 0.1843,  ..., 0.0863, 0.0863, 0.0863],
          [0.1882, 0.1882, 0.1922,  ..., 0.0902, 0.0902, 0.0902],
          [0.2000, 0.1961, 0.1843,  ..., 0.0902, 0.0902, 0.0902],
          ...,
          [0.2471, 0.2196, 0.2196,  ..., 0.8196, 0.7176, 0.7333],
          [0.2039, 0.2118, 0.2196,  ..., 0.7608, 0.6314, 0.7137],
          [0.1569, 0.1961, 0.2157,  ..., 0.6627, 0.6235, 0.6745]],

         [[0.1961, 0.1961, 0.2039,  ..., 0.1059, 0.1059, 0.1059],
          [0.2078, 0.2078, 0.2118,  ..., 0.1098, 0.1098, 0.1098],
          [0.2196, 0.2157, 0.2039,  ..., 0

In [162]:
batch_training_proposal_RPN(image_torch, feature_shape, ratio, anchor_scales, gt_box_good_format,256,0.5,0.7,0.3)

12552
[0. 0. 0. ... 0. 0. 0.]
12552


{'boxes': tensor([[ 1.5893,  0.2765,  0.6791,  0.5768],
         [ 1.8640,  0.0768,  1.0168,  0.2427],
         [ 0.1670, -0.0444,  1.0168,  0.2427],
         ...,
         [ 3.4397, -5.6202,  1.0168,  0.2427],
         [ 1.8487, -7.8496,  0.6791,  0.5768],
         [-0.0538, -7.8496,  0.6791,  0.5768]], dtype=torch.float64),
 'image': tensor([[[[0.1647, 0.1647, 0.1725,  ..., 0.0745, 0.0745, 0.0745],
           [0.1765, 0.1765, 0.1804,  ..., 0.0784, 0.0784, 0.0784],
           [0.1882, 0.1843, 0.1725,  ..., 0.0784, 0.0784, 0.0784],
           ...,
           [0.1922, 0.1765, 0.1922,  ..., 0.8000, 0.6980, 0.7137],
           [0.1725, 0.1882, 0.2118,  ..., 0.7647, 0.6353, 0.7137],
           [0.1490, 0.1961, 0.2314,  ..., 0.6941, 0.6588, 0.7098]],
 
          [[0.1765, 0.1765, 0.1843,  ..., 0.0863, 0.0863, 0.0863],
           [0.1882, 0.1882, 0.1922,  ..., 0.0902, 0.0902, 0.0902],
           [0.2000, 0.1961, 0.1843,  ..., 0.0902, 0.0902, 0.0902],
           ...,
           [0.2471, 0.219

In [161]:
gt_box_good_format

[array([1.34401220e+02, 9.15032680e-04, 2.24621002e+02, 4.10657516e+01]),
 array([ 77.78235294, 156.33481013, 103.90873622, 181.99879725])]

In [206]:
 ab = anchor_boxes(ratio, [2,4,8], tuple(image_torch.shape[2:]), tuple(feature_shape[2:]))
 ab = [box for box in ab if check_anchor_inside(box, image_torch.shape[2:])]
lab = label_from_iou(iou_anchors_vs_gtbox(ab,
                     gt_box_good_format))

23556


In [196]:
index_training_proposal(iou_anchors_vs_gtbox(ab,
                     gt_box_good_format),256,0.5)

23556
[0. 0. 0. ... 0. 0. 0.]


array([   59,   338,   349,   614,   648,   730,   736,   743,   750,
         757,   764,   771,   778,   785,   792,   799,   806,   955,
         979,   985,  1085,  1133,  1141,  1149,  1157,  1165,  1173,
        1181,  1189,  1193,  1197,  1205,  1213,  1252,  1293,  1441,
        1543,  1551,  1559,  1567,  1575,  1583,  1587,  1591,  1599,
        1607,  1615,  1623,  1706,  1772,  1826,  1943,  1989,  1998,
        2007,  2016,  2025,  2034,  2043,  2052,  2057,  2061,  2070,
        2079,  2116,  2125,  2126,  2168,  2375,  2382,  2475,  2563,
        2612,  2614,  2646,  2721,  3028,  3078,  3127,  3280,  3295,
        3744,  3881,  3914,  4137,  4201,  4290,  4378,  4423,  4484,
        4683,  4704,  4907,  4910,  4944,  5050,  5443,  5723,  5856,
        5950,  6264,  6670,  6777,  6786,  6917,  6973,  7149,  7230,
        7374,  7501,  7552,  7721,  7763,  7864,  8043,  8058,  8095,
        8134,  8159,  8191,  8230,  8365,  8529,  8727,  8778,  8784,
        8868,  9023,

In [214]:
_,lab = reparam_all_anchors(ab,gt_box_good_format,0.7, 0.3)

23556


In [213]:
np.where((lab == 1))

(array([  736,   743,   750,   757,   764,   771,   778,   785,   792,
          799,   806,  1133,  1141,  1149,  1157,  1165,  1173,  1181,
         1189,  1197,  1205,  1213,  1543,  1551,  1559,  1567,  1575,
         1583,  1591,  1599,  1607,  1615,  1623,  1989,  1998,  2007,
         2016,  2025,  2034,  2043,  2052,  2061,  2070,  2079, 17479,
        17482, 17930, 17939, 17942, 17948, 18378, 18387, 18390, 18396,
        18399, 18402, 18405, 18408, 18414, 18850, 18859, 18862, 18868,
        19322]),)