In [2]:
import PIL
import numpy as np 
import matplotlib.pyplot as plt 
import os
import pandas as pd 
import torch 
import torchvision 
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import torch.nn as nn
from collections import OrderedDict

## Importation

In [3]:

df=pd.read_csv("train_ship_segmentations_v2.csv", index_col=0)
df['label']=df["EncodedPixels"].isna().apply(np.invert)*1
aggregation={'EncodedPixels':lambda rle_codes: ' '.join(map(lambda x:str(x),rle_codes)),'label':sum}
DF=df.groupby("ImageId").agg(aggregation)
DF=DF.reset_index()
display(DF[DF['label']>1].head(15))


trainfiles=os.listdir('train_v2')
load_img = lambda filename: np.array(PIL.Image.open(f"train_v2/{filename}"))

Unnamed: 0,ImageId,EncodedPixels,label
3,000194a2d.jpg,360486 1 361252 4 362019 5 362785 8 363552 10 ...,5
5,00021ddc3.jpg,108287 1 109054 3 109821 4 110588 5 111356 5 1...,9
6,0002756f7.jpg,255784 2 256552 4 257319 7 258087 9 258854 12 ...,2
11,000532683.jpg,458957 14 459725 14 460493 14 461261 14 462029...,2
15,0005d01c8.jpg,56010 1 56777 3 57544 6 58312 7 59079 9 59846 ...,2
45,00113a75c.jpg,401790 1 402557 3 403325 5 404092 7 404859 9 4...,7
68,0017c19d6.jpg,329228 1 329995 3 330762 4 331529 6 332296 8 3...,2
81,001aee007.jpg,496304 1 497071 3 497838 5 498605 7 499372 9 5...,4
117,002943412.jpg,300511 1 301277 4 302043 6 302809 9 303576 11 ...,3
133,002c78530.jpg,258116 2 258884 4 259651 7 260419 6 261189 4 2...,2


## Fonctions pour le labelling 

In [4]:
import os
import cv2
from tqdm import tqdm
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from skimage.io import imread

from skimage.segmentation import mark_boundaries
from skimage.measure import label, regionprops

def rle_decode(mask_rle, shape=(768, 768)):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T  # Needed to align to RLE direction

def masks_as_image(in_mask_list, all_masks=None):
    # Take the individual ship masks and create a single mask array for all ships
    if all_masks is None:
        all_masks = np.zeros((768, 768), dtype = np.int16)
    #if isinstance(in_mask_list, list):
    for mask in in_mask_list:
        if isinstance(mask, str):
            all_masks += rle_decode(mask)
    return np.expand_dims(all_masks, -1)


def bounding_box(rle_0):
    #rle_0 =DF['EncodedPixels'].iloc[92]
    mask_0 = rle_decode_bbox(rle_0)
    lbl_0 = label(mask_0) 
    props = regionprops(lbl_0)
    box=props[0].bbox
    return box

In [10]:
def IOU(bb1,bb2):    
    assert bb1[1] < bb1[3]
    assert bb1[0] < bb1[2]
    assert bb2[1] < bb2[3]
    assert bb2[0] < bb2[2]

    # determine the coordinates of the intersection rectangle
    x_left = max(bb1[1], bb2[1])
    y_top = max(bb1[0], bb2[0])
    x_right = min(bb1[3], bb2[3])
    y_bottom = min(bb1[2], bb2[2])

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # compute the area of both AABBs
    bb1_area = (bb1[3] - bb1[1]) * (bb1[2] - bb1[0])
    bb2_area = (bb2[3] - bb2[1]) * (bb2[2] - bb2[0])

    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = intersection_area / float(bb1_area + bb2_area - intersection_area)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou


#chaine='003b48a9e.jpg'
#bx=bounding_box(DF[DF["ImageId"]==chaine]['EncodedPixels'].iloc[0])
#bx

## Labelling-- Mettre les données en forme pour le forward du RPN

In [11]:
 ## Build anchors for all scales 
def labelling (ImageId,image,scale,rle):
   scales={192:4,96:8,48:16,24:32}
   fe_size=768//scales[scale]
   index=0
   ctr=np.zeros((scale*scale,2))
   ctr_x = np.arange(scales[scale], (fe_size+1) * scales[scale], scales[scale])
   ctr_y = np.arange(scales[scale], (fe_size+1) * scales[scale], scales[scale])
   for x in range(len(ctr_x)):
     for y in range(len(ctr_y)):
       ctr[index,1]=ctr_x[x]-scales[scale]/2
       ctr[index,0]=ctr_y[y]-scales[scale]/2
       index+=1
   anchors_boxes=np.zeros((scale*scale*3,4))
   ratios=[0.5,1,2]
   sub_sample=scales[scale]                     ######### Not sure 
   index=0
   for c in ctr:
     ctr_y,ctr_x=c
     for i in range(len(ratios)):
       h = sub_sample * np.sqrt(ratios[i])*scales[scale]
       w = sub_sample * np.sqrt(1./ ratios[i])*scales[scale]
       anchors_boxes[index, 0] = ctr_y - h / 2.
       anchors_boxes[index, 1] = ctr_x - w / 2.
       anchors_boxes[index, 2] = ctr_y + h / 2.
       anchors_boxes[index, 3] = ctr_x + w / 2.
       index += 1
    
   cls_labels=[]
   for anchor in anchors_boxes:
        #cls_labels=[Max_IOU_box(ImageId,anchor)[0] for anchor in anchors_boxes ]
        temp=Max_IOU_box(ImageId,anchor)
        
        if temp[0] >=0.7:
            anchor=temp[1]   ###  for the regressor, we replace the anchor box by the bounding_box of the object
            cls_labels.append(1)
        elif temp[0]<0.3:
            cls_labels.append(-1)
        else:
            cls_labels.append(0)
        #cls_labels=np.array(cls_labels)
        #cls_labels[cls_labels>=0.7]=1
        #cls_labels[cls_labels<0.3]=-1
        #cls_labels[np.abs(cls_labels) !=1]=0
   return (torch.Tensor(image),torch.Tensor(anchors_boxes),torch.Tensor(cls_labels))     ### We are not trying to predict exactly the bounding box but some approximation 


In [12]:
import os
import cv2
from tqdm import tqdm
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from skimage.io import imread

from skimage.segmentation import mark_boundaries
from skimage.measure import label, regionprops

def rle_decode(mask_rle, shape=(768, 768)):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T  # Needed to align to RLE direction

def masks_as_image(in_mask_list, all_masks=None):
    # Take the individual ship masks and create a single mask array for all ships
    if all_masks is None:
        all_masks = np.zeros((768, 768), dtype = np.int16)
    #if isinstance(in_mask_list, list):
    for mask in in_mask_list:
        if isinstance(mask, str):
            all_masks += rle_decode(mask)
    return np.expand_dims(all_masks, -1)


def bounding_box(ImageId):
    rle_0 =DF[DF['ImageId']==ImageId]['EncodedPixels'].iloc[0]
    mask_0 = rle_decode(rle_0)
    lbl_0 = label(mask_0) 
    props = regionprops(lbl_0)
    box=[]
    for prop in props:
        box.append(prop.bbox)
    return box

def Max_IOU_box(ImageId,anchor):
    '''return the max of the IOU between the anchor and the box within the Image
     and the index of this box'''
    temp=bounding_box(ImageId)
    temp2=[IOU(anchor,i) for i in temp]
    temp2=np.array(temp2)
    return (max(temp2),np.argmax(temp2))
    
#Max_IOU_box('003b48a9e.jpg',(716, 379, 726, 387))

#df[df.index=='003b48a9e.jpg']

## Téléchargement des données avec labelling à chaque fois 

In [53]:
DF.index
with_boat=list(DF[DF['label']!=0]["ImageId"].iloc[:])
trdata = []
Testdata = []
og = 768
s = 224
counter1 = 0
counter2=0
nber_files=50

  
for filename in with_boat[:nber_files]:
    im = load_img(filename)
    
    if df.query('ImageId=="'+filename+'"')['label'][0] >=1:
        trdata.append(labelling(filename,torch.Tensor(im).permute(2,1,0),48,DF[DF['ImageId']==filename]['EncodedPixels'].iloc[0]))
        
    if (counter1%5==0):
        filename2=with_boat[nber_files+counter1]
        am=load_img(filename2)
        Testdata.append(labelling(filename2,torch.Tensor(im).permute(2,1,0),48,DF[DF['ImageId']==with_boat[nber_files+counter1]]['EncodedPixels'].iloc[0]))
        print(counter1, '/ 100')
    counter1+=1

0 / 100
5 / 100
10 / 100
15 / 100
20 / 100
25 / 100
30 / 100
35 / 100
40 / 100
45 / 100


In [54]:
trainloader=torch.utils.data.DataLoader(trdata,3)
testloader=torch.utils.data.DataLoader(Testdata,3)

## Construction du modèle

In [15]:
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

## Construction du backbone première partie -- Attention Mechanism and Spatial Mechanism

In [16]:
## We are right after ResNet
## we are impleting fpn with bottom-up augmented path
## and CA or SA mechanism

class CA_SA(torch.nn.Module):
  def __init__(self,shape,batch=1,channel=256):
    super(CA_SA,self).__init__()
    self.channel=channel
    self.batch=batch
    self.shape=shape
    self.residual=nn.Sequential(nn.Conv2d(self.channel,self.channel,kernel_size=3,padding=1,stride=1),nn.ReLU(),nn.Conv2d(self.channel,self.channel,kernel_size=3,padding=1,stride=1))
    self.CA=nn.Sequential(nn.AvgPool2d(kernel_size=self.shape),
                          nn.Conv2d(self.channel,self.channel,kernel_size=1),
                          nn.ReLU(),
                          nn.Conv2d(self.channel,self.channel,kernel_size=1),
                          nn.Softmax(dim=1))
    self.SA=nn.Sequential(nn.Conv2d(self.channel,1,kernel_size=3,stride=1,padding=1),
                          nn.ReLU(),
                          nn.Conv2d(1,1,kernel_size=3,padding=1)
                          )
    ## We put the Softmax layer out :
    self.actv=nn.Softmax(dim=2)
    ## concatenate before this layer : torch.cat()
    self.final_layer=nn.Conv2d(self.channel,self.channel,kernel_size=3,padding=1)
    self.alpha=torch.ones((self.channel,1,1))
    self.beta=torch.ones(self.shape)
    self.conv=nn.Conv2d(2*self.channel,self.channel,kernel_size=3,padding=1)

  def forward(self,inputs):
    inputs=self.residual(inputs)
    self.alpha=self.CA(inputs) #.view(self.batch,self.channel,1,1)   
    ## multiplicate each channel by the corresponding component on alpha:
    A=inputs*self.alpha
    self.beta=self.SA(inputs)
    self.beta=self.actv(self.beta)                ##### view_as à supp
    
    ## multiplicate each pixel by his corresponding coefficient in beta
    B= inputs*self.beta
   
    ## concatenate 
    temp=torch.cat((A,B),dim=1)
    #print("TEST", temp.size())
    temp=self.conv(temp)
    return temp+inputs

## Checking ok 

## Construction du backbone à proprement parler

In [17]:
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
## building  of fpn structrure :
class backbone(nn.Module):
  def __init__(self,batch):
    super().__init__()
    ## We use resnet from mask_r_cnn pytorch's model
    self.batch=batch
    self.conv=model.backbone.body
    self.first_conv=nn.Conv2d(2048,256,kernel_size=1)
    self.up=nn.Upsample(scale_factor=2)
    self.top_down=nn.ModuleList([nn.Conv2d(1024,256,kernel_size=1),
                             nn.Conv2d(512,256,kernel_size=1),
                             nn.Conv2d(256,256,kernel_size=1)])
    self.AM1=nn.ModuleList([CA_SA(batch=self.batch,shape=(48,48)),
                          CA_SA(batch=self.batch,shape=(96,96)),
                          CA_SA(batch=self.batch,shape=(192,192))])
    
    self.down_top=nn.ModuleList([nn.Conv2d(256,256,kernel_size=3,padding=1,stride=2),
                                nn.Conv2d(256,256,kernel_size=3,padding=1,stride=2),
                                nn.Conv2d(256,256,kernel_size=3,padding=1,stride=2)])
    
    self.AM2=nn.ModuleList([CA_SA(batch=self.batch,shape=(96,96)),
                           CA_SA(batch=self.batch,shape=(48,48)),
                           CA_SA(batch=self.batch,shape=(24,24))])
    
  def forward(self,X):
    resulta=[]
    resultb=[]
    TEMP=self.conv(X)
    temp=TEMP['3']
    temp=self.first_conv(temp)
    resulta.append(("P5",temp))  ## Check if the value stay at it is or change after further modification
    temp=self.up(temp)
    index1=['2','1','0']
    index2=['P4','P3','P2']
    for i in range(0,len(self.top_down)):
      
      temp=temp+self.top_down[i](TEMP[index1[i]])
      temp=self.AM1[i](temp)
      
      resulta.append((index2[i],temp))
      temp=self.up(temp)
      
    resulta=OrderedDict(resulta)
    ## augmented path:
    index2=['P3','P4','P5']
    index3= ['N3','N4','N5']
    temp=resulta['P2']
    resultb.append(('N2',temp))
    for i in range(len(self.down_top)):
      temp=self.down_top[i](temp)
      #print("TEMP  ",temp.size())
      temp=temp+resulta[index2[i]]
      temp=self.AM2[i](temp)
      resultb.append((index3[i],temp))
    result=OrderedDict(resultb)
    return result



In [306]:
index=torch.topk(INPUT[2],1,dim=0)[1]
#INPUT[1][index]
torch.max(INPUT[2])

tensor(-1.)

## BLOC POUR LES LOSS

In [18]:
import math 
import torch.nn.functional as F
class SamePad2d(nn.Module):
    """Mimics tensorflow's 'SAME' padding.
    """

    def __init__(self, kernel_size, stride):
        super(SamePad2d, self).__init__()
        self.kernel_size = torch.nn.modules.utils._pair(kernel_size)
        self.stride = torch.nn.modules.utils._pair(stride)

    def forward(self, input):
        in_width = input.size()[2]
        in_height = input.size()[3]
        out_width = math.ceil(float(in_width) / float(self.stride[0]))
        out_height = math.ceil(float(in_height) / float(self.stride[1]))
        pad_along_width = ((out_width - 1) * self.stride[0] +
                           self.kernel_size[0] - in_width)
        pad_along_height = ((out_height - 1) * self.stride[1] +
                            self.kernel_size[1] - in_height)
        pad_left = math.floor(pad_along_width / 2)
        pad_top = math.floor(pad_along_height / 2)
        pad_right = pad_along_width - pad_left
        pad_bottom = pad_along_height - pad_top
        return F.pad(input, (pad_left, pad_right, pad_top, pad_bottom), 'constant', 0)

In [19]:
class RPN(nn.Module):
    """Builds the model of Region Proposal Network.
    anchors_per_location: number of anchors per pixel in the feature map
    anchor_stride: Controls the density of anchors. Typically 1 (anchors for
                   every pixel in the feature map), or 2 (every other pixel).
    Returns:
        rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax)
        rpn_probs: [batch, W, W, 2] Anchor classifier probabilities.
        rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be
                  applied to anchors.
    """

    def __init__(self, anchors_per_location, anchor_stride, depth):
        super(RPN, self).__init__()
        self.anchors_per_location = anchors_per_location
        self.anchor_stride = anchor_stride
        self.depth = depth

        self.padding = SamePad2d(kernel_size=3, stride=self.anchor_stride)
        self.conv_shared = nn.Conv2d(self.depth, 256, kernel_size=3, stride=self.anchor_stride)   ### kernel_size  3---->16
        self.relu = nn.ReLU(inplace=True)
        self.conv_class = nn.Conv2d(256, 2 * anchors_per_location, kernel_size=1, stride=1)
        self.softmax = nn.Softmax(dim=2)
        self.conv_bbox = nn.Conv2d(256, 4 * anchors_per_location, kernel_size=1, stride=1)

    def forward(self, x):
        # Shared convolutional base of the RPN
        
        x = self.relu(self.conv_shared(self.padding(x)))
        

        # Anchor Score. [batch, anchors per location * 2, height, width].
        rpn_class_logits = self.conv_class(x)

        # Reshape to [batch, anchors, 2]
        rpn_class_logits = rpn_class_logits.permute(0,2,3,1)
        rpn_class_logits = rpn_class_logits.contiguous()
        rpn_class_logits = rpn_class_logits.view(x.size()[0], -1, 2)

        # Softmax on last dimension of BG/FG.
        rpn_probs = self.softmax(rpn_class_logits)

        # Bounding box refinement. [batch, H, W, anchors per location, depth]
        # where depth is [x, y, log(w), log(h)]
        rpn_bbox = self.conv_bbox(x)

        # Reshape to [batch, 4, anchors]
        rpn_bbox = rpn_bbox.permute(0,2,3,1)
        rpn_bbox = rpn_bbox.contiguous()
        rpn_bbox = rpn_bbox.view(x.size()[0], -1, 4)

        return [rpn_class_logits, rpn_probs, rpn_bbox]


In [20]:
def compute_rpn_class_loss(rpn_match, rpn_class_logits):
    """RPN anchor classifier loss.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG.
    """

    # Squeeze last dim to simplify
    rpn_match = rpn_match.squeeze(0)    ### Modif 
    rpn_class_logits=rpn_class_logits.squeeze(0)  

    # Get anchor classes. Convert the -1/+1 match to 0/1 values.
    anchor_class = (rpn_match == 1).long()

    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    indices = torch.nonzero(rpn_match != 0)

    # Pick rows that contribute to the loss and filter out the rest.
    #rpn_class_logits = rpn_class_logits[indices.data[:,0],indices.data[:,1],:]
    rpn_class_logits=rpn_class_logits[indices.data[:,0],indices.data[:,1]]
    anchor_class = anchor_class[indices.data[:,0],indices.data[:,1]]

    # Crossentropy loss
    loss = F.cross_entropy(rpn_class_logits, anchor_class)

    return loss

def compute_rpn_bbox_loss(target_bbox, rpn_match, rpn_bbox):
    """Return the RPN bounding box loss graph.
    target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))].
        Uses 0 padding to fill in unsed bbox deltas.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]
    """
    

    # Squeeze last dim to simplify
    rpn_macth = rpn_match.squeeze(0)                   #### Modif 
    rpn_bbox=rpn_bbox.squeeze(0)

    # Positive anchors contribute to the loss, but negative and
    # neutral anchors (match value of 0 or -1) don't.
    indices = torch.nonzero(rpn_match==1)
    
    # Pick bbox deltas that contribute to the loss
    #rpn_bbox = rpn_bbox[indices.data[:,0],indices.data[:,1]]

    #rpn_bbox.size())
    rpn_bbox=rpn_bbox[indices.data[:,0],indices.data[:,1]]
    

    # Trim target bounding box deltas to the same length as rpn_bbox.
    #target_bbox = target_bbox[0,:rpn_bbox.size()[0],:]
    target_bbox=target_bbox[indices.data[:,0],indices.data[:,1]]

    # Smooth L1 loss
    loss = F.smooth_l1_loss(rpn_bbox, target_bbox)

    return loss

## TRAINING

In [55]:
def train(net, optimizer, train_loader, test_loader,scale,n_epoch = 2,
          train_acc_period = 10,
          test_acc_period = 5,
          cuda=True, level="N4"):
  loss_train = []
  loss_test = []
  total = 0
  for epoch in range(n_epoch):  # loop over the dataset multiple times
      torch.save(net,"classifier_epoch"+str(epoch)+".pt")
      running_loss = 0.0
      running_acc = 0.0
      for i, data in enumerate(train_loader, 0):
          # get the inputs
          img, anchor_bbox,anchor_label= data
          if cuda:
            img = img.type(torch.cuda.FloatTensor)
            #labels = labels.type(torch.cuda.LongTensor)
          
          # zero the parameter gradients
          optimizer.zero_grad()

          # forward + backward + optimize
          img=step1(img)[level]
          outputs = net(img)
          logits,probs,pred_anchor_bbox=outputs
          loss=compute_rpn_class_loss(anchor_label.type(torch.cuda.FloatTensor),logits)
          loss1=loss+compute_rpn_bbox_loss(anchor_bbox.type(torch.cuda.FloatTensor),anchor_label.type(torch.cuda.FloatTensor),pred_anchor_bbox)
          
          
          loss.backward()
          optimizer.step()
          #print("debut stack")
          Z2=probs.view(-1,2)
          predicted=torch.Tensor([torch.argmax(i) for i in Z2])
          #print(" fin stack")
          labels = (anchor_label==1).long().view(-1)
          total += labels.size(0)
          # print statistics
          running_loss = 0.33*loss.item()/3 + 0.66*running_loss
          
          correct = (predicted == labels).sum().item()/labels.size(0)
          running_acc = 0.3*correct + 0.66*running_acc
          if i % train_acc_period == train_acc_period-1:
            print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss))
            print('[%d, %5d] acc: %.3f' %(epoch + 1, i + 1, running_acc))
            running_loss = 0.0
            total = 0
            # break
      if epoch % test_acc_period == test_acc_period-1:
          cur_acc= accuracy(net, test_loader, cuda=cuda)
          #print('[%d] loss: %.3f' %(epoch + 1, cur_loss))
          print('[%d] acc: %.3f' %(epoch + 1, cur_acc))
      
  print('Finished Training')

In [56]:
#step1=backbone(batch=3)
#net=RPN(3,1,256)
#net.cuda()
#step1.cuda()
learning_rate = 1e-3
optimizer = torch.optim.Adam(net.parameters(),lr=learning_rate)
train(net,optimizer,trainloader,testloader,scale=24)

[1,    10] loss: 0.000
[1,    10] acc: 0.868
[2,    10] loss: 0.000
[2,    10] acc: 0.868
Finished Training


In [52]:
#accuracy(net, testloader, cuda=True,level="N5")
torch.save(net,"RPN_P5_first_step"+".pt")

In [144]:
''''indices=torch.nonzero(label.squeeze(0)==1)
label=label.squeeze(0)
boxes[indices.data[:,0],indices.data[:,1]]
Z1[2].size()'''

compute_rpn_class_loss(label,Z1[0])
#torch.max(Z1[1],0)
#label.view(-1,1)==


tensor([], size=(0, 3, 110592, 2), grad_fn=<IndexBackward>)

In [214]:
Z2=Z1[1].view(3*110592,2)
#a=torch.Tensor([torch.argmax(i) for i in Z1[1][0]])
#b=torch.Tensor([torch.argmax(i) for i in Z1[1][1]])
#c=torch.Tensor([torch.argmax(i) for i in Z1[1][2]])
#fr=torch.stack([a,b,c],dim=0)
#fr=torch.Tensor([torch.argmax(i) for i in Z2])
#(fr==(label==1).long().view(3*110592)).sum()



tensor(220155)

In [34]:
def compute_rpn_class_loss(rpn_match, rpn_class_logits):
    """RPN anchor classifier loss.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG.
    """

    # Squeeze last dim to simplify
    rpn_match = rpn_match.squeeze(0)    ### Modif 
    rpn_class_logits=rpn_class_logits.squeeze(0)  

    # Get anchor classes. Convert the -1/+1 match to 0/1 values.
    anchor_class = (rpn_match == 1).long()

    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    indices = torch.nonzero(rpn_match != 0)

    # Pick rows that contribute to the loss and filter out the rest.
    #rpn_class_logits = rpn_class_logits[indices.data[:,0],indices.data[:,1],:]
    rpn_class_logits=rpn_class_logits[indices.data[:,0],indices.data[:,1]]
    anchor_class = anchor_class[indices.data[:,0],indices.data[:,1]]

    # Crossentropy loss
    loss = F.cross_entropy(rpn_class_logits, anchor_class)

    return loss

def compute_rpn_bbox_loss(target_bbox, rpn_match, rpn_bbox):
    """Return the RPN bounding box loss graph.
    target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))].
        Uses 0 padding to fill in unsed bbox deltas.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]
    """
    

    # Squeeze last dim to simplify
    rpn_macth = rpn_match.squeeze(0)                   #### Modif 
    rpn_bbox=rpn_bbox.squeeze(0)

    # Positive anchors contribute to the loss, but negative and
    # neutral anchors (match value of 0 or -1) don't.
    indices = torch.nonzero(rpn_match==1)
    if len(indices)==0:
        return torch.Tensor([0])
    
    # Pick bbox deltas that contribute to the loss
    #rpn_bbox = rpn_bbox[indices.data[:,0],indices.data[:,1]]

    #rpn_bbox.size())
    rpn_bbox=rpn_bbox[indices.data[:,0],indices.data[:,1]]
    

    # Trim target bounding box deltas to the same length as rpn_bbox.
    #target_bbox = target_bbox[0,:rpn_bbox.size()[0],:]
    target_bbox=target_bbox[indices.data[:,0],indices.data[:,1]]

    # Smooth L1 loss
    loss = F.smooth_l1_loss(rpn_bbox, target_bbox)

    return loss

In [48]:
def accuracy(net, test_loader, cuda=True,level="N2"):
  net.eval()
  correct = 0
  total = 0
  loss = 0
  with torch.no_grad():
      for data in test_loader:
          img, anchor_bbox,anchor_label= data
          if cuda:
            img = img.type(torch.cuda.FloatTensor)
          img=step1(img)[level]
          outputs = net(img)
          logits,probs,pred_anchor_bbox=outputs
          # loss+= criterion(outputs, labels).item()
          Z2=probs.view(-1,2)
          predicted=torch.Tensor([torch.argmax(i) for i in Z2])
          labels = (anchor_label==1).long().view(-1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()
          if total > 100:
            break
  net.train()
  #print('Accuracy of the network on the test images: %d %%' % (
     # 100 * correct / total))
  # return (100.0 * correct / total, loss/total)
  return 100.0 * correct/total

## BROUILLON 

In [None]:
DF.index
with_boat=list(DF[DF['label']!=0]["ImageId"].iloc[:])

image_test=load_img('000532683.jpg')
INPUT=labelling('000532683.jpg',image_test,48,DF['EncodedPixels'].iloc[11])
#index=torch.where(INPUT[2]==1)
#index[0].size()
choisi=INPUT[1][index].squeeze(0)
#choisi=bounding_box(DF['EncodedPixels'].iloc[175])
import cv2
img_clone=np.copy(image_test)
plt.figure(figsize=(9,9))
#for x in range(TEST):
#for choisi in choisie:
cv2.rectangle(img_clone,((int)(choisi[3]),(int)(choisi[2])),((int)(choisi[1]),(int)(choisi[0])),color=(255,0,0),thickness=2)
plt.imshow(img_clone)
plt.show()

In [297]:
import os
import cv2
from tqdm import tqdm
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from skimage.io import imread

from skimage.segmentation import mark_boundaries
from skimage.measure import label, regionprops

def rle_decode(mask_rle, shape=(768, 768)):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T  # Needed to align to RLE direction

def masks_as_image(in_mask_list, all_masks=None):
    # Take the individual ship masks and create a single mask array for all ships
    if all_masks is None:
        all_masks = np.zeros((768, 768), dtype = np.int16)
    #if isinstance(in_mask_list, list):
    for mask in in_mask_list:
        if isinstance(mask, str):
            all_masks += rle_decode(mask)
    return np.expand_dims(all_masks, -1)


def bounding_box(ImageId):
    rle_0 =DF[DF['ImageId']==ImageId]['EncodedPixels'].iloc[0]
    mask_0 = rle_decode(rle_0)
    lbl_0 = label(mask_0) 
    props = regionprops(lbl_0)
    box=[]
    for prop in props:
        box.append(prop.bbox)
    return box

def Max_IOU_box(ImageId,anchor):
    '''return the max of the IOU between the anchor and the box within the Image
     and the index of this box'''
    temp=bounding_box(ImageId)
    temp2=[IOU(anchor,i) for i in temp]
    temp2=np.array(temp2)
    return (max(temp2),np.argmax(temp2))
    
#Max_IOU_box('003b48a9e.jpg',(716, 379, 726, 387))

#df[df.index=='003b48a9e.jpg']

(1.0, 1)

(682, 44, 698, 68)

In [317]:
'''re=[]
for i in labelling(load_img(chaine),96,DF[DF["ImageId"]==chaine]['EncodedPixels'].iloc[0])[1]:
    re.append(IOU(bx,i))
re=torch.Tensor(re)
torch.max(re)'''
a=labelling(chaine,load_img(chaine),24,DF[DF["ImageId"]==chaine]['EncodedPixels'].iloc[0])

In [29]:

step1=backbone(batch=3)
net=RPN(3,1,256)
#net.cuda()
#step1.cuda()
learning_rate = 1e-3
optimizer = torch.optim.Adam(net.parameters(),lr=learning_rate)

for j in trainloader:
    i=next(iter(trainloader))
    Z=step1(i[0])
    Z1=net(Z['N5'])
    #Z3=compute_rpn_class_loss(i[2],Z1[1])
    Z4=compute_rpn_bbox_loss(i[1],i[2],Z1[2])
    break

In [30]:
Z4

0