In [3]:
import torchvision.datasets
import os

In [4]:
import matplotlib.pyplot as plt

In [5]:
def dataset(path):
    """Build a torchvision ``ImageFolder`` dataset rooted at ``path``."""
    image_folder = torchvision.datasets.ImageFolder(root=path)
    return image_folder

In [6]:
# Root folder of each single-species image set; each one is later passed to dataset().
# NOTE(review): absolute, machine-specific paths - consider deriving them from one configurable base directory.
path_buff = '/Users/redabelhaj/Downloads/africa_dataset/only_buff'
path_zebra = '/Users/redabelhaj/Downloads/africa_dataset/only_zebra'
path_rhino = '/Users/redabelhaj/Downloads/africa_dataset/only_rhino'
path_elephant = '/Users/redabelhaj/Downloads/africa_dataset/only_elephant'

In [7]:
# Class sub-folder (with a trailing '/') inside each root; these are the
# directories handed to the annotation readers further down.
path_buff_2 = path_buff + '/buffalo/'
path_zebra_2 = path_zebra + '/zebra/'
path_rhino_2 = path_rhino + '/rhino/'
path_elephant_2 = path_elephant + '/elephant/'

In [8]:
# One ImageFolder dataset per species root.
buffalos = dataset(path_buff)
zebras = dataset(path_zebra)
rhinos = dataset(path_rhino)
elephants = dataset(path_elephant)

In [9]:
# Keep only element 0 of every sample (the image; a PIL image according to the
# ds_detect[0] output shown further down) and drop the rest of the sample.
# NOTE(review): this materialises every image of every dataset in memory.
buffalos_imgs = [buffalos[i][0] for i in range(len(buffalos))]
zebras_imgs = [zebras[i][0] for i in range(len(zebras))]
rhinos_imgs = [rhinos[i][0] for i in range(len(rhinos))]
elephants_imgs = [elephants[i][0] for i in range(len(elephants))]

In [10]:
def get_file_paths(path):
    """Return the paths of the ``.txt`` annotation files located directly in ``path``.

    Args:
        path: directory to scan, with or without a trailing separator.

    Returns:
        List of file paths (``path`` joined with each file name), sorted by
        file name so the result does not depend on the directory listing order.
    """
    # Match the real extension: testing only the last character ('t') would
    # also pick up unrelated files whose name merely ends with that letter.
    return [
        os.path.join(path, name)
        for name in sorted(os.listdir(path))
        if name.endswith('.txt')
    ]

In [11]:
def get_rectangles(path):
    """Read the bounding boxes stored in the annotation files of a directory.

    Every annotation file is expected to be named with a three-digit number
    right before a four-character extension (e.g. ``012.txt``), numbered from
    1, and to contain one box per line as ``label x y w h`` separated by
    whitespace. The label is ignored here.

    Args:
        path: directory handed to ``get_file_paths``.

    Returns:
        Dict mapping ``file number - 1`` to the list of ``(x, y, w, h)`` float
        tuples read from that file. Keys run from 0 to the highest file number
        (inclusive); indices without a file map to an empty list. An empty
        dict is returned when the directory holds no annotation file.
    """
    file_paths = get_file_paths(path)
    if not file_paths:
        # max() below would raise on an empty sequence.
        return {}
    # The three characters before the extension hold the image number.
    numbered = [(int(file_path[-7:-4]), file_path) for file_path in file_paths]
    highest = max(number for number, _ in numbered)
    d_rectangles = {i: [] for i in range(1 + highest)}
    for number, file_path in numbered:
        with open(file_path, "r") as file:
            for line in file:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no box.
                    continue
                coords = (float(fields[1]), float(fields[2]),
                          float(fields[3]), float(fields[4]))
                d_rectangles[number - 1].append(coords)
    return d_rectangles


In [218]:
def get_rectangles_labels(path):
    """Read the labelled bounding boxes stored in the annotation files of a directory.

    Every annotation file is expected to be named with a three-digit number
    right before a four-character extension (e.g. ``012.txt``), numbered from
    1, and to contain one box per line as ``label x y w h`` separated by
    whitespace.

    Args:
        path: directory handed to ``get_file_paths``.

    Returns:
        Dict mapping ``file number - 1`` to the list of
        ``(label, (x, y, w, h))`` entries read from that file, with ``label``
        an int and the coordinates floats. Keys run from 0 to the highest file
        number (inclusive); indices without a file map to an empty list. An
        empty dict is returned when the directory holds no annotation file.
    """
    file_paths = get_file_paths(path)
    if not file_paths:
        # max() below would raise on an empty sequence.
        return {}
    # The three characters before the extension hold the image number.
    numbered = [(int(file_path[-7:-4]), file_path) for file_path in file_paths]
    highest = max(number for number, _ in numbered)
    d_rectangles = {i: [] for i in range(1 + highest)}
    for number, file_path in numbered:
        with open(file_path, "r") as file:
            for line in file:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no box.
                    continue
                label = int(fields[0])
                coords = (float(fields[1]), float(fields[2]),
                          float(fields[3]), float(fields[4]))
                d_rectangles[number - 1].append((label, coords))
    return d_rectangles
    

In [12]:
def get_images(images, path):
    """Crop every annotated box out of its source image.

    Args:
        images: sequence of images exposing ``.size`` as ``(width, height)``
            and ``.crop(box)`` (the PIL image interface).
        path: annotation directory passed to ``get_rectangles``; the boxes
            stored under index ``i`` are applied to ``images[i]``.

    Returns:
        Flat list of crops, in image order then box order. Boxes are
        ``(x_center, y_center, width, height)`` relative to the image size and
        are converted to absolute ``(left, top, right, bottom)`` before
        cropping. Images without any annotation entry contribute no crop.
    """
    d_rectangles = get_rectangles(path)
    res = []
    for i, image in enumerate(images):
        # The size is the same for every box of an image: read it once.
        w, h = image.size
        # .get: an image index beyond the highest annotation number simply has no box.
        for x, y, w_r, h_r in d_rectangles.get(i, ()):
            x_abs = w*x
            y_abs = h*y
            bottom = y_abs + h_r*h/2
            top = y_abs - h_r*h/2
            left = x_abs - w_r*w/2
            right = x_abs + w_r*w/2
            res.append(image.crop((left, top, right, bottom)))
    return res
        
        

In [230]:
def get_detection_dataset(images, rect_labels):
    """Pair each image with the annotation list stored under its position.

    Returns a list of ``(image, rect_labels[i])`` tuples, one per image, in
    the order of ``images``.
    """
    return [(images[idx], rect_labels[idx]) for idx in range(len(images))]


In [240]:
# Build one detection dataset per species: each entry pairs a full image with
# its list of (label, box) annotations. `rect_labels` is reused as a scratch
# name and overwritten for each species.
rect_labels = get_rectangles_labels(path_buff_2)
ds_buf = get_detection_dataset(buffalos_imgs, rect_labels)

rect_labels = get_rectangles_labels(path_zebra_2)
ds_zeb = get_detection_dataset(zebras_imgs, rect_labels)

rect_labels = get_rectangles_labels(path_rhino_2)
ds_r = get_detection_dataset(rhinos_imgs, rect_labels)

rect_labels = get_rectangles_labels(path_elephant_2)
ds_el = get_detection_dataset(elephants_imgs, rect_labels)



In [253]:
# Concatenate the four species and shuffle (sampling len(...) items without
# replacement is a full shuffle).
# NOTE(review): `r` is the `random` module, imported only in a later cell
# (In [23]); this cell fails on a fresh top-to-bottom run. No seed is set.
ds_detect_tot = ds_buf + ds_zeb+ds_r + ds_el
ds_detect = r.sample(ds_detect_tot, len(ds_detect_tot))

In [254]:
ds_detect[0]

(<PIL.Image.Image image mode=RGB size=1379x1034 at 0x175710390>,
 [(2, (0.465625, 0.539906, 0.925, 0.920188))])

In [256]:
# Convert every detection image to a tensor and resize it with interpolate(..., 140);
# the annotation list is carried through unchanged.
# NOTE(review): `interpolate` is defined only in a later cell (In [20]).
t = torchvision.transforms.ToTensor()
ds_detect_t = [(interpolate(t(image),140), label) for (image, label) in ds_detect]

In [247]:
# Crop every annotated buffalo box out of its source image.
buf_images = get_images(buffalos_imgs,path_buff_2)

KeyboardInterrupt: 

In [14]:
# Crop every annotated zebra box out of its source image.
z_images = get_images(zebras_imgs,path_zebra_2)

In [15]:
# Crop every annotated rhino box out of its source image.
r_images = get_images(rhinos_imgs,path_rhino_2)

In [16]:
# Crop every annotated elephant box out of its source image.
e_images =  get_images(elephants_imgs,path_elephant_2)

In [17]:
def create_pil_dataset(b,z,r,e):
    """Label four image collections and concatenate them.

    Images from ``b``, ``z``, ``r`` and ``e`` receive the class indices 0, 1,
    2 and 3 respectively. Returns a list of ``(image, class_index)`` tuples,
    grouped by class in that order.
    """
    labelled = []
    for class_index, group in enumerate((b, z, r, e)):
        labelled.extend((img, class_index) for img in group)
    return labelled

In [18]:
# Labelled crops of the four species (class indices 0-3).
# NOTE(review): this rebinds `dataset`, which was the ImageFolder helper
# function defined in an earlier cell; that function is no longer callable afterwards.
dataset = create_pil_dataset(buf_images, z_images, r_images, e_images)

In [19]:
import torchvision.transforms
import torch.nn.functional as F

In [20]:
def interpolate(img, res):
    """Resize a 3-D or 4-D tensor by calling ``F.interpolate`` twice.

    The tensor is passed to ``F.interpolate(..., size=res)``, its last two
    axes are swapped, it is passed to ``F.interpolate`` again, and the axes
    are swapped back before returning.

    Raises:
        Exception: if ``img`` is neither 3-D nor 4-D.
    """
    if img.ndim not in (3, 4):
        raise Exception("tensor dimension should be 3 or 4")
    # Axis order that exchanges the last two dimensions.
    swap_last_two = (0, 1, 3, 2) if img.ndim == 4 else (0, 2, 1)
    first_pass = F.interpolate(img, size=res)
    second_pass = F.interpolate(first_pass.permute(*swap_last_two), size=res)
    return second_pass.permute(*swap_last_two)
    

In [21]:
def transform(data, res):
    """Convert every image of ``data`` to a tensor and resize it with ``interpolate``.

    ``data`` is an iterable of ``(image, label)`` pairs; the result is a list
    of ``(resized_tensor, label)`` pairs in the same order.
    """
    to_tensor = torchvision.transforms.ToTensor()
    trans_data = []
    for image, label in data:
        resized = interpolate(to_tensor(image), res)
        trans_data.append((resized, label))
    return trans_data
    

In [23]:
import random as r

In [24]:
# Shuffle the labelled crops (sampling every element without replacement).
# NOTE(review): no random seed is set, so the split below differs between runs.
shuf_ds =  r.sample(dataset, len(dataset))

In [25]:
# First 2000 shuffled samples for training, the remainder for testing.
trainset = shuf_ds[:2000]
testset = shuf_ds[2000:]

In [26]:
# Convert both splits to tensors resized with res=100.
# NOTE(review): overwrites the PIL splits in place, so re-running this cell alone fails.
trainset = transform(trainset, 100)
testset = transform(testset, 100)

In [27]:
import torch

In [28]:
from get_data import *
from resnetclass import *
from gridsearch import *
from training import *

In [29]:
# Build the 4-class classifier; `get_resnet` presumably comes from one of the
# star imports above - TODO confirm.
# NOTE(review): resolution=140 here, while trainset/testset were resized with res=100 - verify.
resnet = get_resnet(width = 17, resolution=140,depth=[3,3,2,2], num_classes = 4)

In [30]:
# Location of the saved classifier weights.
# NOTE(review): absolute, machine-specific path.
path = '/Users/redabelhaj/Desktop/INF473V/Projet/models/resnetaf.txt'

In [31]:
# Load the saved weights, mapping all tensors to the CPU.
resnet.load_state_dict(torch.load(path,map_location=torch.device('cpu')))

<All keys matched successfully>

In [33]:
# Score the classifier on the whole test split (num_points = len(testset)), without CUDA.
# `accuracy` presumably comes from one of the star imports above - TODO confirm.
accuracy(resnet, testset, num_points = len(testset), cuda = False)

99.55156950672645

# Detection with R-CNN

In [42]:
#import requests

In [32]:
#response = requests.get("http://farm9.staticflickr.com/8445/7751093276_740e66ae99_z.jpg")
#image1 = open("image1.jpg", "wb")
#image1.write(response.content)
#image1.close()

#response = requests.get("http://farm1.staticflickr.com/138/358863752_c83b367fef_z.jpg")
#image2 = open("image2.jpg", "wb")
#image2.write(response.content)
#image2.close()

#response = requests.get("http://farm4.staticflickr.com/3624/3342769458_3bc41b3cce_z.jpg")
#image3 = open("image3.jpg", "wb")
#image3.write(response.content)
#image3.close()





In [33]:
import cv2

In [470]:
#image1 = cv2.cvtColor(cv2.imread("image1.jpg"),cv2.COLOR_BGR2RGB)
#image2 = cv2.cvtColor(cv2.imread("image2.jpg"),cv2.COLOR_BGR2RGB)
#image3 = cv2.cvtColor(cv2.imread("image3.jpg"),cv2.COLOR_BGR2RGB)

In [471]:
#l = selectivesearch(image1)

In [472]:
def selectivesearch(img):
  """
  Run OpenCV selective search on an image.

  - img : image in OpenCV format
  - returns the region proposals produced by the segmentation object; callers
    in this notebook unpack each one as a quadruplet (x, y, w, h)
  """
  searcher = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
  searcher.setBaseImage(img)
  # Two detection modes are available, Fast and Quality; Quality is used here
  # (switchToSelectiveSearchFast() would select the other one).
  searcher.switchToSelectiveSearchQuality()
  proposals = searcher.process()
  return proposals

def showrects(img,rects,maxRect=3000):
  """
  Display the image with the first maxRect rectangles of rects drawn on it.

  The drawing is done on a copy, so img itself is left untouched.
  """
  canvas = img.copy()
  for position, rect in enumerate(rects):
    # Only rectangles ranked before maxRect are drawn.
    if position >= maxRect:
      continue
    x, y, w, h = rect
    cv2.rectangle(canvas, (x, y), (x+w, y+h), (0, 0, 255), 1, cv2.LINE_AA)
  plt.imshow(canvas)

def filter_rects(rects, min_size=30, max_size = 400):
  """
  Keep the rectangles whose width and height both lie in [min_size, max_size].

  rects is an iterable of quadruplets (x, y, w, h); the kept rectangles are
  returned in a new list, in their original order.
  """
  def _side_ok(side):
    return min_size <= side <= max_size

  kept = []
  for rect in rects:
    _, _, width, height = rect
    if _side_ok(width) and _side_ok(height):
      kept.append(rect)
  return kept

In [57]:
# Bring a crop to the 140x140 input size used with the model in this notebook:
# conversion to a PIL image, Resize(140), then CenterCrop(140).
# NOTE(review): `transforms` is not imported by name in the visible cells
# (only `import torchvision.transforms`); presumably it comes from a star import - TODO confirm.
resize = transforms.Compose([
                                 transforms.ToPILImage(),
                                 transforms.Resize(140),
                             transforms.CenterCrop(140)
])
# Full preprocessing: resize, then conversion to a tensor (no normalisation step is applied).
# NOTE(review): this rebinds `transform`, shadowing the transform(data, res)
# function defined in an earlier cell.
transform = transforms.Compose([
      resize, 
      transforms.ToTensor()
])
# Try the transformation on a sample image (disabled).
#image1 =transform(image1)

In [63]:
def get_labeled_boxes(img,maxRect=2000,threshold=.5,iou=0.5):
  """
  Classify the selective-search proposals of an image and keep the confident ones.

  Arguments:
    - img : image array, cropped as img[y:y+h, x:x+w]
    - maxRect : maximum number of (size-filtered) proposals to classify
    - threshold : a proposal is kept when its highest softmax probability is
      strictly above this value
    - iou : not used by this function; kept so the signature matches
      get_labeled_boxes2

  Returns a list of triplets (rect, label, score) where rect is the proposal
  (x, y, w, h), label the predicted class index and score its probability.
  Relies on the module-level `resnet` classifier and `transform` pipeline.
  """
  predictions = []
  rects = filter_rects(selectivesearch(img))
  m = min(maxRect, len(rects))
  # Pure inference: no autograd graph is needed for the forward passes.
  with torch.no_grad():
    for i in tqdm.notebook.tqdm(range(m)):
      # Crop the image with rects[i], then preprocess the crop.
      (x,y,w,h) = rects[i]
      img_rect = transform(img[y:y+h,x:x+w])
      # Add the batch dimension (replaces the deprecated Tensor.resize call).
      img_rect = img_rect.unsqueeze(0)

      res_clf = resnet(img_rect)

      # Convert the scores to probabilities; dim=1 is the class axis of the
      # (1, num_classes) output and avoids the implicit-dim deprecation warning.
      probs = torch.nn.functional.softmax(res_clf, dim=1)
      proba_max, ind  = torch.max(probs), torch.argmax(probs)

      if proba_max > threshold:
        # Keep this prediction.
        predictions.append((rects[i], int(ind), float(proba_max)))

  return predictions

In [68]:
#preds = get_labeled_boxes(image1,maxRect=2000,threshold=.9,iou=0.7)

HBox(children=(FloatProgress(value=0.0, max=2000.0), HTML(value='')))






In [474]:
#labels = {0 : "buffalo", 1 : "zebra", 2:'rhino', 3:'elephant'}


def show_objs(img,labeled_regions,label_names=None):
  """
  Draw labelled detections on a copy of an image and display it.

  - img : image (the drawing is done on img.copy())
  - labeled_regions : iterable of triplets (rect, label, score):
    - rect : quadruplet (x, y, w, h)
    - label : integer class index
    - score : float between 0 and 1
  - label_names : optional mapping from class index to display name. When it
    is omitted, a module-level `labels` mapping is used if one is defined;
    otherwise the four species of this notebook are used (0 buffalo, 1 zebra,
    2 rhino, 3 elephant). A label missing from the mapping is shown as its number.
  """
  if label_names is None:
    # The module-level `labels` dict is commented out in this notebook, so
    # fall back to a built-in mapping instead of raising NameError.
    label_names = globals().get(
        "labels", {0: "buffalo", 1: "zebra", 2: "rhino", 3: "elephant"})

  imOut = img.copy()

  for rect,label,score in labeled_regions:
      x, y, w, h = rect
      try:
        name = label_names[label]
      except (KeyError, IndexError):
        name = str(label)
      cv2.rectangle(imOut, (x, y), (x+w, y+h), (0, 0, 255), 1, cv2.LINE_AA)
      cv2.putText(imOut, "{} : {:.2f}".format(name,score), (x,y+10),
                  cv2.FONT_HERSHEY_SIMPLEX,.6,(0,255,255),2,cv2.LINE_AA)

  plt.imshow(imOut)

In [475]:
#show_objs(image1, preds[:10])

In [476]:
def IoU(rect1,rect2):
  """
  Return the intersection-over-union of two rectangles as a float.

  rect1, rect2 : quadruplets (x, y, w, h), with (x, y) the corner of smallest
  coordinates. Returns 0.0 when the union has no area (e.g. two empty
  rectangles) instead of dividing by zero.
  """
  x1,y1,w1,h1 = rect1
  x2,y2,w2,h2 = rect2
  # Corners of the intersection rectangle.
  xA,yA = max(x1,x2),max(y1,y2)
  xB,yB = min(x1+w1,x2+w2),min(y1+h1,y2+h2)
  # Clamped at 0 so that disjoint rectangles give an empty intersection.
  inter = max(0,xB - xA)*max(0,yB-yA)
  union = w1*h1 + w2*h2 - inter
  if union <= 0:
    return 0.0
  return inter/union

In [117]:
def get_labeled_boxes2(img,maxRect=2000,threshold=.99,iou=0.2):
  """
  Classify the selective-search proposals of an image, keep the confident
  ones and drop those that overlap an already kept box of the same class.

  Arguments:
    - img : image array, cropped as img[y:y+h, x:x+w]
    - maxRect : maximum number of (size-filtered) proposals to classify
    - threshold : a proposal is considered when its highest softmax
      probability is strictly above this value
    - iou : a considered proposal is discarded when its IoU with a previously
      kept box of the same class is strictly above this value

  Returns a list of triplets (rect, label, score). Proposals are processed in
  the order given by selective search, so the first box of a cluster wins.
  Relies on the module-level `resnet` classifier and `transform` pipeline.
  """
  predictions = []
  rects = filter_rects(selectivesearch(img))
  m = min(maxRect, len(rects))
  # Pure inference: no autograd graph is needed for the forward passes.
  with torch.no_grad():
    for i in tqdm.notebook.tqdm(range(m)):
      # Crop the image with rects[i], then preprocess the crop.
      (x,y,w,h) = rects[i]
      img_rect = transform(img[y:y+h,x:x+w])
      # Add the batch dimension (replaces the deprecated Tensor.resize call).
      img_rect = img_rect.unsqueeze(0)
      res_clf = resnet(img_rect)
      # Convert the scores to probabilities; dim=1 is the class axis of the
      # (1, num_classes) output and avoids the implicit-dim deprecation warning.
      probs = torch.nn.functional.softmax(res_clf, dim=1)
      proba_max, ind  = torch.max(probs), torch.argmax(probs)
      if proba_max > threshold:
        label = int(ind)
        # Look through the predictions already kept: stop at the first box of
        # the same class that overlaps this proposal too much.
        already_detected = any(
            idx == label and IoU(rct, rects[i]) > iou
            for rct, idx, _ in predictions)
        if not already_detected:
          predictions.append((rects[i], label, float(proba_max)))

  return predictions

In [76]:
#labeled_regions2 = get_labeled_boxes2(image1)

HBox(children=(FloatProgress(value=0.0, max=2000.0), HTML(value='')))






In [153]:
from torchsummary import summary
import torch.nn as nn
import torch.nn.functional as f

In [434]:
class Rcnn(nn.Module):
    """Detection head on top of an existing ResNet backbone.

    The backbone's ``conv1``, ``bn1`` and ``layers`` modules are reused (the
    same module objects, not copies) and two linear heads are added on the
    flattened feature map: a classifier with ``num_classes + 1`` outputs and
    a 4-value bounding-box regressor.

    Args:
        resnet: backbone exposing ``conv1``, ``bn1``, ``layers``,
            ``resolution``, ``depth`` and ``width``.
        num_classes: number of object classes; the classifier has one extra
            output (presumably the background class - see the training loop).
    """

    def __init__(self, resnet, num_classes):
        super(Rcnn, self).__init__()
        self.conv1 = resnet.conv1
        self.bn1 = resnet.bn1
        self.cnn = resnet.layers
        self.num_classes = num_classes
        # Work out the size of the final feature map: for every entry of
        # `depth` the channel count doubles and the side goes through roundsp().
        channels, side = resnet.width, resnet.resolution
        for _ in resnet.depth:
            channels *= 2
            side = roundsp(side)
        # Feature map is channels @ side x side.
        out_size = int(channels*side*side)
        self.classifier = nn.Linear(out_size, num_classes+1)
        self.bboxreg = nn.Linear(out_size, 4)

    def forward(self, x):
        """Return ``(class scores, box)`` for a batch; the 4 box values lie in [0, 1]."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.cnn(out)
        out = out.view(out.size(0), -1)

        clf = self.classifier(out)

        # torch.sigmoid replaces the deprecated F.sigmoid; it squashes the
        # four regressed numbers into [0, 1].
        bbox = torch.sigmoid(self.bboxreg(out))
        return clf, bbox


In [435]:
def rcnn(resnet, num_classes, bn1_dict, conv1_dict, cnn_dict):
    """Return an Rcnn whose backbone weights are loaded and frozen.

    The three state dicts are loaded into ``bn1``, ``conv1`` and ``cnn`` (in
    that order), then every parameter of those three sub-modules gets
    ``requires_grad = False``.
    """
    model = Rcnn(resnet, num_classes)
    backbone_parts = (
        (model.bn1, bn1_dict),
        (model.conv1, conv1_dict),
        (model.cnn, cnn_dict),
    )
    for module, state in backbone_parts:
        module.load_state_dict(state)
    # Freeze the backbone once all weights are in place.
    for module, _ in backbone_parts:
        for param in module.parameters():
            param.requires_grad = False
    return model
        
    

In [436]:
def get_state_dicts(resnet):
    """Return the state dicts of ``resnet.bn1``, ``resnet.conv1`` and ``resnet.layers``, in that order."""
    bn1_state = resnet.bn1.state_dict()
    conv1_state = resnet.conv1.state_dict()
    layers_state = resnet.layers.state_dict()
    return bn1_state, conv1_state, layers_state

In [437]:
# State dicts of the trained classifier's bn1, conv1 and layers, in that order.
b,c,cnn= get_state_dicts(resnet)

In [438]:
# Detection model for the 4 species, with the backbone weights loaded and frozen.
rcnn_model = rcnn(resnet, 4, b,c,cnn)

In [439]:
# Layer-by-layer summary for a 3x140x140 input.
summary(rcnn_model, (3,140,140))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 17, 140, 140]             459
       BatchNorm2d-2         [-1, 17, 140, 140]              34
            Conv2d-3           [-1, 34, 70, 70]           5,236
       BatchNorm2d-4           [-1, 34, 70, 70]              68
              ReLU-5           [-1, 34, 70, 70]               0
            Conv2d-6           [-1, 34, 70, 70]          10,438
       BatchNorm2d-7           [-1, 34, 70, 70]              68
            Conv2d-8           [-1, 34, 70, 70]           5,236
       ResNetBlock-9           [-1, 34, 70, 70]               0
           Conv2d-10           [-1, 34, 70, 70]          10,438
      BatchNorm2d-11           [-1, 34, 70, 70]              68
             ReLU-12           [-1, 34, 70, 70]               0
           Conv2d-13           [-1, 34, 70, 70]          10,438
      BatchNorm2d-14           [-1, 34,

In [211]:
# TODO: the loss and the proper dataset still have to be defined

In [369]:
def iou_yolo(rect1, rect2, glob_im):
    """Return the IoU of two boxes given in relative centre format.

    Args:
        rect1, rect2: ``(x_center, y_center, width, height)`` quadruplets,
            all relative to the image size.
        glob_im: image tensor the boxes refer to; only its size is read.
            Assumed to be laid out as (channels, height, width), as produced
            by ``ToTensor`` - TODO confirm.

    Returns:
        The value of ``IoU`` on the two boxes converted to absolute
        ``(x_min, y_min, width, height)`` rectangles.
    """
    # Read the size from the argument (the previous code used an undefined
    # global named `im`).
    _, height, width = glob_im.size()

    def _to_abs_corner(rect):
        # Centre -> corner, then relative -> pixels; corner and size must be
        # in the same unit for the overlap to be meaningful.
        x_c, y_c, w_rel, h_rel = rect
        return [(x_c - w_rel/2)*width, (y_c - h_rel/2)*height,
                w_rel*width, h_rel*height]

    return IoU(_to_abs_corner(rect1), _to_abs_corner(rect2))
    
    
    

In [370]:
def is_positive(region, list_rec_lab, glob_im):
    """Tell whether a region overlaps one of the annotated boxes.

    Returns ``(True, box)`` for the first annotated box whose ``iou_yolo``
    with ``region`` is strictly above 0.5, and plain ``False`` when there is
    none (callers rely on this tuple-or-False contract).
    """
    for _label, gt_box in list_rec_lab:
        if iou_yolo(region, gt_box, glob_im) > .5:
            return True, gt_box
    return False

In [418]:
def sample_regions(img, list_rec_lab, region_prop, max_neg=96):
    """Split region proposals into positive and negative training samples.

    Args:
        img: image the proposals were computed on (forwarded to ``is_positive``).
        list_rec_lab: annotations of the image as ``(label, box)`` pairs. The
            label of the first annotation is attached to every sample.
        region_prop: iterable of region proposals.
        max_neg: maximum number of negative regions kept (96 by default).

    Returns:
        List of samples in proposal order: ``(region, label)`` for a negative
        region and ``(region, label, gt_box)`` for a positive one, ``gt_box``
        being the annotated box returned by ``is_positive``. ``label`` is
        ``None`` when the image has no annotation.
    """
    res = []
    # Guard the lookup: an image without annotation only yields negatives.
    label = list_rec_lab[0][0] if list_rec_lab else None
    n_neg = 0
    for region in region_prop:
        isp = is_positive(region, list_rec_lab, img)
        if isinstance(isp, tuple):
            _, rec = isp
            res.append((region, label, rec))
        elif n_neg < max_neg:
            res.append((region, label))
            # Count the negatives so that the cap is actually enforced.
            n_neg += 1
    return res

In [397]:
def crop_tensor(image, region):
    """Crop a relative centre-format region out of an image tensor.

    Args:
        image: 3-D image tensor, assumed to be laid out as
            (channels, height, width), as produced by ``ToTensor`` - TODO confirm.
        region: ``(x_center, y_center, width, height)``, all relative to the
            image size, x running along the width and y along the height.

    Returns:
        The view ``image[:, top:bottom, left:right]`` of the region, with the
        bounds truncated to integers and clamped at 0.
    """
    x, y, w_r, h_r = region

    _, height, width = image.size()
    x_left = width*x - w_r*width/2
    y_top = height*y - h_r*height/2
    # Clamp at 0: a negative start index would wrap around to the other side.
    left, right = max(0, int(x_left)), max(0, int(x_left + w_r*width))
    top, bottom = max(0, int(y_top)), max(0, int(y_top + h_r*height))
    # Rows (dim 1) follow y, columns (dim 2) follow x.
    return image[:, top:bottom, left:right]

In [398]:
def get_regions(img_t):
    """Compute selective-search proposals of an image tensor in relative centre format.

    Args:
        img_t: 3-D image tensor, assumed to be laid out as
            (channels, height, width), as produced by ``ToTensor`` - TODO confirm.

    Returns:
        List of ``(x_center, y_center, width, height)`` tuples, each value
        divided by the image width (x, width) or height (y, height).
    """
    # Channels-last array for OpenCV.
    # NOTE(review): the array keeps the tensor's dtype and value range;
    # confirm that selective search accepts it as is.
    img_cv = img_t.permute(1,2,0).numpy()
    # x is divided by the width and y by the height (the two were swapped
    # before, which only went unnoticed on square images).
    _, H, W = img_t.size()
    rects = selectivesearch(img_cv)
    res = []
    for rect in rects:
        x,y,w,h = rect
        x_c = (x + w/2)/W
        y_c = (y + h/2)/H
        w_r = w/W
        h_r = h/H
        res.append((x_c, y_c, w_r, h_r))
    return res

In [468]:
def train_rcnn(model, optimizer, ds_detect_t, n_epoch = 40, resolution = 140):
    """Train the detection heads on region proposals.

    For every image, proposals are computed with ``get_regions`` and sampled
    with ``sample_regions``; each sampled region is cropped, resized to
    ``resolution`` and passed through the model. Negative regions contribute
    a classification loss towards the extra (background) class; positive
    regions contribute a classification loss towards their label plus a
    smooth-L1 loss between the predicted box and the annotated box.
    Gradients are accumulated over the regions of an image and the optimizer
    steps once per image.

    Args:
        model: network returning ``(class scores, box)`` for a batch.
        optimizer: optimizer over the trainable parameters of ``model``.
        ds_detect_t: iterable of ``(image tensor, [(label, box), ...])`` pairs.
        n_epoch: number of passes over ``ds_detect_t``.
        resolution: side of the square crops fed to the model (140 by default).

    Returns:
        The trained ``model`` (the same object).
    """
    loss_clf = torch.nn.CrossEntropyLoss()
    loss_reg = torch.nn.SmoothL1Loss()
    # The classifier has num_classes + 1 outputs; the last index is used as
    # the background class (4 for the 4-species model of this notebook).
    background = getattr(model, "num_classes", 4)
    for _ in tqdm.notebook.tqdm(range(n_epoch)):
        for image, list_rect_lab in tqdm.notebook.tqdm(ds_detect_t):
            optimizer.zero_grad()
            region_prop = get_regions(image)

            regions = sample_regions(image, list_rect_lab, region_prop)
            for reg in regions:
                crop = crop_tensor(image, reg[0])
                if crop.numel() == 0:
                    # Degenerate proposal: nothing to resize or classify.
                    continue
                # unsqueeze adds the batch dimension (replaces the deprecated
                # Tensor.resize call).
                ipt_net = interpolate(crop, resolution).unsqueeze(0)
                clf, bbox = model(ipt_net)
                if len(reg)==2:
                    # Negative region: classification towards background only.
                    loss = loss_clf(clf, torch.tensor([background]))
                else:
                    # Positive region: classification + box regression.
                    _, label, gt_rect = reg
                    # Shape (1, 4), matching the prediction instead of relying
                    # on broadcasting from (4,).
                    target = torch.tensor([gt_rect], dtype=bbox.dtype)
                    loss = loss_reg(bbox, target) + loss_clf(clf, torch.tensor([label]))
                loss.backward()
            optimizer.step()
    return model

In [477]:
#rcnn_model
#opt = torch.optim.Adam(rcnn_model.parameters())
#rcnn_model = train_rcnn(rcnn_model, opt, ds_detect_t)
