# CVS HW

Set `path` in the first code cell below to the folder that contains the video sequences, then choose 'Run all'.





In [0]:
# Root folder of the dataset. Every sub-folder is treated as one sequence and is
# expected to contain an `rgb/` and a `depth/` folder with the frames.
path = '/content/HW/'

In [0]:
# Download two files from Google Drive by file id.
# NOTE(review): presumably the YOLO weights and the traffic-sign classifier
# checkpoint that later cells load from /content — confirm.
!gdown https://drive.google.com/uc?id=1Lnjdb48lx9urYtETpDBZc8wM7iNY-QCd
!gdown https://drive.google.com/uc?id=1grVMSVkhGplNKDOE7XOzF-RrUe_EkJRF

In [0]:
# Fetch the Darknet sources. The clone is not pinned to a commit, so it picks up
# whatever the repository's default branch currently contains.
!git clone https://github.com/AlexeyAB/darknet

In [0]:
# Enter the cloned repository (relative move: re-running this cell changes directory again).
%cd darknet

In [0]:
# Switch the OPENCV, GPU and CUDNN flags in the Makefile from 0 to 1, then build.
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
!make

In [0]:
# Derive an 8-class network description from the stock YOLOv3 cfg.
!cp cfg/yolov3.cfg cfg/yolov3_testing.cfg

# Shorten the schedule (max_batches and the two learning-rate steps).
!sed -i 's/max_batches = 500200/max_batches = 16000/' cfg/yolov3_testing.cfg
!sed -i 's/steps=400000,450000/steps=12800,14400/' cfg/yolov3_testing.cfg
# The remaining edits are addressed by line number, so they only apply if the
# upstream yolov3.cfg still has `classes=80` / `filters=255` on exactly these lines.
!sed -i '610 s@classes=80@classes=8@' cfg/yolov3_testing.cfg
!sed -i '696 s@classes=80@classes=8@' cfg/yolov3_testing.cfg
!sed -i '783 s@classes=80@classes=8@' cfg/yolov3_testing.cfg
# 39 = (8 classes + 5) * 3 — presumably the usual YOLOv3 rule for the conv layer
# feeding each detection head; confirm against the Darknet documentation.
!sed -i '603 s@filters=255@filters=39@' cfg/yolov3_testing.cfg
!sed -i '689 s@filters=255@filters=39@' cfg/yolov3_testing.cfg
!sed -i '776 s@filters=255@filters=39@' cfg/yolov3_testing.cfg


In [0]:
# Step back out to the parent directory.
%cd ..

In [0]:
# Homework dataset: download the archive, unpack it quietly into the current
# directory, then delete the archive.
!wget http://deeplearning.iit.bme.hu/CVS/HW.zip
!unzip -qq HW.zip
!rm HW.zip

In [0]:
import torch
import torch.nn as nn
import cv2
from torchvision import datasets, transforms, models
from PIL import Image
from google.colab.patches import cv2_imshow

# Names of the traffic-sign classes. predict_traffic looks up the index of the
# highest classifier score in this list, so the order must match the order of
# the classifier's outputs.
traffic_classes = ['Bump', 'Bumpy road', 'Bus stop', 'Children', 'Crossing (blue)', 'Crossing (red)', 'Cyclists',
     'Danger (other)', 'Dangerous left turn', 'Dangerous right turn', 'Give way', 'Go ahead', 'Go ahead or left',
     'Go ahead or right', 'Go around either way', 'Go around left', 'Go around right', 'Intersection', 'Limit 100',
     'Limit 120', 'Limit 20', 'Limit 30', 'Limit 50', 'Limit 60', 'Limit 70', 'Limit 80', 'Limit 80 over',
     'Limit over', 'Main road', 'Main road over', 'Multiple dangerous turns', 'Narrow road (left)',
     'Narrow road (right)', 'No entry', 'No entry (both directions)', 'No entry (truck)', 'No stopping', 'No takeover',
     'No takeover (truck)', 'No takeover (truck) end', 'No takeover end', 'No waiting', 'One way road',
     'Parking', 'Road works', 'Roundabout', 'Slippery road', 'Stop', 'Traffic light', 'Train crossing',
     'Train crossing (no barrier)', 'Wild animals', 'X - Priority','X - Turn Left', 'X - Turn Right']

# Convolutional building block: 3x3 convolution -> ReLU -> batch normalisation
class Conv(nn.Module):
    """A 3x3 convolution followed by ReLU and then BatchNorm.

    Args:
        in_channels: number of channels of the incoming feature map.
        channels: number of channels produced by the convolution.
        stride: stride of the convolution (2 halves the spatial size).
    """

    def __init__(self, in_channels, channels, stride=1):
        super().__init__()

        # Padding of 1 keeps the spatial size at stride 1. The convolution has
        # no bias term.
        self.conv = nn.Conv2d(
            in_channels,
            channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )

        # Normalisation is applied to the activated output.
        self.bn = nn.BatchNorm2d(channels)

    def forward(self, x):
        """Apply convolution, then ReLU, then batch normalisation to ``x``."""
        activated = torch.relu(self.conv(x))
        return self.bn(activated)

class ConvNet(nn.Module):
    """Small fully-convolutional classifier built from ``Conv`` blocks.

    Five stages follow each other; every stage is two stride-1 ``Conv`` blocks
    and one stride-2 ``Conv`` block that doubles the channel count. A final
    1x1 convolution turns the features into ``num_classes`` scores.

    Args:
        base_channels: channel count of the first stage.
        in_channels: channel count of the input image.
        num_classes: number of class scores produced.
    """

    def __init__(self, base_channels=16, in_channels=3, num_classes=55):
        super().__init__()

        # Channel widths used by the successive stages.
        w1 = base_channels
        w2 = base_channels * 2
        w4 = base_channels * 4
        w8 = base_channels * 8
        w16 = base_channels * 16
        w32 = base_channels * 32

        # Stage 1: two convolutions, then a strided one that widens the channels.
        self.c11 = Conv(in_channels, w1)
        self.c12 = Conv(w1, w1)
        self.d1 = Conv(w1, w2, 2)

        # Stages 2-5 repeat the same pattern.
        self.c21 = Conv(w2, w2)
        self.c22 = Conv(w2, w2)
        self.d2 = Conv(w2, w4, 2)

        self.c31 = Conv(w4, w4)
        self.c32 = Conv(w4, w4)
        self.d3 = Conv(w4, w8, 2)

        self.c41 = Conv(w8, w8)
        self.c42 = Conv(w8, w8)
        self.d4 = Conv(w8, w16, 2)

        self.c51 = Conv(w16, w16)
        self.c52 = Conv(w16, w16)
        # With a 32x32 input, the fifth halving leaves a 1x1 activation map.
        self.d5 = Conv(w16, w32, 2)

        # Plain 1x1 convolution producing the class scores; it has neither
        # BatchNorm nor ReLU.
        self.classifier = nn.Conv2d(w32, num_classes, kernel_size=1)

    def forward(self, x):
        """Run all stages and return the class scores.

        The classifier output is (batch_size x num_classes x 1 x 1) for a
        32x32 input; ``torch.squeeze`` removes every size-1 dimension, giving
        (batch_size x num_classes). For a single-image batch the batch
        dimension is removed as well, so the result is a 1-D score vector.
        """
        stages = (
            (self.c11, self.c12, self.d1),
            (self.c21, self.c22, self.d2),
            (self.c31, self.c32, self.d3),
            (self.c41, self.c42, self.d4),
            (self.c51, self.c52, self.d5),
        )
        for first, second, downscale in stages:
            x = downscale(second(first(x)))

        scores = self.classifier(x)
        return torch.squeeze(scores)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The checkpoint is a pickled full model, so the Conv / ConvNet classes above
# must be defined before this line runs. map_location lets a checkpoint that
# was saved on a GPU machine load on a CPU-only one.
# NOTE(review): unpickling executes code from the file — only load checkpoints
# from a trusted source. Recent PyTorch releases may additionally require
# weights_only=False to load a full pickled model; confirm for the target version.
model = torch.load('/content/traffic_sign_model_HC_final.pth', map_location=device)
model.to(device)
model.eval()

# Pre-processing for classifier inputs. `Resize` replaces the deprecated
# `Scale` transform, which newer torchvision releases no longer provide.
imsize = 32
loader = transforms.Compose([transforms.Resize(imsize), transforms.ToTensor()])
                         
def predict_traffic(model, x, y, w, h, image, classes):
  """Classify the traffic sign inside a bounding box of ``image``.

  Args:
    model: classifier returning one score per entry of ``traffic_classes``.
    x, y: top-left corner of the box, in pixels (must be non-negative).
    w, h: width and height of the box, in pixels.
    image: image array indexed as ``image[row, column]``.
    classes: unused; kept so existing callers keep working. The returned name
      always comes from the module-level ``traffic_classes`` list.

  Returns:
    The ``traffic_classes`` entry with the highest score.
  """
  # Crop the box and bring it to the 32x32 size fed to the classifier.
  crop = cv2.resize(image[y:y+h, x:x+w], (32, 32))

  # ToTensor converts the image to a float CxHxW tensor. The crop is already
  # 32x32, so the extra (deprecated) Scale(32) step of the original was a no-op
  # and is dropped.
  # NOTE(review): the channel order is passed through unchanged (images read
  # with cv2.imread are BGR) — confirm this matches how the model was trained.
  batch = transforms.ToTensor()(Image.fromarray(crop)).float().unsqueeze(0)

  # Run on whatever device the model lives on instead of assuming CUDA.
  batch = batch.to(next(model.parameters()).device)

  # Inference only: no autograd graph is needed.
  with torch.no_grad():
    scores = model(batch)

  # argmax over the flattened scores gives the winning class index.
  best = int(torch.argmax(scores))
  return traffic_classes[best]

In [0]:
import cv2
import numpy as np
import glob
import random
from google.colab.patches import cv2_imshow
from scipy.optimize import fsolve
import math
import os

# Load the YOLO detector into OpenCV's DNN module from a weights file and the
# 8-class cfg written to darknet/cfg by an earlier cell.
net = cv2.dnn.readNet("/content/yolov3_training_1600.weights", "/content/darknet/cfg/yolov3_testing.cfg")

def equations(p):
  """Residuals of the two projection equations for a candidate point.

  ``p`` is the pair ``(xp, yp)`` being solved for. The values ``u``, ``v`` and
  ``z`` are read from module-level variables, so they must be set before this
  function is called (or handed to a solver).

  Returns:
    A 2-tuple with the residual of each equation; both are zero at the solution.
  """
  xp, yp = p
  residual_x = (6.0604821777343750e+02)*xp + (3.1442626953125000e+02)*z - u*z
  residual_y = (6.0498577880859375e+02)*yp + (2.4605038452148438e+02)*z - v*z
  return (residual_x, residual_y)

# Labels of the detector's outputs, indexed by the class id of a detection.
classes = ["SUV","plane","traffic sign","angry","evil","happy","sad","truck"]

# Top-level categories of the result format. The detection loop uses class id
# 0 for traffic signs, 1 for vehicles and 2 for the four face-like labels.
# (This list itself is not referenced by the code visible in this notebook.)
classNames = ['traffic sign', 'vehicle', 'cactus']
# subclassNames[k] lists the sub-classes of category k; the position of a label
# inside its list is the subclass id that gets stored in the predictions.
subclassNames = [
    ['Bump', 'Bumpy road', 'Bus stop', 'Children', 'Crossing (blue)', 'Crossing (red)', 'Cyclists',
     'Danger (other)', 'Dangerous left turn', 'Dangerous right turn', 'Give way', 'Go ahead', 'Go ahead or left',
     'Go ahead or right', 'Go around either way', 'Go around left', 'Go around right', 'Intersection', 'Limit 100',
     'Limit 120', 'Limit 20', 'Limit 30', 'Limit 50', 'Limit 60', 'Limit 70', 'Limit 80', 'Limit 80 over',
     'Limit over', 'Main road', 'Main road over', 'Multiple dangerous turns', 'Narrow road (left)',
     'Narrow road (right)', 'No entry', 'No entry (both directions)', 'No entry (truck)', 'No stopping', 'No takeover',
     'No takeover (truck)', 'No takeover (truck) end', 'No takeover end', 'No waiting', 'One way road',
     'Parking','Road works', 'Roundabout', 'Slippery road', 'Stop','Traffic light', 'Train crossing',
     'Train crossing (no barrier)','Wild animals','X - Priority','X - Turn Left','X - Turn Right'],
    ['SUV','truck','plane'],
    ['happy','sad','angry','evil']]

# Every sub-directory of `path` is one sequence. Python's __pycache__ folder is
# excluded by name, so the filter also works when `path` has no trailing
# separator (the earlier string concatenation only matched with one).
myFolderList = sorted(
    entry.path
    for entry in os.scandir(path)
    if entry.is_dir() and entry.name != '__pycache__'
)
# Show the folders that will actually be processed.
print(myFolderList)

layer_names = net.getLayerNames()
# Ask OpenCV for the output-layer names directly. Indexing the result of
# getUnconnectedOutLayers() with `i[0]` only works on builds that return an
# Nx1 array and raises on builds that return a flat array of indices.
output_layers = list(net.getUnconnectedOutLayersNames())
# One random colour per detector class (for drawing boxes).
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Loop through all the images

# Settings of the detection pass (values unchanged from the original cell).
BLOB_SCALE = 0.00392        # pixel scaling passed to blobFromImage (about 1/255)
BLOB_SIZE = (640, 480)      # (width, height) the frame is resized to for the network
CONF_THRESHOLD = 0.3        # a raw detection needs a best class score above this
NMS_SCORE_THRESHOLD = 0.2   # score_threshold argument of cv2.dnn.NMSBoxes
NMS_IOU_THRESHOLD = 0.2     # nms_threshold argument of cv2.dnn.NMSBoxes
DEPTH_WINDOW_RADIUS = 3     # depth is averaged over a 7x7 window around the box centre
V_ORIGIN = 480              # v is measured as 480 - row

# Coefficients of the two projection equations (the same literals that appear
# in `equations`). NOTE(review): presumably the camera's focal lengths and
# principal point in pixels — confirm against the dataset's calibration.
FX, CX = 6.0604821777343750e+02, 3.1442626953125000e+02
FY, CY = 6.0498577880859375e+02, 2.4605038452148438e+02


def _mean_depth(depth_img, center_x, center_y, radius=DEPTH_WINDOW_RADIUS):
    """Average the raw depth around a pixel and divide it by 1000.

    The window is clipped to the image, so positions outside the frame are
    ignored instead of wrapping around through negative indices. Returns 0.0
    when the window lies completely outside the depth image.
    NOTE(review): the division by 1000 presumably converts millimetres to
    metres — confirm the unit of the depth files.
    """
    rows, cols = depth_img.shape[:2]
    top, bottom = max(center_y - radius, 0), min(center_y + radius + 1, rows)
    left, right = max(center_x - radius, 0), min(center_x + radius + 1, cols)
    if top >= bottom or left >= right:
        return 0.0
    return float(depth_img[top:bottom, left:right].mean()) / 1000


def _back_project(center_x, center_y, z):
    """Solve the two projection equations for (x, y) at depth ``z``.

    Both equations are linear and independent, so the closed form below gives
    the root that was previously searched for numerically, without passing
    u, v and z through module-level variables.
    """
    u = center_x
    v = V_ORIGIN - center_y
    return (u - CX) * z / FX, (v - CY) * z / FY


full_dict = {}

# Result keys are expressed relative to the folder that contains the dataset
# root (e.g. 'HW/<sequence>/rgb/<file>' for path='/content/HW/'), independent
# of how long the prefix in front of the dataset folder is.
dataset_parent = os.path.dirname(os.path.normpath(path))

for f in myFolderList:
    images_path = sorted(os.listdir(f + "/rgb/"))
    depth_path = sorted(os.listdir(f + "/depth/"))
    sequence_key = os.path.relpath(f, dataset_parent)

    # RGB and depth frames are paired by their position in the sorted listings.
    for img_idx, img_name in enumerate(images_path):
        rgb_file = f + "/rgb/" + img_name
        depth_file = f + "/depth/" + depth_path[img_idx]

        # Load the colour frame and the depth frame (-1: keep the stored bit depth).
        img = cv2.imread(rgb_file)
        depth_img = cv2.imread(depth_file, -1)
        if img is None or depth_img is None:
            # cv2.imread signals failure by returning None.
            raise OSError("could not read image pair: " + rgb_file + " / " + depth_file)
        height, width = img.shape[:2]

        # Detecting objects
        blob = cv2.dnn.blobFromImage(img, BLOB_SCALE, BLOB_SIZE, (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)

        class_ids = []
        confidences = []
        boxes = []
        list_3d = []
        centers = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = scores[class_id]
                if not confidence > CONF_THRESHOLD:
                    continue

                # Detections are normalised; scale them to pixel coordinates.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Top-left corner of the box.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                # Depth at the box centre and the matching 3D position.
                # (Assumes the depth frame has the same resolution as the colour frame.)
                z = _mean_depth(depth_img, center_x, center_y)
                x3d, y3d = _back_project(center_x, center_y, z)

                centers.append([center_x, center_y])
                boxes.append([x, y, w, h])
                list_3d.append([x3d, y3d, z])
                confidences.append(float(confidence))
                class_ids.append(class_id)

        # Non-maximum suppression. Depending on the OpenCV build the result is
        # an Nx1 array, a flat array or an empty tuple; normalise it to a set.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)
        kept = set(np.array(indexes).flatten().tolist())

        objects = []  # all predictions of this frame

        # No object detected: the frame gets nine zeros as a placeholder.
        if len(boxes) == 0:
            objects.extend([0] * 9)

        for i, (x, y, w, h) in enumerate(boxes):
            if i not in kept:
                continue

            x3d, y3d, z = list_3d[i]
            label = str(classes[class_ids[i]])

            # Keep the crop origin inside the image.
            # NOTE(review): w and h are not reduced when x or y is clamped, so
            # the crop passed to the classifier can extend past the detected box.
            x = max(x, 0)
            y = max(y, 0)

            if label in subclassNames[2]:
                classID = 2
                subclassID = subclassNames[2].index(label)
            elif label in subclassNames[1]:
                classID = 1
                subclassID = subclassNames[1].index(label)
            elif label == "traffic sign":
                classID = 0
                # The detector only says "traffic sign"; the classifier names it.
                label = predict_traffic(model, x, y, w, h, img, classes)
                subclassID = subclassNames[0].index(label)
            else:
                # Fail loudly instead of reusing the ids of a previous detection.
                raise ValueError("unexpected detector label: " + label)

            # One entry per object: centre, size, class ids and 3D position.
            objects.append([
                centers[i][0],
                centers[i][1],
                w,
                h,
                classID,
                subclassID,
                float(x3d),
                float(y3d),
                float(z),
            ])

        # Poses are not estimated; twelve zeros are stored for every frame.
        current_img_dict = {
            'poses': [0.] * 12,
            'objects': objects,
        }
        full_dict[sequence_key + "/rgb/" + img_name] = current_img_dict

# Store all predictions in a pickle file in the current working directory.
filename = "predictions.pickle"
import pickle
# The context manager flushes and closes the file even if pickling fails.
with open(filename, "wb") as out_file:
    pickle.dump(full_dict, out_file)



In [0]:
import pickle
from HW.evaluate import evaluate

# Read the stored predictions back and score them with the evaluator shipped in
# the dataset folder. The file is expected to be the one written by the
# detection cell (that holds when the notebook's working directory is /content).
with open('/content/predictions.pickle', 'rb') as file:
    predictions = pickle.load(file)
evaluate(predictions)