## Lecture 14. object detection (advanced)

물체의 종류와 위치 파악, 테두리 그리기, bounding box...  
그리드 형식의 데이터 처리 : CNN (Convolutional Neural Network / 합성곱 신경망)  

Training : label 준비 -> 인공지능에게 학습  
Inference (Prediction) : 추론  
Training -> inference  

**YOLO : You Only Look Once (CNN 기반, object detection)**  
resizing -> CNN 통과 -> detection complete (drawing bounding box)  

---------

**이미 training된 지능을 사용 (pre-trained model weights)**

• Pre-trained model weights:
https://pjreddie.com/media/files/yolov3.weights   

• Model configuration file (text file): https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg   

• COCO class names (text file): https://github.com/pjreddie/darknet/blob/master/data/coco.names

In [5]:
# YOLO step 0 : shape -> height, weight, channel 값 가짐

import cv2

img = cv2.imread("poker.jpg")
h, w, c = img.shape
print ("h, w, c : ", h, w, c)

cv2.imshow ('poker', img)
cv2.waitKey()

# mac에서 opencv 팝업창 안닫힐때 뒤에 항상 붙여주기
cv2.destroyAllWindows()
cv2.waitKey(1)
cv2.waitKey(1)
cv2.waitKey(1)
cv2.waitKey(1)


h, w, c :  1020 1280 3


-1

In [6]:
# YOLO step 1 : DNN이 인식할 수 있는 값 = blob 
# 이미지를 blob으로 변환

import cv2
import numpy as np

img = cv2.imread("poker.jpg")
h, w, c = img.shape
print ("h, w, c : ", h, w, c)

# get blob from original img 
# 원본, 소수값으로 변환 (비율로 변환 / min-max normalization : 0~1), 고정된 크기로 변환 (정사각형), (0, 0, 0), BGR을 RGB로 할 것인가?, 고정된 크기로 변환 시에 자르기 가능?
blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
print ('blob shape:', blob.shape) 
# blob shape : (batch, channel, 가로크기, 세로크기)

cv2.imshow ("blob", blob[0, 0, :, :])
cv2.waitKey()

# mac에서 opencv 팝업창 안닫힐때 뒤에 항상 붙여주기
cv2.destroyAllWindows()
cv2.waitKey(1)
cv2.waitKey(1)
cv2.waitKey(1)
cv2.waitKey(1)

h, w, c :  1020 1280 3
blob shape: (1, 3, 416, 416)


-1

In [7]:
# YOLO step 2

import cv2
import numpy as np

img = cv2.imread("poker.jpg")
h, w, c = img.shape
print ("h, w, c : ", h, w, c)

blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
print ('blob shape:', blob.shape) 

# 파일 읽기
with open("coco.names", "r") as f : 
    classes = [line.strip() for line in f.readlines()]
    
print (classes)

'''# mac에서 opencv 팝업창 안닫힐때 뒤에 항상 붙여주기
cv2.destroyAllWindows()
cv2.waitKey(1)
cv2.waitKey(1)
cv2.waitKey(1)
cv2.waitKey(1)'''

h, w, c :  1020 1280 3
blob shape: (1, 3, 416, 416)
['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


'# mac에서 opencv 팝업창 안닫힐때 뒤에 항상 붙여주기\ncv2.destroyAllWindows()\ncv2.waitKey(1)\ncv2.waitKey(1)\ncv2.waitKey(1)\ncv2.waitKey(1)'

In [10]:
# YOLO step 3

import cv2
import numpy as np

img = cv2.imread("poker.jpg")
h, w, c = img.shape
print ("h, w, c : ", h, w, c)

blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
print ('blob shape:', blob.shape) 

# 파일 읽기
with open("coco.names", "r") as f : 
    classes = [line.strip() for line in f.readlines()]
    
# load pre-trained YOLO model (net : network)
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)

# set output layers (layer 중에서 output을 내기 위한 layer 지정)
layer_names = net.getLayerNames()
output_layers = [layer_names[i-1] for i in net.getUnconnectedOutLayers()]
print (output_layers)

h, w, c :  1020 1280 3
blob shape: (1, 3, 416, 416)
['yolo_82', 'yolo_94', 'yolo_106']


In [11]:
# YOLO step 4 

import cv2
import numpy as np

img = cv2.imread("poker.jpg")
h, w, c = img.shape
print ("h, w, c : ", h, w, c)

blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
print ('blob shape:', blob.shape) 

# 파일 읽기
with open("coco.names", "r") as f : 
    classes = [line.strip() for line in f.readlines()]
    
# load pre-trained YOLO model (net : network)
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)

# set output layers (layer 중에서 output을 내기 위한 layer 지정)
layer_names = net.getLayerNames()
output_layers = [layer_names[i-1] for i in net.getUnconnectedOutLayers()]
print (output_layers)

# detect objects
net.setInput(blob)
outs = net.forward(output_layers)
print ("shape of the first output", outs[0].shape)
print (outs[0][0, :5])

h, w, c :  1020 1280 3
blob shape: (1, 3, 416, 416)
['yolo_82', 'yolo_94', 'yolo_106']
shape of the first output (507, 85)
[2.8508145e-02 4.3023549e-02 2.6924250e-01 1.6787274e-01 1.3246119e-09]


In [13]:
import cv2
import numpy as np

# read image
img = cv2.imread('people.jpg')
height, width, channel = img.shape
print('original image shape:', height, width, channel)

# get blob from image
blob = cv2.dnn.blobFromImage(img, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
print('blob shape:', blob.shape)

# read coco object names
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]

print('number of classes =', len(classes))

# load pre-trained yolo model from configuration and weight files
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)

# set output layers
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
print('output layers:', output_layers)

# detect objects
net.setInput(blob)
outs = net.forward(output_layers)

# get bounding boxes and confidence socres
class_ids = []
confidence_scores = []
boxes = []

for out in outs: # for each detected object

    for detection in out: # for each bounding box

        scores = detection[5:] # scores (confidence) for all classes
        class_id = np.argmax(scores) # class id with the maximum score (confidence)
        confidence = scores[class_id] # the maximum score

        if confidence > 0.5:
            # bounding box coordinates
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # rectangle coordinates
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidence_scores.append(float(confidence))
            class_ids.append(class_id)

print('number of dectected objects =', len(boxes))

# non maximum suppression
indices = cv2.dnn.NMSBoxes(boxes, confidence_scores, 0.5, 0.4)
print('number of final objects =', len(indices))

# draw bounding boxes with labels on image
colors = np.random.uniform(0, 255, size=(len(classes), 3))
font = cv2.FONT_HERSHEY_PLAIN

for i in range(len(boxes)):
    if i in indices:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        print(f'class {label} detected at {x}, {y}, {w}, {h}')
        color = colors[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label, (x, y - 10), font, 1, color, 2)

cv2.imshow('Objects', img)
cv2.waitKey()
# mac에서 opencv 팝업창 안닫힐때 뒤에 항상 붙여주기
cv2.destroyAllWindows()
cv2.waitKey(1)
cv2.waitKey(1)
cv2.waitKey(1)
cv2.waitKey(1)


original image shape: 427 640 3
blob shape: (1, 3, 416, 416)
number of classes = 80
output layers: ['yolo_82', 'yolo_94', 'yolo_106']
number of dectected objects = 30
number of final objects = 7
class person detected at 184, 21, 148, 295
class person detected at 322, 37, 156, 294
class person detected at 23, 114, 184, 278
class person detected at 412, 85, 208, 307
class laptop detected at 308, 257, 146, 107
class person detected at 68, 127, 114, 164
class tie detected at 111, 212, 20, 73


-1