<a href="https://colab.research.google.com/github/nanpolend/machine-learning/blob/master/Object_tracking_%E7%89%A9%E4%BB%B6%E8%BF%BD%E8%B9%A4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1>光頭傑夫製作</h1><img src="https://rolqiw.bl.files.1drv.com/y4mYqkObWmQmdJFd8pab8t8m7d_mFhh3_YG63fiAnPaG6gjahHFt0n087pF97x1W2wemm7hd4ZdiPD2OxoPtqcCD_4-Jy-fojnt97kGGWR4Hnq0-9Ny61afd7WVHVTmF_9pcr45OFDFSwLKByeQU1q9TjwKk-HsNmWVsW1eGQ4Gi-9G4pQ5DYoTxswYJ5r5dLF-kKsA1InEyrqmV3kF5pm4WQ?width=658&height=584&cropmode=none" width=160px align="left" />
<!--1789344913-75-->

## <font color=red>物件追蹤
------
    如果今天我們拿在手上的檔案不是照片，是一段影片，那我們可以辨識嗎?  沒錯，這就是物件追蹤  
    照片是靜態的，影片可是會動的，那影響辨識結果的因素可多了!!有外觀變化，快速運動，模糊，縮放變化等等

## <font color=red>OpenCV Tracking API

## <font color=red>處理架構
--------
### [GOTURN模型下載](https://github.com/spmallick/goturn-files)
### [GOTURN用法](https://blog.csdn.net/LuohenYJ/article/details/89083351)

## <font color=red>把影片讀進來

In [None]:
import cv2
import numpy as np

# Open the clip and grab its first frame; that frame is what the ROI
# selection and the tracker initialisation in the following cells work on.
video = cv2.VideoCapture("soccer2.mp4")
if not video.isOpened():
    # Fail here with a clear message instead of letting a later cell choke
    # on an empty frame.
    raise IOError('Cannot open video file "soccer2.mp4"')
ret, frame = video.read()
if not ret:
    raise IOError('Cannot read the first frame of "soccer2.mp4"')


## <font color=red>框選你想要追蹤的東西

In [None]:
# Let the user mark the object to follow on the first frame. The recorded
# sample output is a 4-tuple, which the tracking loop reads as (x, y, w, h).
bbox = cv2.selectROI(frame)
cv2.destroyAllWindows()
print(bbox)

(389, 197, 84, 279)


## <font color=red>啟動辨識機器人

In [None]:
# Some trackers were moved into the legacy namespace and have to be created
# through it instead, for example:
#   tracker = cv2.legacy.TrackerMedianFlow_create()
# NOTE(review): GOTURN presumably needs the model files from the download
# link in the section above to be present -- confirm where OpenCV looks.
tracker = cv2.TrackerGOTURN_create()
# Initialise the tracker with the first frame and the user-selected box.
ret = tracker.init(frame, bbox)

## <font color=red>咱們就一路辨識下去吧!!

In [None]:
# Follow the selected object through the rest of the video: read a frame,
# update the tracker, draw the result, and stop at end of video or on ESC.
# `video` is not released here because it was opened in an earlier cell.
try:
    while True:

        # Read next frame
        ret, frame = video.read()

        # Stop as soon as no further frame can be read
        if not ret:
            break

        # Update tracker
        found, bbox = tracker.update(frame)

        # If object found, draw bbox (bbox is x, y, w, h)
        if found:
            # Top left corner
            topLeft = (int(bbox[0]), int(bbox[1]))
            # Bottom right corner
            bottomRight = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
            # Display bounding box
            cv2.rectangle(frame, topLeft, bottomRight, (0, 0, 255), 2)
        else:
            # Display status
            cv2.putText(frame, "Object not found", (20, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)

        # Display frame
        cv2.imshow("Tracker", frame)
        # Keep only the low byte of the key code so the ESC (27) comparison
        # is not thrown off by extra high bits some platforms report.
        k = cv2.waitKey(5) & 0xFF
        if k == 27:
            break
finally:
    # Close the preview window even if something above raised.
    cv2.destroyAllWindows()

## <font color=red>還有哪些API可以用
--------
### [API改版](https://github.com/opencv/opencv_contrib/blob/4fc6995375e384b8e25bd6c8693fa8dd2ed4bb0a/modules/tracking/samples/samples_utility.hpp#L13-L28)

In [None]:
# Some trackers were moved into the legacy namespace and have to be created
# through it instead, for example:
#   tracker = cv2.legacy.TrackerMedianFlow_create()
tracker = cv2.TrackerKCF_create()
# Initialise the KCF tracker with the current frame and the selected box.
ret = tracker.init(frame, bbox)

In [None]:
# Some trackers were moved into the legacy namespace and have to be created
# through it instead, for example:
#   tracker = cv2.legacy.TrackerMedianFlow_create()
tracker = cv2.TrackerMIL_create()
# Initialise the MIL tracker with the current frame and the selected box.
ret = tracker.init(frame, bbox)

## <font color=red>換dlib的追蹤版本試試看
----
![image.png](attachment:image.png)

## <font color=red>導入工具

In [None]:
import cv2
import numpy as np
import dlib

## <font color=red>影片讀進來

In [None]:
# Open the clip used for the dlib example and grab its first frame, which the
# ROI selection and tracker start-up in the following cells work on.
video = cv2.VideoCapture("people.mp4")
if not video.isOpened():
    # Fail here with a clear message instead of letting a later cell choke
    # on an empty frame.
    raise IOError('Cannot open video file "people.mp4"')
ret, frame = video.read()
if not ret:
    raise IOError('Cannot read the first frame of "people.mp4"')

## <font color=red>框選你想要追蹤的東西

In [None]:
# Let the user mark the object to follow on the first frame, then close the
# selection window. The next cell unpacks bbox as (x, y, w, h).
bbox = cv2.selectROI(frame)
cv2.destroyAllWindows()

## <font color=red>把框選的座標轉成dlib格式

In [None]:
# cv2.selectROI hands back (x, y, width, height), while dlib.rectangle is
# built from corner coordinates, so derive the bottom-right corner first.
topLeftX, topLeftY, w, h = bbox
bottomRightX, bottomRightY = topLeftX + w, topLeftY + h
dlibRect = dlib.rectangle(topLeftX, topLeftY, bottomRightX, bottomRightY)

## <font color=red>啟動dlib辨識機器人

In [None]:
# Create the dlib correlation tracker and start it on the selected rectangle.
tracker = dlib.correlation_tracker()
# dlib is to be fed RGB, so convert the BGR frame before handing it over.
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
tracker.start_track(rgb, dlibRect)

## <font color=red>咱們就一路辨識下去吧!!

In [None]:
# Follow the selected object with the dlib tracker: read a frame, update the
# tracker on its RGB version, draw the new box, stop at end of video or ESC.
# `video` is not released here because it was opened in an earlier cell.
try:
    while True:

        # Read next frame
        ret, frame = video.read()

        # Stop as soon as no further frame can be read
        if not ret:
            break
        # Convert frame to RGB
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Show the tracker the new frame so it keeps tracking
        tracker.update(rgb)

        # Fetch the tracked object's new position
        objectPosition = tracker.get_position()

        topLeftX = int(objectPosition.left())
        topLeftY = int(objectPosition.top())
        bottomRightX = int(objectPosition.right())
        bottomRightY = int(objectPosition.bottom())

        # Draw the new position
        cv2.rectangle(frame, (topLeftX, topLeftY), (bottomRightX, bottomRightY), (0, 0, 255), 2)

        # Display frame
        cv2.imshow("Tracker", frame)
        # Keep only the low byte of the key code so the ESC (27) comparison
        # is not thrown off by extra high bits some platforms report.
        k = cv2.waitKey(5) & 0xFF
        if k == 27:
            break
finally:
    # Close the preview window even if something above raised.
    cv2.destroyAllWindows()

## <font color=red>難道追蹤都要選擇ROI嗎?
------
    當然不用，你只要可以自動識別物件，然後丟給追蹤API，那就開始追蹤囉!!

## <font color=red>前傳-MobileNet SSD上場
------
    這是一個CNN的模型，是用caffe做出來的模型，可以提供20個類別的辨識能力

## <font color=red>導入工具

In [None]:
import cv2
import numpy as np

## <font color=red>導入模型

In [None]:
# Load the Caffe MobileNet-SSD model from its two files in the working
# directory: the .prototxt description first, the .caffemodel file second.
net = cv2.dnn.readNetFromCaffe(
    'MobileNetSSD_deploy.prototxt.txt',
    'MobileNetSSD_deploy.caffemodel',
)

## <font color=red>建立辨識類別列表

In [None]:
# Class-id -> name lookup for the detector output; the position of each name
# in the list is its numeric id (0 is the background class).
categories = dict(enumerate([
    'background',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]))

## <font color=red>讀入辨識目標

In [None]:
# Load the still image to run detection on, preview it until a key is
# pressed, then show its shape (the recorded output is (720, 1280, 3)).
image = cv2.imread('frame.png')
if image is None:
    # cv2.imread reports an unreadable file by returning None; stop here
    # with a clear message instead of failing inside imshow.
    raise IOError("Cannot read image file 'frame.png'")
cv2.imshow("Tracker", image)
cv2.waitKey()
cv2.destroyAllWindows()
image.shape

(720, 1280, 3)

## <font color=red>把目標轉成模型需要的blob格式

In [None]:
# The network can only work on a fixed input size, so the image is resized
# to 300x300 first and then packed into a blob MobileNet-SSD can consume.
#   0.007843              -> scalefactor applied to each channel's values
#   (127.5, 127.5, 127.5) -> value subtracted from each channel, to reduce
#                            the influence of lighting
resized_image = cv2.resize(image, (300, 300))
blob = cv2.dnn.blobFromImage(resized_image, 0.007843, (300, 300), (127.5, 127.5, 127.5))

## <font color=red>啟動機器人，偵測吧!!

In [None]:
# Hand the prepared blob to the network and run a forward pass; the result
# is the raw detection array inspected in the following cells.
net.setInput(blob)
detections = net.forward()

In [None]:
# Layout of each detection row: [0, class id, confidence, four box coordinates]
detections

array([[[[ 0.        , 15.        ,  0.9279718 ,  0.81247264,
           0.29258215,  0.91479486,  0.6521735 ],
         [ 0.        , 15.        ,  0.48497418,  0.31316453,
           0.27825695,  0.37916064,  0.6593019 ],
         [ 0.        ,  0.        ,  0.        ,  0.        ,
           0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ,
           0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ,
           0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ,
           0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ,
           0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ,
           0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  0.        ,
           0.        ,  0.        ,  0.   

In [None]:
# Display the shape of the raw detection array; the recorded output is
# (1, 1, 100, 7), and the drawing code below loops over axis 2.
detections.shape

(1, 1, 100, 7)

## <font color=red>顯示結果

In [None]:
# The box coordinates come back as fractions of the image, so the original
# image size is needed to scale them back to real pixels before drawing.
(h, w) = image.shape[:2]
# Colour table so each category gets its own colour. It is converted to
# nested lists because the drawing calls below are not given np.array rows.
colors = np.random.randint(0, 255, size=(len(categories), 3)).tolist()
scale = np.array([w, h, w, h])

for detection in detections[0, 0]:
    # Confidence of this detection; only those above 0.2 are drawn.
    confidence = detection[2]
    if not confidence > 0.2:
        continue

    # Predicted category (a person, a car, ...).
    idx = int(detection[1])
    # Scale the fractional corners back to pixels and drop the decimals.
    box = detection[3:7] * scale
    (startX, startY, endX, endY) = box.astype("int")
    # Draw the box in the colour reserved for this category.
    cv2.rectangle(image, (startX, startY), (endX, endY), colors[idx], 2)

    label = "{}: {:.2f}%".format(categories[idx], confidence * 100)
    # Put the label above the box when there is room, otherwise just below
    # the box's top edge.
    if startY - 15 > 15:
        y = startY - 15
    else:
        y = startY + 15
    cv2.putText(image, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)

cv2.imshow("Output", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

## <font color=red>&lt;小測驗&gt; 用 MobileNet SSD 辨識一下資料夾裡面的影片檔 soccer2.mp4 吧!</font>

## <font color=red>&lt;答案&gt;</font>

In [None]:
import cv2
import numpy as np

# Quiz answer: run MobileNet-SSD on every frame of soccer2.mp4 and draw each
# detection (box + "name: confidence%") until the video ends or ESC is hit.
net = cv2.dnn.readNetFromCaffe('MobileNetSSD_deploy.prototxt.txt', 'MobileNetSSD_deploy.caffemodel')

# Class-id -> name lookup for the detector output.
categories = { 0: 'background', 1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat', 5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat',
              9: 'chair', 10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse', 14: 'motorbike', 15: 'person',
              16: 'pottedplant', 17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor'}

# One random colour per category (indexed by class id).
colors = np.random.randint(0, 255, size=(len(categories), 3)).tolist()

video = cv2.VideoCapture("soccer2.mp4")
if not video.isOpened():
    raise IOError('Cannot open video file "soccer2.mp4"')

try:
    while True:
        # grab the next frame from the video file
        (grabbed, frame) = video.read()

        # check to see if we have reached the end of the video file
        if not grabbed or frame is None:
            break
        (h, w, c) = frame.shape

        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), (127.5, 127.5, 127.5))
        net.setInput(blob)
        detections = net.forward()

        # Axis 2 of the detection array holds the individual detections.
        for i in range(detections.shape[2]):
            # Confidence of this detection
            confidence = detections[0, 0, i, 2]
            if confidence > 0.2:
                # Class id
                idx = int(detections[0, 0, i, 1])
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype("int")
                # The colour table has one row per category, so it must be
                # indexed by the class id, not by the detection index i
                # (which can exceed the table size).
                cv2.rectangle(frame, (startX, startY), (endX, endY), colors[idx], 2)

                label = "{}: {:.2f}%".format(categories[idx], confidence * 100)
                y = startY - 15 if startY - 15 > 15 else startY + 15
                cv2.putText(frame, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)

        cv2.imshow("Tracker", frame)
        # Keep only the low byte of the key code so the ESC (27) comparison
        # is not thrown off by extra high bits some platforms report.
        k = cv2.waitKey(20) & 0xFF
        if k == 27:
            break
finally:
    # Free the video handle and close the window even if an error occurred.
    video.release()
    cv2.destroyAllWindows()

## <font color=red>上面的方法，叫做「逐幀辨識」，但是我要的是追蹤...</font>

## <font color=red>&lt;小測驗&gt; 幫我想一下，怎麼讓 MobileNet SSD 辨識後，開始自動追蹤...</font>

## <font color=red>&lt;答案&gt;</font>

In [None]:
import cv2
import numpy as np
import dlib

# Quiz answer: detect objects with MobileNet-SSD on the first usable frame,
# give every detection its own dlib correlation tracker, then follow those
# trackers through the remaining frames until the video ends or ESC is hit.
net = cv2.dnn.readNetFromCaffe('MobileNetSSD_deploy.prototxt.txt', 'MobileNetSSD_deploy.caffemodel')

# Class-id -> name lookup for the detector output.
categories = { 0: 'background', 1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat', 5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat',
              9: 'chair', 10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse', 14: 'motorbike', 15: 'person',
              16: 'pottedplant', 17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor'}

# One random colour per category (indexed by class id).
colors = np.random.randint(0, 255, size=(len(categories), 3)).tolist()

video = cv2.VideoCapture("soccer2.mp4")
if not video.isOpened():
    raise IOError('Cannot open video file "soccer2.mp4"')

# Remember what was detected at the start so it can be tracked afterwards.
# The three lists are kept in step: one tracker, label and class id per object.
trackers = []
labels = []
idxs = []

try:
    while True:
        (grabbed, frame) = video.read()
        # check to see if we have reached the end of the video file
        if not grabbed or frame is None:
            break
        (h, w, c) = frame.shape

        # dlib is to be fed RGB. Converting before anything is drawn also
        # means the trackers work on a separate, un-annotated image instead
        # of a frame that already has boxes and labels painted on it.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        if not trackers:
            # No trackers yet: first find the targets to track.
            blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), (127.5, 127.5, 127.5))
            net.setInput(blob)
            detections = net.forward()

            # Handle every detected object (axis 2 holds the detections).
            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                if confidence > 0.2:
                    idx = int(detections[0, 0, i, 1])
                    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                    # Plain Python ints, so both OpenCV and dlib.rectangle
                    # receive ordinary integers rather than NumPy scalars.
                    (startX, startY, endX, endY) = box.astype("int").tolist()
                    cv2.rectangle(frame, (startX, startY), (endX, endY), colors[idx], 2)

                    label = "{}: {:.2f}%".format(categories[idx], confidence * 100)
                    y = startY - 15 if startY - 15 > 15 else startY + 15
                    # Index the colour table by class id, not by the
                    # detection index i (which can exceed the table size).
                    cv2.putText(frame, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)

                    # The object is located: create a tracker for it and
                    # start tracking from the detected box.
                    tracker = dlib.correlation_tracker()
                    rect = dlib.rectangle(startX, startY, endX, endY)
                    tracker.start_track(rgb, rect)

                    # Record the tracker together with its label and class id.
                    trackers.append(tracker)
                    labels.append(label)
                    idxs.append(idx)
        else:
            # Targets exist: just keep following them.
            for (tracker, label, idx) in zip(trackers, labels, idxs):
                # Show the tracker the new frame and read back the position.
                tracker.update(rgb)
                pos = tracker.get_position()

                startX = int(pos.left())
                startY = int(pos.top())
                endX = int(pos.right())
                endY = int(pos.bottom())

                cv2.rectangle(frame, (startX, startY), (endX, endY), colors[idx], 2)
                y = startY - 15 if startY - 15 > 15 else startY + 15
                cv2.putText(frame, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)

        cv2.imshow("Tracker", frame)
        # Keep only the low byte of the key code so the ESC (27) comparison
        # is not thrown off by extra high bits some platforms report.
        k = cv2.waitKey(20) & 0xFF
        if k == 27:
            break
finally:
    # Free the video handle and close the window even if an error occurred.
    video.release()
    cv2.destroyAllWindows()