# Sample example

In [1]:
from absl import flags
import sys
FLAGS = flags.FLAGS
FLAGS(sys.argv[:1])

import time # 프레임 당 시간 계산하기위해서 필요함
import numpy as np
import cv2
import matplotlib.pyplot as plt

import tensorflow as tf
from yolov3_tf2.models import YoloV3
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import convert_boxes

from deep_sort import preprocessing # NMS
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from deep_sort.tracking_utils import match_detections_with_tracks
from tools import generate_detections as gdet # feature generation

class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()] # [car, person, ... ]
yolo = YoloV3(classes=len(class_names))
yolo.load_weights('./weights/yolov3.tf')

max_cosine_distance = 0.5 # 0.5보다 클 경우 유사하다는 의미
nn_budget = None
nms_max_overlap = 0.8

# class 선언
model_filename = 'model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
tracker = Tracker(metric)

# frame 단위로 video capture
vid = cv2.VideoCapture('./data/video/test.mp4')

codec = cv2.VideoWriter_fourcc(*'XVID')
vid_fps = int(vid.get(cv2.CAP_PROP_FPS)) # CAP_PROP_FPS는 float을 반환해서 int로 바꿔줌 
vid_width, vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('./data/video/results_deepsort_sample_data.avi', codec, vid_fps, (vid_width, vid_height))

from _collections import deque
pts = [deque(maxlen=30) for _ in range(1000)]

counter = []

while True:
    _, img = vid.read() # img : ndarray (height, width,channel) 한장씩 받아옴
    if img is None:
        print('Completed')
        break

    img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_in = tf.expand_dims(img_in, 0) # detector에 넣어주기 위해서 (1,height, width, channel)로 만들어줌 >> tf.Tensor로 바뀜
    img_in = transform_images(img_in, 416)  # 1. image resize >> 416으로, 2. 픽셀값 255로 나눠줌. 

    t1 = time.time()
    
    '''
    numpy array로 받음
    bounding box 갯수 : 100개
    boxes, 3D shape (1, 100, 4) ; 4 : l,t,r,b
    scores, 2D shape (1, 100) : confidence score
    classes, 2D shape(1, 100) : box에 있는 물체의 class 번호
    nums, 1D shape(1,) : 감지된 물체의 총 수
    '''
    boxes, scores, classes, nums = yolo.predict(img_in)
    
    classes = classes[0]
    names = []
    for i in range(len(classes)):
        names.append(class_names[int(classes[i])])
    names = np.array(names)
    converted_boxes = convert_boxes(img, boxes[0]) # box shape : x_min, y_min, w, h로 바뀜
    features = encoder(img, converted_boxes)

    # 박스정보 : left, top, width, height
    detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in
                  zip(converted_boxes, scores[0], names, features)]

    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    tracker.predict() # 칼만 필터로 예측
    tracker.update(detections)

    cmap = plt.get_cmap('tab20b') # 숫자를 색상에 매핑
    colors = [cmap(i)[:3] for i in np.linspace(0,1,20)] # 20개 색상 생성

    current_count = int(0)

    for track in tracker.tracks:
        # update하지 않음
        if not track.is_confirmed() or track.time_since_update > 1:
            continue

        bbox = track.to_tlbr() # cv2 출력에 사용됨 min_x, min_y, max_x, max_y
        class_name = track.get_class()
        color = colors[int(track.track_id) % len(colors)]
        color = [i*255 for i in color]

        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]),int(bbox[3])), color, 2) # lt, rb
        cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)
                    +len(str(track.track_id)))*17,int(bbox[1])), color, -1) # id box
        cv2.putText(img, class_name+"-"+str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), 0, 0.75,
                    (255,255,255), 2)
        
        '''
        center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1])+(bbox[3]))/2)) # x,y center 좌표
        pts[track.track_id].append(center)

        # 중심점 진행방향 line 그리기
        for j in range(1, len(pts[track.track_id])):
            if pts[track.track_id][j-1] is None or pts[track.track_id][j] is None:
                continue
            thickness = int(np.sqrt(64/float(j+1))*2)
            cv2.line(img, (pts[track.track_id][j-1]), (pts[track.track_id][j]), color, thickness)
            
        height, width, _ = img.shape
        cv2.line(img, (0, int(3*height/6)), (width, int(3*height/6)), (0, 255, 0), thickness=2)
        #cv2.line(img, (0, int(3*height/6+height/20)), (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2)
        #cv2.line(img, (0, int(3*height/6-height/20)), (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)

        center_y = int(((bbox[1])+(bbox[3]))/2)

        if center_y <= int(3*height/6+height/30) and center_y >= int(3*height/6-height/30):
            if class_name == 'car' or class_name == 'truck':
                counter.append(int(track.track_id))
                current_count += 1

    total_count = len(set(counter))

    cv2.putText(img, 'Total Vehicle Count: ' + str(total_count), (0,130), 0, 1, (0,0,255), 2)
    cv2.putText(img, 'Current Vehicle Count: ' + str(current_count), (0,80), 0, 1, (0,0,255), 2)
    '''
    
    fps  = 1./(time.time()-t1)
    cv2.putText(img, "FPS: {:.2f}".format(fps), (0,30), 0, 1, (0,0,255), 2)
    cv2.namedWindow("output", cv2.WINDOW_NORMAL)
    cv2.resizeWindow('output', 1024, 768)
    cv2.imshow('output', img)
    out.write(img)

    if cv2.waitKey(1) == ord('q'): # 휴식
        break
vid.release()
out.release()
cv2.destroyAllWindows()






Completed


## VisDrone

In [17]:
# 이미지 파일 사용 시
from absl import flags
import sys
FLAGS = flags.FLAGS
FLAGS(sys.argv[:1])

import time # 프레임 당 시간 계산하기위해서 필요함
import numpy as np
import cv2
import matplotlib.pyplot as plt
import glob

import tensorflow as tf
from yolov3_tf2.models import YoloV3
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import convert_boxes

from deep_sort import preprocessing # NMS
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from deep_sort.tracking_utils import match_detections_with_tracks
from tools import generate_detections as gdet # feature generation

class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()] # [car, person, ... ]
yolo = YoloV3(classes=len(class_names))
yolo.load_weights('./weights/yolov3.tf')

max_cosine_distance = 0.5 # 0.5보다 클 경우 유사하다는 의미
nn_budget = None
nms_max_overlap = 0.8

# class 선언
model_filename = 'model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
tracker = Tracker(metric)

vis_directory = 'C:/Users/Son\Documents/aiffel/SIA-MOT/VisDrone2019-MOT-val/VisDrone2019-MOT-val/sequences/uav0000086_00000_v/'
images_filepaths = sorted(glob.glob(vis_directory+"*"))
print(len(images_filepaths))

a = cv2.imread(images_filepaths[1])
a_height, a_width = a.shape[0], a.shape[1]
codec = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('./data/video/deepsort_vis_results.avi', codec, 30, (a_width, a_height))

detections_dict = {}
for frame_id, img in enumerate(images_filepaths):
    if img is None:
        print('Completed')
        break
    img = cv2.imread(img)
    img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_in = tf.expand_dims(img_in, 0) # detector에 넣어주기 위해서 (1,height, width, channel)로 만들어줌 >> tf.Tensor로 바뀜
    img_in = transform_images(img_in, 416)  # 1. image resize >> 416으로, 2. 픽셀값 255로 나눠줌. 

    t1 = time.time()
    
    '''
    numpy array로 받음
    bounding box 갯수 : 100개
    boxes, 3D shape (1, 100, 4) ; 4 : l,t,r,b
    scores, 2D shape (1, 100) : confidence score
    classes, 2D shape(1, 100) : box에 있는 물체의 class 번호
    nums, 1D shape(1,) : 감지된 물체의 총 수
    '''
    boxes, scores, classes, nums = yolo.predict(img_in)
    
    classes = classes[0]
    names = []
    for i in range(len(classes)):
        names.append(class_names[int(classes[i])])
    names = np.array(names)
    converted_boxes = convert_boxes(img, boxes[0]) # box shape : x_min, y_min, w, h로 바뀜
    features = encoder(img, converted_boxes)

    # 박스정보 : left, top, width, height
    detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in
                  zip(converted_boxes, scores[0], names, features)]

    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]
    
    boxs = np.array([d.to_xyxy() for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    
    
    tracker.predict() # 칼만 필터로 예측
    tracker.update(detections)
    
    # 현재 detection box와 현재추적중인 track사이의 IoU matching
    # detections에 track_id 넣어주기 위해 matching 시키는 것으로 추정
    track_id = match_detections_with_tracks(boxs=boxs, tracks=tracker.tracks)
    for i,detection in enumerate(detections):
        detection.add_track_id(track_id[i])
    
    mask = np.array([detection.track_id is not None for detection in detections], dtype=bool)
    classes = classes[mask]
    detections = list(np.array(detections)[mask])

    
    detections_dict[frame_id+1]= detections
    
    cmap = plt.get_cmap('tab20b') # 숫자를 색상에 매핑
    colors = [cmap(i)[:3] for i in np.linspace(0,1,20)] # 20개 색상 생성

    current_count = int(0)

    for track in tracker.tracks:
        # update하지 않음
        if not track.is_confirmed() or track.time_since_update > 1:
            continue

        bbox = track.to_tlbr() # cv2 출력에 사용됨 min_x, min_y, max_x, max_y
        class_name = track.get_class()
        color = colors[int(track.track_id) % len(colors)]
        color = [i*255 for i in color]

        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]),int(bbox[3])), color, 2) # lt, rb
        cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)
                    +len(str(track.track_id)))*17,int(bbox[1])), color, -1) # id box
        cv2.putText(img, class_name+"-"+str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), 0, 0.75,
                    (255,255,255), 2)
        
        '''
        center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1])+(bbox[3]))/2)) # x,y center 좌표
        pts[track.track_id].append(center)

        # 중심점 진행방향 line 그리기
        for j in range(1, len(pts[track.track_id])):
            if pts[track.track_id][j-1] is None or pts[track.track_id][j] is None:
                continue
            thickness = int(np.sqrt(64/float(j+1))*2)
            cv2.line(img, (pts[track.track_id][j-1]), (pts[track.track_id][j]), color, thickness)
            
        height, width, _ = img.shape
        cv2.line(img, (0, int(3*height/6)), (width, int(3*height/6)), (0, 255, 0), thickness=2)
        #cv2.line(img, (0, int(3*height/6+height/20)), (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2)
        #cv2.line(img, (0, int(3*height/6-height/20)), (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)

        center_y = int(((bbox[1])+(bbox[3]))/2)

        if center_y <= int(3*height/6+height/30) and center_y >= int(3*height/6-height/30):
            if class_name == 'car' or class_name == 'truck':
                counter.append(int(track.track_id))
                current_count += 1

    total_count = len(set(counter))

    cv2.putText(img, 'Total Vehicle Count: ' + str(total_count), (0,130), 0, 1, (0,0,255), 2)
    cv2.putText(img, 'Current Vehicle Count: ' + str(current_count), (0,80), 0, 1, (0,0,255), 2)
    '''
    
    fps  = 1./(time.time()-t1)
    cv2.putText(img, "FPS: {:.2f}".format(fps), (0,30), 0, 1, (0,0,255), 2)
    cv2.namedWindow("output", cv2.WINDOW_NORMAL)
    cv2.resizeWindow('output', 1024, 768)
    cv2.imshow('output', img)
    out.write(img)

    if cv2.waitKey(1) == ord('q'): # 휴식
        break
vid.release()
out.release()
cv2.destroyAllWindows()

464








NameError: name 'vid' is not defined

## Metric

In [18]:
from metric.io import read_results, unzip_objs

import os
import numpy as np
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'


data_root = "C:/Users/Son/Documents/aiffel/SIA-MOT/VisDrone2019-MOT-val/VisDrone2019-MOT-val/annotations"
seq_name = 'uav0000086_00000_v'
data_type = 'mot'

gt_filename = os.path.join(data_root, seq_name + '.txt')

# {frame_num : (tlwh, target_id, score)}
gt_frame_dict = read_results(gt_filename, data_type, is_gt=True)
gt_ignore_frame_dict = read_results(gt_filename, data_type, is_ignore=True)

In [19]:
from cython_bbox import bbox_overlaps as bbox_ious

def ious(atlwhs, btlwhs):
    """
    Compute cost based on IoU
    :type atlbrs: list[tlbr] | np.ndarray
    :type atlbrs: list[tlbr] | np.ndarray

    :rtype ious np.ndarray
    """
    atlbrs = np.concatenate([atlwhs[:,:2], atlwhs[:,2:] + atlwhs[:,:2]], axis=1)
    btlbrs = np.concatenate([btlwhs[:,:2], btlwhs[:,2:] + btlwhs[:,:2]], axis=1)
    
    ious = np.zeros([atlbrs.shape[0], btlbrs.shape[0]], dtype=np.float)
    if ious.size == 0:
        return ious
    
    trk_tlwhs_list = [i for i in trk_tlwhs]
    gt_tlwhs_list = [i for i in gt_tlwhs]
    
    ious = bbox_ious(
        np.ascontiguousarray(atlbrs, dtype=np.float),
        np.ascontiguousarray(btlbrs, dtype=np.float)
    )

    return ious

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from cython_bbox import bbox_overlaps as bbox_ious


In [20]:
from deep_sort.matching import ious

accs=[]
names=[]
similarity_scores=[]
num_tracker_dets=set()
num_gt_dets=set()
gtrue_ids=[]
tracker_ids=[]
for i in range(1, len(gt_frame_dict)+1):

    # results
    trk_objs = detections_dict.get(i,[])
    trk_tlwhs = []
    trk_ids = []
    for a in trk_objs:
        trk_tlwhs.append(a.tlwh)
        trk_ids.append(a.track_id)
    trk_tlwhs = np.asarray(trk_tlwhs)
    trk_ids = np.asarray(trk_ids)
    
    # gts
    gt_objs = gt_frame_dict.get(i, [])
    gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]

    # ignore boxes
    ignore_objs = gt_ignore_frame_dict.get(i, [])
    ignore_tlwhs = unzip_objs(ignore_objs)[0]

    # remove ignored results
    keep = np.ones(len(trk_tlwhs), dtype=bool)
    iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
    if len(iou_distance) > 0:
        match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
        match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
        match_ious = iou_distance[match_is, match_js]

        match_js = np.asarray(match_js, dtype=int)
        match_js = match_js[np.logical_not(np.isnan(match_ious))]
        keep[match_js] = False
        trk_tlwhs = trk_tlwhs[keep]
        trk_ids = trk_ids[keep]
    
    ious_ = ious(gt_tlwhs, trk_tlwhs)
    iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
    
    acc = mm.MOTAccumulator(auto_id=True)
    acc.update(gt_ids, trk_ids, iou_distance)
    rtn_events=False

    if rtn_events and iou_distance.size > 0 and hasattr(acc, 'last_mot_events'):
        events = acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
    else:
        events = None
    accs.append(acc)
    names.append(i)
    
    # save
    similarity_scores.append(ious_)
    num_tracker_dets = num_tracker_dets | set(trk_ids)
    num_gt_dets = num_gt_dets | set(gt_ids)
    gtrue_ids.append(np.array(gt_ids))
    tracker_ids.append(trk_ids)

# array로 바꾸기
similarity_scores = np.asarray(similarity_scores)
num_tracker_dets=len(num_tracker_dets)
num_gt_dets= len(num_gt_dets)
gtrue_ids=np.asarray(gtrue_ids)
tracker_ids=np.asarray(tracker_ids)

  similarity_scores = np.asarray(similarity_scores)
  gtrue_ids=np.asarray(gtrue_ids)
  tracker_ids=np.asarray(tracker_ids)


In [21]:
metric_dir='C:/Users/Son/Documents/aiffel/SIA-MOT/workplace/yolov3_deepsort/metric_input'

if os.path.isdir(metric_dir):
    pass
else:
    os.makedirs(metric_dir)
    
np.save(metric_dir + "/similarity_scores",similarity_scores)
np.save(metric_dir + "/num_tracker_dets",num_tracker_dets)
np.save(metric_dir + "/num_gt_dets",num_gt_dets)
np.save(metric_dir + "/gtrue_ids",gtrue_ids)
np.save(metric_dir + "/traker_ids",tracker_ids)


In [None]:
similarity_scores = np.load(metric_dir+"/similarity_scores.npy",allow_pickle=True)
num_tracker_dets=np.load(metric_dir+"/num_tracker_dets.npy",allow_pickle=True)
num_gt_dets=np.load(metric_dir+"/num_gt_dets.npy",allow_pickle=True)
gtrue_ids=np.load(metric_dir+"/gtrue_ids.npy",allow_pickle=True)
tracker_ids=np.load(metric_dir+"/traker_ids.npy",allow_pickle=True)

In [22]:
def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
    names = copy.deepcopy(names)
    if metrics is None:
        metrics = mm.metrics.motchallenge_metrics
    metrics = copy.deepcopy(metrics)

    mh = mm.metrics.create()
    summary = mh.compute_many(
            accs,
            metrics=metrics,
            names=names,
            generate_overall=True
        )

    return summary

In [23]:
s = get_summary(accs, names)
s

Unnamed: 0,mota,num_switches,idp,idr,idf1,precision,recall
1,0.388889,0,0.818182,0.500000,0.620690,0.818182,0.500000
2,0.361111,0,0.782609,0.500000,0.610169,0.782609,0.500000
3,0.388889,0,0.818182,0.500000,0.620690,0.818182,0.500000
4,0.416667,0,0.800000,0.555556,0.655738,0.800000,0.555556
5,0.361111,0,0.782609,0.500000,0.610169,0.782609,0.500000
...,...,...,...,...,...,...,...
461,0.140845,0,0.857143,0.169014,0.282353,0.857143,0.169014
462,0.126761,0,0.846154,0.154930,0.261905,0.846154,0.154930
463,0.140845,0,0.812500,0.183099,0.298851,0.812500,0.183099
464,0.162162,0,0.833333,0.202703,0.326087,0.833333,0.202703


In [25]:
s['mota'].mean()

0.2688300362030626

In [26]:
from metric.MOTA import CLEAR

# 실제 적용 예시코드
# 데이터셋에서 필요한 정보를 추출하여 data 배열을 생성
data = {
    'num_tracker_dets': num_tracker_dets,
    'num_gt_dets': num_gt_dets,
    'gt_ids': gtrue_ids,
    'tracker_ids': tracker_ids,
    'similarity_scores': similarity_scores,
    'num_timesteps' : len(images_filepaths)
}

# CLEAR 객체 생성
clear = CLEAR()

# eval_sequence 메서드를 사용하여 MOTA 계산
mota = clear.eval_sequence(data)['MOTA']


CLEAR Config:
THRESHOLD            : 0.5                           
PRINT_CONFIG         : True                          


In [27]:
mota

-0.4036101687816727