In [82]:
import cv2
import os
from random import shuffle
import random
from sklearn import svm
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
import progressbar

sift = cv2.SIFT()

def concat(descs, kp):
    return np.concatenate((descs, np.asarray((kp.pt[0], kp.pt[1]), dtype=np.float32)))
        

def concat2(descs, kp):
    return np.concatenate((descs[:-2], np.asarray((descs[-2] - kp[0], descs[-1] - kp[1]), dtype=np.float32)))

#dot[0] - x; dot[1] - y;
#box[0][0] - x1; box[0][1] - x2
#box[1][0] - x2; box[1][1] - y2
def dot_in_box(box, dot):
    if dot[0] >= box[0][0] - 5 and dot[0] <= box[1][0] + 5 and dot[1] >= box[0][1] - 5 and dot[1] <= box[1][1] + 5:
        return True
    else:
        return False

def get_bbox(value):
    bbox = []
    while True:
        pos_a = value.find('(')
        if pos_a == -1:
            return bbox
        pos_b = value.find(')')
        if pos_b == -1:
            return bbox
        substr = value[pos_a + 1:pos_b]
        box = [int(i) for i in substr.split(',')]
        bbox.append([(box[1], box[0]),(box[3], box[2])])
        if pos_b + 1 >= len(value):
            return bbox
        value = value[pos_b + 1:]

#get data
annotations_path = "/home/hotoru/datasets/cvpr15/annotations/drones/"
data_path = "/home/hotoru/datasets/cvpr15/videos/drones/"

annotations = os.listdir(annotations_path)
data = os.listdir(data_path)

annotations.sort()
data.sort()

for i in range(0, len(annotations)):
    annotations[i] = annotations_path + annotations[i]

for i in range(0, len(data)):
    data[i] = data_path + data[i]

# Parameters for lucas kanade optical flow
lk_params = dict( winSize  = (20,20),
                  maxLevel = 2,
                  criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))


#extract features from files

dataset = []
# for file in range(0, len(data)):
for file in range(0, 3):
    annotation_file = open(annotations[file], 'r').readlines()
    vCap = cv2.VideoCapture(data[file])
    print("file: " + str(file))
    bar = progressbar.ProgressBar(maxval=len(annotation_file), \
    widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
    
    bar.start()
    ret = vCap.set(1, 0)
    
    (ret, old_frame) = vCap.read()
    for i in range(1, len(annotation_file)):
        bbox = get_bbox(annotation_file[i])
        ret = vCap.set(1, i)
        (ret, new_frame) = vCap.read()
        if ret == False: break
        if len(bbox) > 0:
            (kp, descs) = sift.detectAndCompute(new_frame, None)
            features = []
            pos_elem = 0
            neg_elem = 0
            for k in range(0, len(kp)):
                dot_in = False
                for box in bbox:
                    if dot_in_box(box, kp[k].pt):
                        dot_in = True
                if dot_in:
                    features.append([concat(descs[k], kp[k]), 1])
                    pos_elem = pos_elem + 1
                else:
                    features.append([concat(descs[k], kp[k]), 0])
                    neg_elem = neg_elem + 1
                    
            features = sorted(features, key = lambda x: x[1])
#             print("pos: " + str(pos_elem) + " neg: " + str(neg_elem))
#             pos_elem = int(pos_elem / 5.0)
#             neg_len = neg_elem
#             if int(pos_elem * 4) < neg_elem:
#                 neg_len = int(pos_elem * 4)
#             features = features[0:neg_len] + features[-pos_elem:]
            kp_flow = []
            for point in features:
                kp_flow.append(np.asarray((point[0][-2],point[0][-1]), dtype=np.float32))
            kp_flow = np.asarray(kp_flow, dtype=np.float32)
            
            kp1, st, err = cv2.calcOpticalFlowPyrLK(new_frame, old_frame, kp_flow, None, **lk_params)
            kp1_new = kp1[st==1]
            
            for box in bbox:
                cv2.rectangle(new_frame, box[0], box[1], (20, 220, 20), 2)
            img = new_frame.copy()
            img = cv2.drawKeypoints(img, kp, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
            for p in range(0, len(kp_flow)):
                cv2.arrowedLine(img, (kp1[p][0], kp1[p][1]), (kp_flow[p][0], kp_flow[p][1]),(80,220,150),1)
                features[p][0] = concat2(features[p][0], kp_flow[p])
                
            dataset = dataset + features
        old_frame = new_frame
        bar.update(i)
        cv2.imshow("vid", img)
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
cv2.destroyAllWindows()
bar.finish()

#create dataset
print("len dataset: " + str(len(dataset)))
shuffle(dataset)
pos_elems = 0
neg_elems = 0
X = []
Y = []
for elem in dataset:
    X.append(elem[0])
    Y.append(elem[1])
    if elem[1] == 1:
        pos_elems = pos_elems + 1
    else:
        neg_elems = neg_elems + 1
        
print("pos: " + str(pos_elems) + " neg: " + str(neg_elems))



X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

X_train_normalized = preprocessing.normalize(X_train, norm='l2')
X_test_normalized = preprocessing.normalize(X_test, norm='l2')


# min_max_scaler = preprocessing.MinMaxScaler()
# X_train_minmax = min_max_scaler.fit_transform(X_train)
# X_test_minmax = min_max_scaler.fit_transform(X_test)



file: 0


[                                                                        ]   0%

file: 1




file: 2




len dataset: 55524
pos: 11949 neg: 43575


In [86]:
print(X_train[0])

print(X_train_normalized[0])


[   1.    0.    0.    0.  103.   60.    0.    0.  140.    3.    0.    0.
   17.   37.    5.   15.  174.   13.    0.    0.    0.    0.    0.    8.
   30.    0.    0.    0.    3.    2.    2.   11.    6.    0.    0.    0.
  123.   64.    0.    1.  122.    5.    0.    0.   30.   50.    5.    8.
  174.   27.    0.    0.    0.    0.    1.    8.   47.    1.    0.    0.
    2.    2.    6.   15.   11.    1.    0.    0.  150.   24.    0.    1.
   81.    3.    0.    0.   47.   28.    2.    6.  174.   32.    0.    0.
    0.    1.    1.    5.   68.    5.    0.    0.    1.    1.    1.   12.
   14.    2.    0.    0.  116.   19.    0.    0.   42.    3.    0.    0.
   40.   10.    0.    1.  174.   38.    0.    0.    1.    0.    0.    1.
   74.   15.    0.    0.    0.    0.    1.    7.    0.    0.]
[ 0.00195228  0.          0.          0.          0.20108445  0.11713657
  0.          0.          0.27331867  0.00585683  0.          0.          0.0331887
  0.07223422  0.00976138  0.02928414  0.33969606  0

In [87]:
print("training svm")
clf = svm.LinearSVC()
# clf = svm.SVC(cache_size=5000)
clf.fit(X_train_normalized, y_train)
val = clf.score(X_test_normalized, y_test)
print("res " + str(val))

training svm
res 0.80945520036


In [91]:

from sklearn.cluster import DBSCAN
def boundRect(kp):
    box = [[kp[0][0], kp[0][1]], [kp[0][0], kp[0][1]]] #x1, y1; x2, y2
    rad = [0, 0]
    for p in kp:
        if p[0] < box[0][0]:
            box[0][0] = p[0]
            rad[0] = p[2]
        elif p[0] > box[1][0]:
            box[1][0] = p[0]
            rad[1] = p[2]
        if p[1] < box[0][1]:
            box[0][1] = p[1]
            rad[0] = p[2]
        elif p[1] > box[1][1]:
            box[1][1] = p[1]
            rad[1] = p[2]
    return [(box[0][0] - rad[0], box[0][1] - rad[0]), (box[1][0] + rad[1], box[1][1] + rad[1])]

import random

#testing
num_file = 1
annotation_file = open(annotations[num_file], 'r').readlines()
vCap = cv2.VideoCapture(data[num_file])

ret = vCap.set(1, 0)
(ret, old_frame) = vCap.read()
for i in range(1, len(annotation_file) - 10):
    bbox = get_bbox(annotation_file[i])
    ret = vCap.set(1, i)
    (ret, new_frame) = vCap.read()
    if ret == False: break
    (kp, descs) = sift.detectAndCompute(new_frame, None)
    kp_flow = []
    for point in kp:
        kp_flow.append(np.asarray((point.pt[0],point.pt[1]), dtype=np.float32))
    kp_flow = np.asarray(kp_flow, dtype=np.float32)
    kp1, st, err = cv2.calcOpticalFlowPyrLK(new_frame, old_frame, kp_flow, None, **lk_params)
    
    features = [concat(descs[k], kp[k]) for k in range(0, len(kp))]
    features = [concat2(features[k], kp1[k]) for k in range(0, len(kp))]
    descs_minmax = preprocessing.normalize(features, norm='l2')
    
#     descs_minmax = [concat(descs[k], kp[k]) for k in range(0, len(kp))]
    lables = clf.predict(descs_minmax)
    kp_pred = []
    kp_norm = []
    for k in range(0, len(lables)):
        if lables[k] == 1:
            kp_pred.append(kp[k])
#             kp_pred.append((kp[k].pt[0], kp[k].pt[1]))
            kp_norm.append([int(kp[k].pt[0]), int(kp[k].pt[1]), int(kp[k].size / 2)])
#             cv2.circle(frame, (kp_norm[-1][0], kp_norm[-1][1]), int(kp_norm[-1][2]), (200, 20, 20), 5)

#     cluster_num = 4

    old_frame = new_frame.copy()
#     if len(kp_norm) < cluster_num:
#         cluster_num = len(kp_norm)
    print("kp_norm: " + str(len(kp_norm)))
    if len(kp_norm) > 0:
        db = DBSCAN(eps=50, min_samples=3).fit(kp_norm)
        n_clusters_ = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
        print("n_clusters: " + str(n_clusters_))
        print(" db.lables_: "+ str(len(db.labels_)))
#         kmeans = KMeans(n_clusters=cluster_num, random_state=0, max_iter=500).fit(kp_norm)
        colors = []
        clusters = []
        for l in range(0, n_clusters_):
            clusters.append([])
            colors.append((random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
            for k in range(0, len(kp_norm)):
#                 if kmeans.labels_[k] == l:
                if db.labels_[k] == l:
                    clusters[l].append(np.asarray((kp_norm[k][0], kp_norm[k][1], kp_norm[k][2]), dtype=np.int))
            if len(clusters[l]) > 0:
                bound_box = boundRect(clusters[l])
                cv2.rectangle(new_frame, bound_box[0], bound_box[1], (50, 50, 250), 2)
#         for k in range(0, len(kp_norm)):
#             cv2.circle(new_frame, (kp_norm[k][0], kp_norm[k][1]), kp_norm[k][2], colors[db.labels_[k]], 1)
#             cv2.circle(new_frame, (kp_norm[k][0], kp_norm[k][1]), kp_norm[k][2], colors[kmeans.labels_[k]], 1)
            
#     print("kp: " + str(len(kp)) + " kp_pred: " + str(len(kp_pred)))
    
#     frame = cv2.drawKeypoints(frame, kp, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

#     for box in bbox:
#         cv2.rectangle(frame, box[0], box[1], (20, 220, 20), 2)
    
    cv2.imshow("vid", new_frame)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()   

kp_norm: 0
kp_norm: 3
n_clusters: 0
 db.lables_: 3
kp_norm: 3
n_clusters: 0
 db.lables_: 3
kp_norm: 8
n_clusters: 1
 db.lables_: 8
kp_norm: 5
n_clusters: 1
 db.lables_: 5
kp_norm: 5
n_clusters: 1
 db.lables_: 5
kp_norm: 12
n_clusters: 1
 db.lables_: 12
kp_norm: 8
n_clusters: 2
 db.lables_: 8
kp_norm: 7
n_clusters: 1
 db.lables_: 7
kp_norm: 11
n_clusters: 0
 db.lables_: 11
kp_norm: 8
n_clusters: 1
 db.lables_: 8
kp_norm: 1
n_clusters: 0
 db.lables_: 1
kp_norm: 8
n_clusters: 1
 db.lables_: 8
kp_norm: 2
n_clusters: 0
 db.lables_: 2
kp_norm: 0
kp_norm: 4
n_clusters: 0
 db.lables_: 4
kp_norm: 2
n_clusters: 0
 db.lables_: 2
kp_norm: 6
n_clusters: 1
 db.lables_: 6
kp_norm: 3
n_clusters: 0
 db.lables_: 3
kp_norm: 1
n_clusters: 0
 db.lables_: 1
kp_norm: 2
n_clusters: 0
 db.lables_: 2
kp_norm: 7
n_clusters: 1
 db.lables_: 7
kp_norm: 6
n_clusters: 1
 db.lables_: 6
kp_norm: 8
n_clusters: 2
 db.lables_: 8
kp_norm: 7
n_clusters: 1
 db.lables_: 7
kp_norm: 11
n_clusters: 1
 db.lables_: 11
kp_norm: 5
n