In [None]:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import patches
import cv2, os, time
import shutil # for folders

FPS_LIMIT = 20

# a) Capture Images
def clear(cam: cv2.VideoCapture):
    # release cam and clear windows
    cam.release()
    cv2.destroyAllWindows()

def capture_video(cam: cv2.VideoCapture):
    Im = []
    # cam = cv2.VideoCapture(1)
    cv2.namedWindow("capture")
    
    # count = 0
    # IMG_PATH = "imgs"
    # G_IMG_PATH = "g_imgs"

    prev = 0
    recording = False
    while True:
        ret, img = cam.read()
        
        if not ret:
            print("failed")
            break

        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        cv2.imshow("capture", img)

        k = cv2.waitKey(1) # 1ms wait
        
        if k % 256 == 32: # space
            recording = True
        
        if (time.time() - prev > 1./FPS_LIMIT and recording):
            Im.append(img_gray)
            # count += 1
            # cv2.imwrite(f"{G_IMG_PATH}/{count}.png")
            prev = time.time()

        if k % 256 == 8 or k % 256 == 27:
            # backspace or esc
            break
    
    # cam.release()
    clear(cam)
    
    return Im

def load_video(path: str, type: str):
    Im = []
    
    files = (os.listdir(path))
    n = len(files)
    for i in range(1,n+1):
        Im.append(cv2.imread(f"{path}/{i}.{type}", cv2.IMREAD_GRAYSCALE))
        # print(f"{path}/{i}.{type}")
    # for file in files:
    #     Im.append(cv2.imread(file, cv2.IMREAD_GRAYSCALE))
    return Im

def load_raw_output(path: str, type: str): # load color
    Im = []
    
    files = (os.listdir(path))
    n = len(files)
    for i in range(1,n+1):
        Im.append(cv2.imread(f"{path}/{i}.{type}", cv2.IMREAD_COLOR))
    return Im

def get_box(frame0):
    return cv2.selectROI("ROI", frame0, fromCenter=False)


# Exercise 1

In [147]:
# cam = cv2.VideoCapture(0, cv2.CAP_FIREWIRE)
cam = cv2.VideoCapture(1)
Im = capture_video(cam)
# Im = load_video("armD32im1", "png")

In [None]:
# Im = load_video("armD32im1", "png")

In [148]:
xt, yt, w, h = get_box(frame0=Im[0])
template = Im[0][yt:yt+h, xt:xt+w]

Select a ROI and then press SPACE or ENTER button!
Cancel the selection process by pressing c button!


In [149]:
dim = [Im[0].shape[1], Im[0].shape[0]]
# dim = [template.shape[1], template.shape[0]]
X = np.arange(0, dim[0], dtype=np.float32)
Y = np.arange(0, dim[1], dtype=np.float32)
Xq, Yq = np.meshgrid(X, Y)

# template_mesh = cv2.remap(template, Xq, Yq, cv2.INTER_LINEAR)

In [151]:
# r(p) = y - f(p) = temp - img(x,y + p)
# r(p) = img(x,y + p) - temp
# phi = r^T r


def min_ssd(frame, template, p, x0, y0, width, height):
    # max_x = frame.shape[1] - width
    # max_y = frame.shape[0] - height
    # x = max(min(int(x0 + p[0]), max_x), 0)
    # y = max(min(int(y0 + p[1]), max_y), 0)

    x = int(x0 + p[0])
    y = int(y0 + p[1])
    
    # X = np.arange(0, template.shape[1], dtype=np.float32)
    # Y = np.arange(0, template.shape[0], dtype=np.float32)
    # Xt, Yt = np.meshgrid(X, Y)

    u, v = np.inf, np.inf

    bbox = frame[y:y+height, x:x+width]
    # bbox = cv2.remap(bbox, Xt, Yt, cv2.INTER_LINEAR)
    
    # dIm = np.float32(bbox) - np.float32(template)
    # r = np.float32(bbox) - np.float32(template)
    max_it = 120
    i=0
    # while np.linalg.norm(r) >= 0.5:
    # print(x,y)
    while np.linalg.norm([u, v]) > 0.1 and i < max_it:
        i+=1
        r = np.float32(bbox) - np.float32(template)
        # r[r<=25] = 0
        r[abs(r)<=40] = 0
        # r_v, r_u = np.gradient(np.float64(r))
        # r_v = np.gradient(np.float64(r), axis = 0).flatten()
        # r_u = np.gradient(np.float64(r), axis = 1).flatten()

        r_v = np.gradient(np.float64(bbox), axis = 0).flatten()
        r_u = np.gradient(np.float64(bbox), axis = 1).flatten()
        
        # r_v = np.gradient(np.float64(template), axis = 0).flatten()
        # r_u = np.gradient(np.float64(template), axis = 1).flatten()
        
        # r_v = r_v/np.linalg.norm(r_v)
        # r_u = r_u/np.linalg.norm(r_u)
        # r_u = r_u/width
        # r_v = r_u/height

        J_r = np.column_stack((r_u, r_v))
        
        s,_,_,_ = np.linalg.lstsq(J_r, -r.flatten())
        
        u = s[0]
        v = s[1]
        p[0] += 0.5 * u
        p[1] += 0.5 * v

        # x = max(min(int(x0 + p[0]), max_x), 0)
        # y = max(min(int(y0 + p[1]), max_y), 0)

        x = int(x0 + p[0])
        y = int(y0 + p[1])

        bbox = frame[y:y+height, x:x+width]
        # bbox = cv2.remap(bbox, Xq, Yq, cv2.INTER_LINEAR)

        # print(np.mean(J_r))
        # print(u,v)
    # print(u,v)
    return (x,y)


In [152]:
p = [0, 0]
tracks = [[xt, yt]]
boxes = [template]

count = 0
for frame in Im[1:]:
    frame_mapped = cv2.remap(frame, Xq, Yq, cv2.INTER_LINEAR)
    # count+=1
    # u,v = min_ssd
    # boxes.append(min_ssd(frame, template, p, xt, yt, w, h))
    tracks.append(min_ssd(frame_mapped, template, p, xt, yt, w, h))
    # tracks.append(min_ssd(frame, template, p, xt, yt, w, h))
    # print(p)
    # print(count)

  s,_,_,_ = np.linalg.lstsq(J_r, -r.flatten())


In [153]:
len(Im)

146

In [154]:
# folder clean up
try:
    shutil.rmtree('output_imgs')
    shutil.rmtree('video')
    print("deleted")
except:
    print("folder(s) dne")

os.mkdir("output_imgs")
os.mkdir("video")
print("folders created")

deleted
folders created


In [145]:
for i in range(len(tracks)):
    fig = plt.figure()
    fig.set_size_inches(1. * dim[0] / dim[1], 1, forward = False)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(Im[i], cmap="gray")
    rect = patches.Rectangle(tracks[i], w, h, linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)

    plt.savefig(f"output_imgs/{i+1}.png", dpi = dim[1])
    plt.clf()
    plt.close()

cv2_fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = load_raw_output("output_imgs", "png")
output_video = cv2.VideoWriter("video/ex2_output.mp4", cv2_fourcc, FPS_LIMIT, (dim[0], dim[1]), True)

for frame in out:
    output_video.write(frame)

cv2.destroyAllWindows()
output_video.release()

# Exercise 2

In [None]:
N_LVLS = 2
SCALE_FACTOR = 1 # 2^SCALE_FACTOR
TRUE_SCALE = 2**SCALE_FACTOR

In [None]:
# def pyramid(frame, template, p, x0, y0, width, height):
def get_pyramid(frame, lvls = 3, sf = 1):
    pyr = [frame] # bottom -> top
    for i in range(lvls):
        next_lvl = pyr[i]
        for _ in range(sf):
            next_lvl = cv2.pyrDown(next_lvl)

        pyr.append(next_lvl)
    
    return pyr[::-1] # top -> bottom

In [None]:
def pyr_ssd(frame, template, p, x0, y0, width, height):
    x = int(x0 + p[0])
    y = int(y0 + p[1])

    U = np.array([0, 0])
    u, v = np.inf, np.inf

    bbox = frame[y:y+height, x:x+width]
    # bbox = cv2.remap(bbox, Xt, Yt, cv2.INTER_LINEAR)
    
    # dIm = np.float32(bbox) - np.float32(template)
    # r = np.float32(bbox) - np.float32(template)
    max_it = 80
    i=0
    # while np.linalg.norm(r) >= 0.5:
    # print(x,y)
    while np.linalg.norm([u, v]) > 0.1 and i < max_it:
        i+=1
        r = np.float32(bbox) - np.float32(template)
        
        r[abs(r)<=40] = 0
        
        r_v = np.gradient(np.float64(bbox), axis = 0).flatten()
        r_u = np.gradient(np.float64(bbox), axis = 1).flatten()
        
        J_r = np.column_stack((r_u, r_v))
        
        s,_,_,_ = np.linalg.lstsq(J_r, -r.flatten())
        
        u = s[0]
        v = s[1]
        p[0] += 0.5 * u
        p[1] += 0.5 * v
        U[0] += 0.5 * u
        U[1] += 0.5 * v
        # x = max(min(int(x0 + p[0]), max_x), 0)
        # y = max(min(int(y0 + p[1]), max_y), 0)

        x = int(x0 + p[0])
        y = int(y0 + p[1])

        bbox = frame[y:y+height, x:x+width]
        # bbox = cv2.remap(bbox, Xq, Yq, cv2.INTER_LINEAR)

        # print(np.mean(J_r))
        # print(u,v)
    # print(u,v)
    # return (x,y)
    return U


In [None]:
p = np.array([0, 0])
tracks = [[xt, yt]]

temp_pyr = get_pyramid(template, lvls=N_LVLS, sf=SCALE_FACTOR)


for frame in Im[1:]:
    
    frame_pyr = get_pyramid(frame, lvls=N_LVLS, sf=SCALE_FACTOR)
    
    U = np.array([0, 0])
    for i in range(N_LVLS):
        curr_template = temp_pyr[i]
        curr_frame = frame_pyr[i]
        # if i < N_LVLS:
        P = (p // ((2**SCALE_FACTOR) ** (N_LVLS - i))) + U
        x0 = xt // ((2**SCALE_FACTOR) ** (N_LVLS - i))
        y0 = yt // ((2**SCALE_FACTOR) ** (N_LVLS - i))
        # width = w // ((2**SCALE_FACTOR) * (N_LVLS - i))
        # height = h // ((2**SCALE_FACTOR) * (N_LVLS - i))
        # print(template.shape, w, h)
        width = curr_template.shape[1]
        height = curr_template.shape[0]
        

        U += pyr_ssd(curr_frame, curr_template, P, x0, y0, width, height)
        
        # else:
        #     x0 = xt
        #     y0 = yt
        
        # U = ssd(curr_frame, curr_template, U)
        U = U * (2**SCALE_FACTOR)
        print(U)
    
    p += U
    # regular ssd
    pyr_ssd(frame, template, p, xt, yt, w, h)
    tracks.append([int(xt + p[0]), int(yt + p[1])])
    # u,v lower = u,v * TRUE_SCALE

In [None]:
j = 0
frm = Im[0]



ps = get_pyramid(frm)
tps = get_pyramid(template)
x0 = xt // ((2**SCALE_FACTOR) ** (N_LVLS - j))
y0 = yt // ((2**SCALE_FACTOR) ** (N_LVLS - j))
width = tps[j].shape[1]
height = tps[j].shape[0]

# xs = np.arange(0, width, width / dim[0], dtype=np.float32)
# ys = np.arange(0, height, height / dim[1], dtype=np.float32)

# XSQ, YSQ = np.meshgrid(xs, ys)
# sstss = cv2.remap(Im[0], XSQ, YSQ, cv2.INTER_LINEAR)
# print(ps[j][y0:y0+height, x0:x0+width] == tps[0])

# 90, 160
print(ps[0][y0:y0+height, x0:x0+width].shape)
# print(tps[0].shape)
print(x0)

In [None]:
plt.imshow(ps[0], cmap='gray')
# plt.imshow(ps[0][y0:y0+height, x0:x0+width], cmap='gray')

In [None]:
plt.imshow(tps[j], cmap='gray')