#Computer Vision Assignment 4
Metehan Seyran <br/>
150170903

In [63]:
import moviepy.video.io.VideoFileClip as mpy
import cv2
import numpy as np
from scipy import signal
import moviepy.editor as mpy_editor

In [57]:
def prepareVideo(images_list, part_name="1"):
  clip = mpy_editor.ImageSequenceClip(images_list, fps = 25)
  clip.write_videofile("hw4_"+part_name+".mp4", codec='libx264')

In [210]:
def locate_hand(img):
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  mask = cv2.inRange(img, (0, 195, 1), (183, 255, 191)) ## Color range to mask hand
  mask = cv2.erode(mask, np.ones((3,3), dtype="uint8")) ## Getting rid of noisy points
  mask = cv2.dilate(mask, np.ones((3,3), dtype="uint8"))

  x, y = np.where(mask==255)

  right_hand = (x <= 400) ## Making sure it will not select a pixel from foot
  x = x[right_hand]
  y = y[right_hand]

  return x, y

In [219]:
def LucasKanadeOF(img1, img2, points, window_size=7, gauss_filter=(7,7)):

  w = window_size//2
  point_x1, point_y1 = points

  ## Smoothing the images
  img1 = cv2.GaussianBlur(img1, gauss_filter, 1)
  img2 = cv2.GaussianBlur(img2, gauss_filter, 1)

  ## Normalizing the images
  img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY) /255.
  img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) /255.

  grad_filter_x = np.array([[1,-1], [1,-1]])
  grad_filter_y = np.array([[1, 1], [-1, -1]])
  grad_filter_t = np.ones((2,2))

  ## Calculate gradients (Ix, Iy, It)
  grad_x = signal.convolve2d(img1, grad_filter_x, mode="same")
  grad_y = signal.convolve2d(img1, grad_filter_y, mode="same")
  grad_t = signal.convolve2d(img2, grad_filter_t, mode="same") + signal.convolve2d(img1, -grad_filter_t, mode="same")
  
  ## Crop the region of the point
  grad_x = grad_x[point_x1-w:point_x1+w+1, point_y1-w:point_y1+w+1]
  grad_y = grad_y[point_x1-w:point_x1+w+1, point_y1-w:point_y1+w+1]
  grad_t = grad_t[point_x1-w:point_x1+w+1, point_y1-w:point_y1+w+1]

  ATA = np.array([[np.sum(grad_x**2), np.sum(grad_x * grad_y)], [np.sum(grad_y * grad_x), np.sum(grad_y**2)]])
  Ab = -np.array([np.sum(grad_x * grad_t), np.sum(grad_y*grad_t)])
  A_inv = np.linalg.pinv(ATA)
  u = np.dot(A_inv, Ab)

  return (point_x1, point_y1), u

## Part 1

In [213]:
biped_vid = mpy.VideoFileClip("./biped_1.avi")
frame_count = biped_vid.reader.nframes
video_fps = biped_vid.fps
frames = []
for i in range(frame_count):
  walker_frame = biped_vid.get_frame(i*1.0/video_fps)
  frames.append(walker_frame)

In [None]:
new_frames = []
x, y = locate_hand(frames[0])
x = x[-1]
y = y[-1]
for i in range(len(frames)-1):
  start_point, u = LucasKanadeOF(frames[i], frames[i+1], (x, y), window_size=7)
  x = int(x + int(u[1]))
  y = int(y + int(u[0]))
  new_img = frames[i].copy()
  arrowed_img = cv2.arrowedLine(new_img, (start_point[1], start_point[0]), (int(start_point[1] + u[0]*5), int(start_point[0] + u[1]*5)), (255,255,255), 2)

  new_frames.append(arrowed_img)

prepareVideo(new_frames, "1")

## Part 2

In [231]:
biped_vid2 = mpy.VideoFileClip("./biped_2.avi")
frame_count = biped_vid2.reader.nframes
video_fps = biped_vid2.fps
frames2 = []
for i in range(frame_count):
  walker_frame = biped_vid2.get_frame(i*1.0/video_fps)
  frames2.append(walker_frame)

In [None]:
points = [(183, 210), (315, 308), (183, 308), (315, 210)]
ground_truth = [[] for i in range(5)]
new_frames2 = []

x, y = locate_hand(frames2[0])
x, y = x[-1], y[-1]

for i in range(len(frames2)-1):

  img_w_arrow = frames2[i].copy()

  for j in range(4):
    start_point, u = LucasKanadeOF(frames2[i], frames2[i+1], points[j], window_size=7)
    ## clip the OF vector between [-1e3, 1e3]
    u[0] = max(min(u[0], 1e3), -1e3)
    u[1] = max(min(u[1], 1e3), -1e3)
    ground_truth[j].append(u)
    img_w_arrow = cv2.arrowedLine(img_w_arrow, (start_point[1], start_point[0]), (int(start_point[1] + u[0]), int(start_point[0] + u[1])), (3,169,244), 3)
  
  start_point, u = LucasKanadeOF(frames2[i], frames2[i+1], (x, y), window_size=5, gauss_filter=(7,7))
  x = int(x + int(u[1]))
  y = int(y + int(u[0]))
  ground_truth[4].append(u)
  img_w_arrow = cv2.arrowedLine(img_w_arrow, (start_point[1], start_point[0]), (int(start_point[1] + u[0]*10), int(start_point[0] + u[1]*10)), (3,169,244), 3)

  new_frames2.append(img_w_arrow)

prepareVideo(new_frames2, "2")

##Part 3

In [None]:
biped_vid3 = mpy.VideoFileClip("./biped_3.avi")
frame_count = biped_vid3.reader.nframes
video_fps = biped_vid3.fps
frames3 = []
for i in range(frame_count):
  walker_frame = biped_vid3.get_frame(i*1.0/video_fps)
  frames3.append(walker_frame)

In [None]:
points = [(183, 210), (315, 308), (183, 308), (315, 210)]
of_vectors2 = [[] for i in range(5)]
new_frames3 = []

for i in range(len(frames3)-1):
  
  x5, y5 = locate_hand(frames3[i])
  point5 = (x5[-1], y5[-1])

  img_w_arrow = frames3[i].copy()

  for j in range(4):
    start_point, u = LucasKanadeOF(frames3[i], frames3[i+1], points[j], 7)
    u[0] = max(min(u[0], 1e3), -1e3)
    u[1] = max(min(u[1], 1e3), -1e3)
    of_vectors2[j].append(u)
    img_w_arrow = cv2.arrowedLine(img_w_arrow, start_point, (int(start_point[0] + u[0]), int(start_point[1] + u[1])), (255, 0, 0), 3)

  start_point, u = LucasKanadeOF(frames3[i], frames3[i+1], point5, 7)
  of_vectors2[4].append(u)
  img_w_arrow = cv2.arrowedLine(img_w_arrow, start_point, (int(start_point[0] + u[0]*25), int(start_point[1] + u[1]*25)), (255,0,0), 3)

  new_frames3.append(img_w_arrow)
print(np.array(of_vectors2).shape)
prepareVideo(new_frames3, "3")

In [242]:
def calculateMSE(moving_wall, ground_truth):
  point_mse_errors = [[] for i in range(5)]

  for i in range(5):
    point_mse_errors[i].append(np.mean((np.array(moving_wall[i]) - np.array(ground_truth[i]))**2))

  return point_mse_errors

errors = calculateMSE(of_vectors2, ground_truth)

print("Errors for corners of rectangular region: ", errors[:4])
print("Error of the OF vector on hand: ", errors[-1][0])

Errors for corners of rectangular region:  [[4301.529953942566], [5252.8377964897845], [1853.9063920105161], [5152.690419572105]]
Error of the OF vector on hand:  490.15535982335575
