### Simple AR Program
1) Compute point correspondences (2D and AR tag) <br>
2) Estimate the pose of the camera <br>
3) Project 3D content to the image plane

In [13]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os
import glob

import warnings
warnings.filterwarnings("ignore")

**Inputting the video frames**

In [14]:
# Glob pattern selecting the input frame files.
FRAMES_PATH = "data/apriltagims/*"

# Sorted by path string so the frames are visited in filename order.
video_frame_paths = sorted(glob.glob(FRAMES_PATH))

In [15]:
# Starting positions of the four points to track, one (x, y) row per point.
initial_points = 100.0 * np.array([
    [1.981631469726562, 3.165294189453125],
    [3.786268920898438, 3.424402770996094],
    [4.036800842285157, 1.686005859375000],
    [2.333528289794922, 1.491907043457031],
])

# Camera intrinsic matrix (3x3).
K = 100.0 * np.array([
    [7.661088867187500, 0, 3.139585628047498],
    [0, 7.699354248046875, 2.503607131410900],
    [0, 0, 0.010000000000000],
])

num_frames = 166

# Tag / cube dimensions (presumably metres -- TODO confirm units).
tag_width = 0.13
tag_height = 0.13
cube_depth = 0.13

# Tag corner coordinates in the tag's own plane, centred on the tag.
corner_pts = np.array([
    [tag_width/2, tag_height/2],
    [-tag_width/2, tag_height/2],
    [-tag_width/2, -tag_height/2],
    [tag_width/2, -tag_height/2],
])

**Implement KLT Tracking**

In [16]:
# Read every frame from disk and stack the results into a single array
# (indexed as [frame, row, col, channel] when all frames share one shape).
video_imgs = np.array([cv2.imread(path) for path in video_frame_paths])

In [17]:
class KLTtrack():
    '''
    Tracks a fixed set of 2D points through a stack of frames using pyramidal
    Lucas-Kanade optical flow (cv2.calcOpticalFlowPyrLK).

    :param imglist(np.array): frames indexed as [frame, row, col, channel]; each frame is
                              converted with COLOR_BGR2GRAY, so 3-channel BGR frames are expected
    :param trackpts(np.array): size(Nx2) positions of the points in the first frame
    '''
    def __init__(self,imglist,trackpts):
        self.vid = imglist
        self.pts = trackpts
        # Parameters for Lucas-Kanade optical flow:
        # 31x31 search window, 3 pyramid levels above the base image, and a
        # stop criterion of 5 iterations or an update smaller than 0.03.
        self.lk_params = dict(winSize  = (31,31),
                  maxLevel = 3,
                  criteria = (cv2.TERM_CRITERIA_EPS|cv2.TERM_CRITERIA_COUNT, 5, 0.03))

    def pointTracker(self):
        '''
        Track self.pts from the first frame through every following frame of self.vid.

        Side effect: a filled circle is drawn at each tracked position directly into the
        frames stored in self.vid (frames 1 onwards), because each frame is a view of
        that array and cv2.circle draws in place.

        :Return (np.array): per-frame point positions; entry 0 is self.pts as given, each
                            later entry holds the positions tracked in that frame.
                            Shape is (frames, N, 2) as long as no point is lost. Points whose
                            tracking status is not 1 are dropped for the rest of the sequence,
                            so the per-frame entries then have different lengths.
        '''
        # One random color per tracked point, used only for the drawing below
        color = np.random.randint(0,255,(self.pts.shape[0],3))
        # The first frame is the reference for the first optical-flow step
        old_frame = self.vid[0,:,:,:]
        old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
        # Seed points floored to whole pixels, as float32 of shape (N,1,2) for OpenCV
        p0 = np.floor(self.pts).reshape(-1,1,2).astype(np.float32)

        # Mask image that accumulates the motion trails
        mask = np.zeros_like(old_frame)
        corners = [self.pts]

        for frame_idx in range(1,len(self.vid)):
            # Read from the stack this tracker was constructed with, not a notebook global
            frame = self.vid[frame_idx,:,:,:]
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # calculate optical flow from the previous frame to this one
            p1, st, _ = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **self.lk_params)
            # Select good points (status 1 means the flow for that point was found)
            tracked = (st == 1)
            good_new = p1[tracked]
            good_old = p0[tracked]
            # draw the tracks; OpenCV drawing functions need integer pixel coordinates
            for pt_idx,(new,old) in enumerate(zip(good_new, good_old)):
                a,b = (int(round(float(v))) for v in new.ravel())
                c,d = (int(round(float(v))) for v in old.ravel())
                mask = cv2.line(mask, (a,b),(c,d), color[pt_idx].tolist(), 2)
                frame = cv2.circle(frame,(a,b),5,color[pt_idx].tolist(),-1)
            # Composite of frame and trails; it is not displayed anywhere in this method
            img = cv2.add(frame,mask)

            # Poll the keyboard for 5 ms; ESC (27) stops tracking early
            k = cv2.waitKey(5) & 0xff
            if k == 27:
                break
            # Now update the previous frame and previous points
            old_gray = frame_gray.copy()
            corners.append(good_new)
            p0 = good_new.reshape(-1,1,2)

        return np.array(corners)

In [18]:
def track_corners(video_frames, initial_pts):
    '''
    Track a set of points through a frame stack with a KLTtrack tracker.

    :param video_frames(np.array): frame stack handed to KLTtrack
    :param initial_pts(np.array): size(Nx2) point positions in the first frame
    :Return (np.array): per-frame point positions as returned by KLTtrack.pointTracker
    '''
    # Use the argument that was passed in, not the notebook-level `initial_points`
    tracker = KLTtrack(video_frames, initial_pts)

    return tracker.pointTracker()

In [19]:
# Track the initial points through all loaded frames.
corners = track_corners(video_imgs, initial_points)

In [20]:
# Sanity check on the tracker output: (frames, points, 2) is expected.
corners.shape

(166, 4, 2)

**Homography Estimation**

In [31]:
from utils import vectorize, solve_homography

**Pose Estimation (from Homography) of co-planar points**

In [32]:
def find_pose_from_homography(H, K):
    '''
    Estimate the camera pose from a plane-to-image homography, given the intrinsics.

    For points on the plane Z = 0 the homography satisfies H ~ K [r1 r2 t] (up to scale),
    where r1, r2 are the first two columns of the rotation and t is the translation.

    :param H(np.array): size(3x3) homography matrix (defined up to a non-zero scale)
    :param K(np.array): size(3x3) intrinsics of camera
    :Return R: size (3 x 3) matrix of the rotation of the transformation (orthogonal, det = +1)
    :Return t: size (3 x 1) vector of the translation of the transformation
    '''
    H = np.asarray(H, dtype=float)

    # H and -H describe the same homography but give t and -t. Multiplying by the sign of
    # H[2,2] picks the solution whose translation has a positive z-component, i.e. the
    # plane origin lies in front of the camera. (H[2,2] carries the sign of t_z when the
    # last row of K is [0, 0, 1].)
    H = H*np.sign(H[2,2])

    # H = K [r1 r2 t]  =>  K^-1 H = [r1 r2 t] up to scale
    H_ = np.linalg.inv(K)@H

    h1, h2, h3 = H_[:,0], H_[:,1], H_[:,2]

    # Approximate rotation with columns h1, h2, h1 x h2; generally not orthogonal yet
    R_ = np.array((h1,h2,np.cross(h1,h2))).T

    # Project onto the nearest rotation matrix; np.linalg.svd returns V already transposed
    U, S, Vt = np.linalg.svd(R_)

    # The det term flips the last axis if needed so that det(R) = +1 (no reflection)
    R = U@np.diag([1.0, 1.0, np.linalg.det(U@Vt)])@Vt

    # h1 should be a unit rotation column, so its norm is the unknown scale of H_
    t = (h3/np.linalg.norm(h1)).reshape(-1,1)

    return R,t

In [None]:
def project_points(render_points, K, R, t):
    '''
    Project 3D world points onto the image plane with the pinhole model x ~ K (R X + t).

    NOTE(review): the point layout is a choice made here, since nothing in the notebook
    calls this function yet -- world points are given one per row (Nx3) and the result is
    returned one point per column (2xN). Confirm against the intended callers.

    :param render_points(np.array): size(Nx3) world coordinates of the points to render
                                    (a single point of size 3 is also accepted)
    :param K(np.array): size(3x3) intrinsic camera matrix
    :param R(np.array): size(3x3) rotation from world to camera coordinates
    :param t(np.array): size(3x1) or size(3) translation from world to camera coordinates
    :Return (np.array): size(2xN) pixel coordinates of the projected points
    :Raises ValueError: if render_points does not have three coordinates per point
    '''
    world_pts = np.atleast_2d(np.asarray(render_points, dtype=float))
    if world_pts.ndim != 2 or world_pts.shape[1] != 3:
        raise ValueError("render_points must have shape (N, 3)")

    t_col = np.asarray(t, dtype=float).reshape(3, 1)

    # Camera-frame coordinates, one point per column: (3, N)
    cam_pts = R @ world_pts.T + t_col

    # Homogeneous image coordinates, then divide by the third row to get pixels
    img_pts = K @ cam_pts
    return img_pts[:2] / img_pts[2]


In [42]:
def draw_ar_cube(points, im):
    '''
    Placeholder for drawing the projected AR cube onto an image.

    TODO: not implemented yet -- the function has no body, so it returns None and
    leaves `im` untouched.

    Intended contract:
    :param points(np.array): size(2x8) projected cube points to connect
    :param im(np.array): size(nxm) image to draw the cube on
    :Return im_out(np.array): size(nxm) image with the cube drawn on it
    '''

**Projecting Points**

In [24]:
# Scratch: small 1-D vector used to check array stacking and column indexing.
a = np.array([1,2,3])

In [25]:
# Scratch: a tuple of vectors becomes the rows of a 2-D array.
b = np.array((a,a,a))

In [36]:
# Scratch: b[:,2] selects the third column; .T is a no-op on the resulting 1-D array.
b[:,2].T

array([3, 3, 3])