# Part 1 - Localization from one camera

### Importing libraries

In [115]:
from setup import parse_config_file
import sys
import numpy as np
import time

from sklearn.decomposition import PCA
from scipy import io
from scipy.spatial.distance import cdist

import cv2 
from cv2 import DMatch

### Getting features from video

This function uses OpenCV's SIFT detector to find the keypoints and descriptors of frames sampled from the video.

In [116]:
def get_features(vid_capture, frames_to_process, frame_step=100):
    """Extract SIFT keypoints and descriptors from sampled video frames.

    The video is sampled by reading ``frame_step`` frames in a row and keeping
    only the last one; that frame is converted to grayscale, run through SIFT
    and shown in a preview window with its keypoints drawn. Sampling stops
    when ``frames_to_process`` frames were collected, when a read fails, or
    when the user presses ``q`` in the preview window.

    Args:
        vid_capture: An opened ``cv2.VideoCapture``. It is released before
            this function returns.
        frames_to_process: Maximum number of frames to sample.
        frame_step: Number of consecutive reads per sample (default 100, the
            value that used to be hard-coded).

    Returns:
        A tuple ``(features, frames, keypoints_array)``:

        * ``features`` -- object array of shape ``(1, frames_to_process)``.
          Each filled cell holds a ``(2 + D, N)`` array for a frame with N
          keypoints: row 0 is x, row 1 is y and the remaining D rows are the
          SIFT descriptor. Cells that were never reached keep the value 0.
        * ``frames`` -- list of the sampled frames as returned by ``read()``.
        * ``keypoints_array`` -- list with the keypoints of each frame.
    """
    features = np.zeros((1, frames_to_process), dtype=object)
    frames = []
    keypoints_array = []
    current_frame = 0

    # The detector keeps no per-frame state, so build it once.
    sift = cv2.SIFT_create()

    while current_frame < frames_to_process and vid_capture.isOpened():
        # vid_capture.read() returns (success flag, frame); only the last of
        # the frame_step reads is kept.
        ret, frame = False, None
        for _ in range(frame_step):
            ret, frame = vid_capture.read()
        if not ret:
            break

        # Frames decoded by OpenCV are in BGR channel order.
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        keypoints, descriptor = sift.detectAndCompute(frame_gray, None)
        keypoints_array.append(keypoints)

        if descriptor is None:
            # No keypoints detected: keep the (x, y, d) layout with 0 columns.
            descriptor = np.zeros((0, sift.descriptorSize()), dtype=np.float32)

        # Layout (x, y, d). Stack explicitly: np.insert(arr, [0, 1], ...)
        # interprets the indices relative to the original array and would
        # interleave the rows as (x, d0, y, d1, ...).
        locations = np.array(
            [keypoint.pt for keypoint in keypoints], dtype=descriptor.dtype
        ).reshape(-1, 2)
        features[0, current_frame] = np.vstack((locations.T, descriptor.T))

        current_frame += 1
        frames.append(frame)
        static_frame = cv2.drawKeypoints(frame, keypoints, None, color=(0, 255, 0))
        cv2.imshow('Static Keypoints', static_frame)

        if cv2.waitKey(25) & 0xFF == ord('q'):
            break

    vid_capture.release()
    cv2.destroyAllWindows()
    return features, frames, keypoints_array

### Finding matches from keypoint descriptors from two images

In [117]:
def match_features(features1, features2, matches_size=100, num_features=64):
    """Match two descriptor sets with a nearest-neighbour ratio test.

    Both sets are projected together onto ``num_features`` PCA components.
    Every row of ``features1`` is then paired with its nearest row of
    ``features2`` (Euclidean distance in the reduced space) and scored by
    ``second nearest distance / nearest distance``; a higher score means a
    more distinctive match.

    Args:
        features1: Descriptors of the query image, one descriptor per row.
        features2: Descriptors of the train image, one descriptor per row.
            At least two rows are needed because the ratio test reads the
            second nearest neighbour.
        matches_size: Maximum number of matches returned.
        num_features: Number of PCA components kept before matching.

    Returns:
        A tuple ``(matches, confidences)``: a list of ``cv2.DMatch`` sorted
        by decreasing confidence (``distance`` is measured in the PCA-reduced
        space) and the array of the corresponding confidences.
    """
    n_query = len(features1)
    C = np.vstack((features1, features2))

    # PCA is fitted on both sets so they share the same projection.
    reduced = PCA(n_components=num_features).fit_transform(C)
    reduced1 = reduced[:n_query, :]
    reduced2 = reduced[n_query:, :]

    # Euclidean distance between every query / train descriptor pair
    D = cdist(reduced1, reduced2, 'euclidean')

    # Nearest and second nearest neighbour of every query descriptor
    I = np.argsort(D, axis=1)
    rows = np.arange(len(D))
    nearest_index = I[:, 0]
    nearest_neighbor = D[rows, nearest_index]
    second_nearest_neighbor = D[rows, I[:, 1]]

    # 0/0 happens when the two closest candidates both coincide with the
    # query; the resulting NaN is filtered out right below.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratios = nearest_neighbor / second_nearest_neighbor

    # Keep non-zero ratios, as before, so that 1 / ratio stays finite. NaN
    # ratios are dropped too: they used to pass the non-zero filter and end
    # up at the top of the descending ranking.
    kept = np.flatnonzero(np.isfinite(ratios) & (ratios != 0))
    matches = np.column_stack((kept, nearest_index[kept]))
    confidences = 1.0 / ratios[kept]

    # Sort by confidence and keep the best matches_size matches
    order = np.argsort(confidences)[::-1]
    matches = matches[order][:matches_size, :]
    confidences = confidences[order][:matches_size]

    matches = [DMatch(_queryIdx=int(query_idx),
                      _trainIdx=int(train_idx),
                      _distance=float(D[int(query_idx), int(train_idx)]))
               for query_idx, train_idx in matches]

    return matches, confidences

### Creating homography matrix from sets of points

Given two sets of corresponding points, this function computes the homography that maps the first set onto the second.

In [118]:
def create_homography_matrix(src_points, dst_points):
    """Estimate the homography that maps ``src_points`` onto ``dst_points``.

    Each correspondence ``(x, y) -> (u, v)`` contributes the two linear
    equations

        x*h1 + y*h2 + h3 - u*x*h7 - u*y*h8 = u
        x*h4 + y*h5 + h6 - v*x*h7 - v*y*h8 = v

    and the eight unknowns are found in the least-squares sense. The ninth
    entry of the matrix is fixed to 1.

    Args:
        src_points: Sequence of N ``(x, y)`` points, N >= 4.
        dst_points: Sequence of the N corresponding ``(u, v)`` points.

    Returns:
        The 3x3 homography as a float array with ``H[2, 2] == 1``.

    Raises:
        ValueError: If the inputs are not both N x 2 with the same N.
        numpy.linalg.LinAlgError: If fewer than four correspondences are
            given or the points are degenerate (rank-deficient system).
    """
    src = np.asarray(src_points, dtype=float)
    dst = np.asarray(dst_points, dtype=float)
    if src.ndim != 2 or src.shape[1] != 2 or src.shape != dst.shape:
        raise ValueError(
            "src_points and dst_points must both be N x 2 with the same N")

    n_points = len(src)
    if n_points < 4:
        raise np.linalg.LinAlgError(
            "at least 4 point correspondences are required")

    x, y = src[:, 0], src[:, 1]
    u, v = dst[:, 0], dst[:, 1]
    zeros = np.zeros(n_points)
    ones = np.ones(n_points)

    # Rows are interleaved (u-equation, v-equation) per correspondence.
    A = np.empty((2 * n_points, 8))
    A[0::2] = np.column_stack((x, y, ones, zeros, zeros, zeros, -u * x, -u * y))
    A[1::2] = np.column_stack((zeros, zeros, zeros, x, y, ones, -v * x, -v * y))
    b = np.empty(2 * n_points)
    b[0::2] = u
    b[1::2] = v

    # lstsq avoids explicitly inverting A^T A, which squares the condition
    # number of the system.
    h, _, rank, _ = np.linalg.lstsq(A, b, rcond=None)
    if rank < 8:
        raise np.linalg.LinAlgError(
            "degenerate point configuration: homography is not unique")

    return np.append(h, 1.0).reshape(3, 3)

### Homography from features

In [119]:
def homographies_from_features(vid_capture, features, frames_to_process,
                               frames=None, keypoints_array=None):
    """Compute the homography between every pair of processed frames.

    For each pair ``i < j`` the descriptors (rows 2 and onwards of the
    feature arrays) are matched with ``match_features`` and the matched
    keypoint locations (rows 0 and 1) are fed to
    ``create_homography_matrix``, source = frame ``i``, destination =
    frame ``j``.

    Args:
        vid_capture: Not used; kept so existing calls keep working.
        features: Object array of shape ``(1, n_frames)`` whose cells are
            ``(2 + D, N)`` arrays laid out as (x, y, descriptor).
        frames_to_process: Number of leading frames to pair up.
        frames: Optional list of frame images used for the match preview.
            When omitted, a module-level ``frames`` is used if it exists,
            which is what this function silently relied on before.
        keypoints_array: Optional list of per-frame keypoints for the
            preview, with the same fallback as ``frames``.

    Returns:
        A list with one entry per processed pair: ``[i + 1, j + 1]`` (1-based
        frame indices) followed by the nine entries of the homography in
        row-major order. Pressing ``q`` in the preview window stops the
        whole computation and returns the entries collected so far.
    """
    # Backward compatibility with callers that defined these as globals.
    if frames is None:
        frames = globals().get('frames')
    if keypoints_array is None:
        keypoints_array = globals().get('keypoints_array')
    show_preview = frames is not None and keypoints_array is not None

    homographies = []
    stop_requested = False
    for i in range(frames_to_process):
        for j in range(i + 1, frames_to_process):
            matches, confidences = match_features(
                np.transpose(features[0, i][2:]),
                np.transpose(features[0, j][2:]),
                matches_size=100)

            if show_preview:
                matched_img = cv2.drawMatches(frames[i], keypoints_array[i],
                                              frames[j], keypoints_array[j],
                                              matches, None, flags=2)
                h, w, _ = matched_img.shape
                matched_img = cv2.resize(matched_img,
                                         (int(3*w/4), int(3*h/4)),
                                         interpolation=cv2.INTER_LINEAR)
                cv2.imshow('Static Keypoints', matched_img)
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    # Leave both loops; breaking only the inner one would
                    # skip some pairs and keep going with the next frame.
                    stop_requested = True
                    break

            src_points = [features[0, i][0:2, match.queryIdx] for match in matches]
            dst_points = [features[0, j][0:2, match.trainIdx] for match in matches]

            homography = [i+1, j+1]
            homography.extend(create_homography_matrix(src_points, dst_points).flatten())
            homographies.append(homography)
        if stop_requested:
            break
    cv2.destroyAllWindows()
    return homographies

### Homography from corresponding points (map)

In [120]:
def homographies_from_corresponding_points(pts_in_map_from_config, pts_in_frame_from_config):
    """Build one map-to-frame homography per pair of config rows.

    Row ``k`` of each argument has an identifier in its first position
    followed by flat coordinates ``x1, y1, x2, y2, ...``. The coordinates of
    the two rows are paired up and passed to ``create_homography_matrix``
    with the map points as source and the frame points as destination.

    Args:
        pts_in_map_from_config: Rows of map points.
        pts_in_frame_from_config: Rows of frame points; the first element of
            each row is converted to ``int`` and stored in the result.

    Returns:
        A list with one entry per row: ``[0, frame_id]`` followed by the nine
        entries of the homography in row-major order.
    """
    def _as_xy_pairs(flat_coords):
        # Turn x1, y1, x2, y2, ... into an (N, 2) float array.
        coords = np.array(flat_coords, dtype=float)
        return coords.reshape(int(len(coords)/2), 2)

    results = []
    for row, map_entry in enumerate(pts_in_map_from_config):
        frame_entry = pts_in_frame_from_config[row]
        map_xy = _as_xy_pairs(map_entry[1:])
        frame_xy = _as_xy_pairs(frame_entry[1:])
        labelled = [0, int(frame_entry[0])]
        labelled.extend(create_homography_matrix(map_xy, frame_xy).flatten())
        results.append(labelled)
    return results

### `processing_video.py` main

In [121]:
# Extract SIFT features from the configured video and save them to the
# .mat file named by the 'keypoints_out' config entry.
config_path = "./conf_file.cpg"
config = parse_config_file(config_path)

vid_capture = cv2.VideoCapture(config['videos'])
if not vid_capture.isOpened():
    # Without this check an unreadable video would save only placeholders.
    print("Could not open the video defined inside the config file")
    sys.exit(1)
frames_to_process = 5

features, frames, keypoints_array = get_features(vid_capture, frames_to_process)
if len(frames) < frames_to_process:
    # Cells of `features` that were never filled still hold the integer 0.
    print(f"Only {len(frames)} of {frames_to_process} frames were processed")
data={'features': features}
io.savemat(config['keypoints_out'], data)

### `compute_transform.py` main

In [None]:
# Compute the homographies requested by the first 'transforms' config entry
# and save them to the .mat file named by 'transforms_out'.
config_path = "./conf_file.cpg"
config = parse_config_file(config_path)

# NOTE(review): this capture is only passed through to
# homographies_from_features and is never released in this cell.
vid_capture = cv2.VideoCapture(config['videos'])
frames_to_process = 5

if config['transforms'][0][0] == 'homography':
    if config['transforms'][0][1] == 'all':
        # Uses `features` left in scope by the feature-extraction cell.
        homographies = homographies_from_features(vid_capture, features, frames_to_process)
    elif config['transforms'][0][1] == 'map':
        if len(config['pts_in_map']) != len(config['pts_in_frame']):
            print("Different amount of pts_in_map and pts_in_frame defined inside the config file")
            sys.exit(1)
        homographies = homographies_from_corresponding_points(config['pts_in_map'], config['pts_in_frame'])
    else:
        # Without this branch `homographies` would be undefined (or stale
        # from an earlier run) when it is saved below.
        print("The only acceptable homography modes are \"all\" and \"map\"")
        sys.exit(1)
    # One column per homography in the saved matrix
    data={'transforms': np.array(homographies).transpose()}
    io.savemat(config['transforms_out'], data)
else:
    print("The only acceptable type is \"homography\"")