In [None]:
# install third-party packages for this notebook; kornia provides the LoFTR
# matcher that is imported further down
!pip3 install kornia
!pip3 install kornia_moons
!pip3 install kaleido

2. Использовать решение на базе нейронных сетей. Любые идеи.

In [2]:
# mount Google Drive at /content/drive so files stored there can be accessed
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from kornia.feature import LoFTR
from tqdm import tqdm

import plotly.graph_objects as gr
import numpy as np
import torch
import time
import cv2
import os


In [4]:
# function to visualize the camera trajectory and viewing direction
def visualize_trajectory(rotation, positions, title='Camera motion'):
    """Draw the camera path and one direction segment per camera in 3D.

    Args:
        rotation: iterable of matrices, one per position; the third column
            of each matrix is used as the direction of the drawn segment.
        positions: 2-D array whose first three columns are the x, y, z
            coordinates of the camera.
        title: title shown above the figure.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    figure = gr.Figure()

    # the whole path as a single purple trace
    path_trace = gr.Scatter3d(
        x=positions[:, 0],
        y=positions[:, 1],
        z=positions[:, 2],
        marker=dict(size=1.2, color='purple'),
    )
    figure.add_trace(path_trace)

    # one short red segment per camera: from the camera position to a point
    # half a unit along the third column of its rotation matrix
    for origin, rot_mat in zip(positions, rotation):
        tip = origin + 0.5 * rot_mat[:, 2]
        xs, ys, zs = ([origin[axis], tip[axis]] for axis in range(3))
        segment = gr.Scatter3d(x=xs, y=ys, z=zs, mode='lines',
                               line=dict(width=2, color='red'))
        figure.add_trace(segment)

    figure.update_layout(title=title, showlegend=False)
    figure.show()


In [5]:
# use the LoFTR matcher to get corresponding points in two frames
def process_imgs_LoFTR(matcher, input_dict: dict, confidence_th: float = 0.85):
    """Match two frames and keep only the confident correspondences.

    Args:
        matcher: callable (e.g. a kornia ``LoFTR`` module) that receives
            ``input_dict`` and returns a mapping with the tensors
            ``'keypoints0'``, ``'keypoints1'`` and ``'confidence'``.
        input_dict: passed to ``matcher`` unchanged.
        confidence_th: minimum confidence a match needs to be kept.
            Defaults to 0.85, the value that used to be hard-coded.

    Returns:
        Tuple ``(kps1, kps2)`` of NumPy arrays holding the kept rows of
        ``'keypoints0'`` and ``'keypoints1'``; row ``i`` of both arrays
        belongs to the same match.
    """
    with torch.inference_mode():
        corrs = matcher(input_dict)

    confidence = corrs['confidence'].cpu().numpy()

    # boolean mask instead of collecting indexes one by one and deleting
    # them; matches below the threshold (and NaN confidences) are dropped
    keep = confidence >= confidence_th

    kps1 = corrs['keypoints0'].cpu().numpy()[keep]
    kps2 = corrs['keypoints1'].cpu().numpy()[keep]

    return kps1, kps2


In [17]:
# function to create the camera trajectory from a video
def process_video_LoFTR(input_path: str, scale=None):
    """Estimate the camera motion along a video from LoFTR frame matches.

    Consecutive frames are converted to gray, downscaled, matched with
    LoFTR, and the relative pose of each pair is recovered from the
    essential matrix. The relative poses are chained into a trajectory.

    Args:
        input_path: path of the video file.
        scale: factor by which the frame width and height are divided
            before matching. When ``None`` the module-level ``SCALE``
            constant is used, which keeps existing calls working.

    Returns:
        Tuple ``(rotations_list, trajectory, positions)``:
        ``rotations_list`` is a list with one 3x3 matrix per frame (the
        first one is the identity), ``trajectory`` is an array with one
        accumulated translation per frame, and ``positions`` is an array
        with one position per frame obtained by summing ``R @ t`` of every
        frame pair.

    Raises:
        IOError: if the video cannot be opened.
    """
    if scale is None:
        scale = SCALE

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        # otherwise the read loop ends at once and a one-point trajectory
        # would be returned without any hint that the path is wrong
        raise IOError(f'Cannot open video: {input_path}')

    try:
        # camera matrix: the principal point is the centre of the downscaled
        # frame; the focal length of 3000 is a hard-coded value
        # NOTE(review): confirm the focal length against the real calibration
        K = np.array([[3000, 0, cap.get(cv2.CAP_PROP_FRAME_WIDTH) / (2 * scale)],
                      [0, 3000, cap.get(cv2.CAP_PROP_FRAME_HEIGHT) / (2 * scale)],
                      [0, 0, 1]])

        # create Local Feature Matching with Transformers; use the GPU when
        # there is one and fall back to the CPU otherwise
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        matcher = LoFTR(pretrained='outdoor').eval().to(device)

        # accumulated camera pose, starts as the identity
        cam_matrix = np.eye(4)

        # per-frame results, collected in lists and converted once at the end
        trajectory = [cam_matrix[:3, 3].copy()]
        # the first rotation is the identity, in line with the initial pose
        rotations_list = [cam_matrix[:3, :3].copy()]
        positions = [np.array([0, 0, 0])]

        prev_frame_t = None

        while True:
            is_success, frame = cap.read()

            if not is_success:
                break

            height, width = frame.shape[:2]
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # cv2.resize takes dsize as (width, height), and the interpolation
            # flag must be given by keyword: the third positional parameter
            # is dst
            frame_gray = cv2.resize(frame_gray,
                                    (int(width / scale), int(height / scale)),
                                    interpolation=cv2.INTER_CUBIC)

            # add batch and channel axes -> (1, 1, H, W) without moving any
            # pixel, copy to the device and normalize
            frame_t = torch.from_numpy(frame_gray[None, None]).to(device) / 255.

            if prev_frame_t is not None:
                input_dict = {'image0': prev_frame_t, 'image1': frame_t}
                kps1, kps2 = process_imgs_LoFTR(matcher, input_dict)

                # no motion is assumed for a frame pair whose pose cannot be
                # estimated, so every output keeps one entry per frame
                R, t = np.eye(3), np.zeros((3, 1))

                # the essential matrix needs at least five correspondences
                if len(kps1) >= 5:
                    e_mat, mask = cv2.findEssentialMat(kps1, kps2, K,
                                                       method=cv2.LMEDS,
                                                       threshold=1.)
                    if e_mat is not None and e_mat.shape == (3, 3):
                        _, R, t, _ = cv2.recoverPose(e_mat, kps1, kps2, K,
                                                     mask=mask)

                # relative transform between the two frames
                # NOTE(review): T is chained as returned by recoverPose, not
                # its inverse, as in the original code - confirm the intended
                # convention
                T = np.eye(4)
                T[:3, :3] = R
                T[:3, 3] = t.ravel()

                # a new array is created here, so the slices stored below
                # are not changed by later iterations
                cam_matrix = np.dot(cam_matrix, T)
                trajectory.append(cam_matrix[:3, 3])
                rotations_list.append(cam_matrix[:3, :3])

                positions.append(positions[-1] + np.dot(R, t).T[0])

            # keep the current frame as the previous one for the next step
            prev_frame_t = frame_t
    finally:
        # release the capture even when matching or pose estimation fails
        cap.release()

    return rotations_list, np.array(trajectory), np.array(positions)


In [None]:
video_path = '/content/drive/MyDrive/peleng-cources/HW_10/videos/video_5fps.mp4'
data_folder = '/content/drive/MyDrive/peleng-cources/HW_10/saved_data'

# create the output folder together with any missing parent folder;
# nothing happens when it already exists
os.makedirs(data_folder, exist_ok=True)

# factor by which every frame is downscaled; process_video_LoFTR reads it
# as a global
SCALE = 2

# get camera rotations and trajectory
rotations_list, trajectory, positions = process_video_LoFTR(video_path)

# fps value taken from the path: the text between the last '_' and 'fps'
start = video_path.rfind('_')
end = video_path.find('fps')
fps_label = video_path[start+1:end]

# visualize camera motion
title = f'Camera motion, LoFTR, video fps = {fps_label}'
visualize_trajectory(rotations_list, trajectory, title)

# save camera rotations and trajectory to npz-file
npz_filename = os.path.join(data_folder,
                            f'data_LoFTR_LMEDS_{fps_label}fps.npz')
np.savez(npz_filename, R=rotations_list, trajectory=trajectory,
         positions=positions)
