# Image Matching and Homography Estimation with OpenCV and LightGlue

In [1]:
import os
import cv2 
import time
import json
import math
import torch
import numpy as np
from vidstab import VidStab
import matplotlib.pyplot as plt

from lightglue import viz2d
from lightglue import LightGlue, SuperPoint, DISK
from lightglue.utils import load_image, rbd
import CSRansac

In [5]:
class cfg:
    img0 = "img0.png"
    img1 = "img1.png"
    
    size = (640, 480)
    interpolation = cv2.INTER_AREA
    
    lightglue = {
        "extractor": "SuperPoint", # SuperPoint, DISK
        "device": "cuda", # cpu, cuda
        "max_kpts": 2048,
        "homography": {
            "method": cv2.RANSAC,
            "ransacReprojThreshold": 3.0
        }
    }

## LightGlue

In [6]:
class cfg:
    img0 = "img0.png"
    img1 = "img1.png"
    
    size = (640, 480)
    interpolation = cv2.INTER_AREA
    
    lightglue = {
        "extractor": "SuperPoint", # SuperPoint, DISK
        "device": "cuda", # cpu, cuda
        "max_kpts": 2048,
        "homography": {
            "method": cv2.RANSAC,
            "ransacReprojThreshold": 3.0
        }
    }

def load_img(file, size, interpolation):
    img = cv2.imread(file)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, size, interpolation=interpolation)
    return img


def get_homography(src_pts, dst_pts, method, ransacReprojThreshold):
    homography, mask = cv2.findHomography(
        src_pts, 
        dst_pts, 
        method=method, 
        ransacReprojThreshold=ransacReprojThreshold
    )
    return homography, mask

def match_lightglue(img0, img1, cfg):
    img0 = load_image(img0)
    img1 = load_image(img1)
    
    if cfg["extractor"] == "SuperPoint":
        extractor = SuperPoint(max_num_keypoints=cfg["max_kpts"]).eval().to(cfg["device"])
        # matcher = LightGlue(features='superpoint').eval().to(cfg["device"])
        matcher = LightGlue(features='superpoint', depth_confidence=-1, width_confidence=-1).eval().to(cfg["device"])

    # if cfg["extractor"] == "DISK":
    #     extractor = DISK(max_num_keypoints=cfg["max_kpts"]).eval().to(cfg["device"])  # load the extractor
    #     matcher = LightGlue(features='disk').eval().to(cfg["device"])  # load the matcher

    # extract local features
    feats0 = extractor.extract(img0.to(cfg["device"]))  # auto-resize the image, disable with resize=None
    feats1 = extractor.extract(img1.to(cfg["device"]))
    
    # match the features
    matches01 = matcher({'image0': feats0, 'image1': feats1})
    feats0, feats1, matches01 = [rbd(x) for x in [feats0, feats1, matches01]]  # remove batch dimension
    
    # get results
    kpts0 = feats0["keypoints"]
    kpts1 = feats1["keypoints"]
    matches = matches01['matches']  # indices with shape (K,2)
    points0 = kpts0[matches[..., 0]]  # coordinates in img0, shape (K,2)
    points1 = kpts1[matches[..., 1]]  # coordinates in img1, shape (K,2)
        
    return {
        "points0": points0,
        "points1": points1,
        "matches01": matches01, 
        "matches": matches,
        "kpts0": kpts0,
        "kpts1": kpts1,
        "img0": img0,
        "img1": img1
    }

def lightglue2opencv(points0, points1, matches, kpts0, kpts1, img0, img1, **kwargs):
    return {
        "src_pts": points0.cpu().numpy().reshape(-1, 1, 2),
        "dst_pts": points1.cpu().numpy().reshape(-1, 1, 2),
        "kp0": cv2.KeyPoint_convert(kpts0.cpu().numpy()),
        "kp1": cv2.KeyPoint_convert(kpts1.cpu().numpy()),
        "matches": tuple(
            cv2.DMatch(matches[i][0].item(), matches[i][1].item(), 0.) 
            for i in range(matches.shape[0])
        ),
        "img0": cv2.cvtColor((255 * img0).cpu().numpy().astype(np.uint8).transpose(1, 2, 0), cv2.COLOR_RGB2GRAY),
        "img1": cv2.cvtColor((255 * img1).cpu().numpy().astype(np.uint8).transpose(1, 2, 0), cv2.COLOR_RGB2GRAY)
    }

def change_coord(homography_lightglue, x, y):
    H = homography_lightglue
    source_coord = np.array([x, y, 1], dtype='float32')
    transformed_coord = np.dot(H, source_coord.T)
    transformed_coord = transformed_coord / transformed_coord[2] # 정규 좌표로 변환
    transformed_coord = transformed_coord.astype('float32')
    transformed_coord_2d = transformed_coord[:2].tolist()
    
    return transformed_coord_2d

## Dataset 전처리

In [4]:
aircraft_datasets = "D:/aircraft_datasets"

lables = os.path.join(aircraft_datasets + "/label")

In [5]:
origin_coordinate = []

# 원점 좌표값 불러오기
for label_file in os.listdir(lables):
    label_path = os.path.join(lables, label_file)
    with open(label_path, "r") as f:
        json_file = json.load(f)
        coord = json_file["targetAnnotation"]
        coord[0] = coord[0] * 640
        coord[1] = coord[1] * 480
        origin_coordinate.append(coord)

print(origin_coordinate)
print(len(origin_coordinate))


[[319.171968, 270.55248], [320.0, 265.24536], [344.464896, 256.02912], [313.576128, 257.29579199999995], [325.48172800000003, 168.083808], [315.939648, 202.48910399999997], [325.479232, 168.080352], [312.391232, 306.426768], [320.0, 265.23864], [331.487168, 26.902847999999988], [316.5232, 203.087808], [329.47750399999995, 59.02296000000001], [320.0, 337.57583999999997], [324.136448, 161.35992000000002], [309.34656, 253.744368], [321.263104, 248.872656], [332.852352, 236.02262399999998], [326.04812799999996, 203.801712], [318.48947200000003, 251.060496], [320.964672, 255.825552], [321.25523200000003, 215.70609599999997], [319.453312, 225.751632], [319.45344, 180.868992], [321.200512, 215.63779200000002], [321.227712, 215.671728], [316.37516800000003, 230.084016], [316.20556799999997, 231.432768], [320.89824, 312.286224], [320.950912, 198.62135999999998], [315.928128, 231.49977600000003], [320.895168, 257.614128], [320.82163199999997, 257.47713600000003], [320.820608, 257.477952], [320.6

In [6]:
video_dir = os.path.join(aircraft_datasets, "video")
output_dir = os.path.join(aircraft_datasets, "frames_from_video")
stabilized_frame_path = os.path.join(aircraft_datasets, "stabilized_frame")

In [12]:
# #동영상에서 각 프레임을 이미지 파일로 저장하는 코드
# for video_filename in os.listdir(video_dir):
#     # video 파일 경로
#     video_path = os.path.join(video_dir, video_filename)
    
#     # video 파일 이름에서 확장자 제거하여 동영상 이름 추출
#     video_name = os.path.splitext(video_filename)[0]
    
#     # 해당 동영상의 프레임 저장 폴더 생성
#     video_output_dir = os.path.join(output_dir, video_name)
#     os.makedirs(video_output_dir, exist_ok=True)
    
#     # 동영상 파일 로드
#     video = cv2.VideoCapture(video_path)

#     # 프레임 카운터 초기화
#     frame_count = 0

#     while True:
#         # 동영상에서 프레임을 읽음
#         ret, frame = video.read()
#         if not ret:
#             break  # 동영상 끝에 도달하면 중단
        
#         # 프레임을 이미지 파일로 저장
#         frame_filename = os.path.join(video_output_dir, f'frame_{frame_count:04d}.jpg')
#         frame = cv2.resize(frame, (640, 480))
#         cv2.imwrite(frame_filename, frame)
        
#         frame_count += 1

#     # 자원 해제
#     video.release()

In [None]:
# video_dir = os.path.join(aircraft_datasets, "video")
# output_dir = os.path.join(aircraft_datasets, "frames_from_video")

# #동영상에서 각 프레임을 이미지 파일로 저장하는 코드
# for video_filename in os.listdir(video_dir):
#     # video 파일 경로
#     video_path = os.path.join(video_dir, video_filename)
    
#     # video 파일 이름에서 확장자 제거하여 동영상 이름 추출
#     video_name = os.path.splitext(video_filename)[0]
    
#     # 해당 동영상의 프레임 저장 폴더 생성
#     video_output_dir = os.path.join(output_dir, video_name)
#     os.makedirs(video_output_dir, exist_ok=True)
    
#     # 동영상 파일 로드
#     video = cv2.VideoCapture(video_path)

#     # 프레임 카운터 초기화
#     frame_count = 0

#     while True:
#         # 동영상에서 프레임을 읽음
#         ret, frame = video.read()
#         if not ret:
#             break  # 동영상 끝에 도달하면 중단
        
#         # 프레임을 이미지 파일로 저장
#         frame_filename = os.path.join(video_output_dir, f'frame_{frame_count:04d}.jpg')
#         frame = cv2.resize(frame, (640, 480))
#         cv2.imwrite(frame_filename, frame)
        
#         frame_count += 1

#     # 자원 해제
#     video.release()

In [7]:
# 원본 이미지 경로를 저장할 리스트
images = [[] for i in range(len(origin_coordinate))]
i = 0

# output_dir 내의 모든 폴더에 대한 반복
for folder_name in os.listdir(output_dir):
    folder_path = os.path.join(output_dir, folder_name)
    
    for name in os.listdir(folder_path):
        filename = os.path.join(folder_path, name)
        images[i].append(filename)
    
    i = i + 1

# images 리스트의 길이 반환
# num_images = len(images)
# print(f"총 이미지 수: {num_images}")

print(len(images[0]))

368


In [18]:
# # 이미지 안정화 알고리즘을 전체 비디오에 적용
# stabilizer = VidStab()

# # 안정화할 동영상 파일들이 있는 폴더 경로
# origin_video_path = os.path.join(aircraft_datasets, "video")

# # 안정화된 동영상 파일들을 저장할 폴더 경로
# stabilized_video_path = os.path.join(aircraft_datasets, "stabilized_video")

# # output_stabilized_frame 폴더가 없다면 생성합니다
# if not os.path.exists(stabilized_video_path):
#     os.makedirs(stabilized_video_path)

# # stablized_video_path 내의 모든 폴더에 대해 반복합니다
# for video in os.listdir(video_dir):
#     video_path = os.path.join(video_dir, video)
    
#     # 동영상 파일을 안정화하여 stablized_frame 폴더에 저장합니다
#     stabilized_video = stabilizer.stabilize(input_path=video_path, output_path=os.path.join(stabilized_video_path, video))
#     print(f"{video} stabilized and saved to {os.path.join(stabilized_video_path, video)}")




FuelPumpRemoval_00001.mp4 stabilized and saved to D:/aircraft_datasets\stabilized_video\FuelPumpRemoval_00001.mp4
FuelPumpRemoval_00002.mp4 stabilized and saved to D:/aircraft_datasets\stabilized_video\FuelPumpRemoval_00002.mp4
FuelPumpRemoval_00003.mp4 stabilized and saved to D:/aircraft_datasets\stabilized_video\FuelPumpRemoval_00003.mp4
FuelPumpRemoval_00004.mp4 stabilized and saved to D:/aircraft_datasets\stabilized_video\FuelPumpRemoval_00004.mp4
FuelPumpRemoval_00005.mp4 stabilized and saved to D:/aircraft_datasets\stabilized_video\FuelPumpRemoval_00005.mp4
FuelPumpRemoval_00006.mp4 stabilized and saved to D:/aircraft_datasets\stabilized_video\FuelPumpRemoval_00006.mp4
FuelPumpRemoval_00007.mp4 stabilized and saved to D:/aircraft_datasets\stabilized_video\FuelPumpRemoval_00007.mp4
FuelPumpRemoval_00008.mp4 stabilized and saved to D:/aircraft_datasets\stabilized_video\FuelPumpRemoval_00008.mp4
FuelPumpRemoval_00009.mp4 stabilized and saved to D:/aircraft_datasets\stabilized_video\

In [19]:
# #안정화된 동영상에서 프레임을 이미지 파일로 저장
# stabilized_frame_path = os.path.join(aircraft_datasets, "stabilized_frame")

# #동영상에서 각 프레임을 이미지 파일로 저장하는 코드
# for video_filename in os.listdir(stabilized_video_path):
#     # video 파일 경로
#     video_path = os.path.join(video_dir, video_filename)
    
#     # video 파일 이름에서 확장자 제거하여 동영상 이름 추출
#     video_name = os.path.splitext(video_filename)[0]
    
#     # 해당 동영상의 프레임 저장 폴더 생성
#     output_dir = os.path.join(stabilized_frame_path, video_name)
#     os.makedirs(output_dir, exist_ok=True)
    
#     # 동영상 파일 로드
#     video = cv2.VideoCapture(video_path)

#     # 프레임 카운터 초기화
#     frame_count = 0

#     while True:
#         # 동영상에서 프레임을 읽음
#         ret, frame = video.read()
#         if not ret:
#             break  # 동영상 끝에 도달하면 중단
        
#         # 프레임을 이미지 파일로 저장
#         frame_filename = os.path.join(output_dir, f'frame_{frame_count:04d}.jpg')
#         frame = cv2.resize(frame, (640, 480))
#         cv2.imwrite(frame_filename, frame)
        
#         frame_count += 1

#     # 자원 해제
#     video.release()

In [8]:
# 안정화 이미지 경로를 저장할 리스트
stablized_images = [[] for i in range(len(origin_coordinate))]
i = 0

# output_dir 내의 모든 폴더에 대한 반복
for folder_name in os.listdir(stabilized_frame_path):
    folder_path = os.path.join(stabilized_frame_path, folder_name)
    
    for name in os.listdir(folder_path):
        filename = os.path.join(folder_path, name)
        if os.path.isfile(filename):
            stablized_images[i].append(filename)
    
    i = i + 1

print(len(stablized_images[0]))

368


## Compare Homography Matrices

In [31]:
#원본 이미지를 기준으로 호모그래피 행렬을 구하고, 호모그래피 행렬을 이용하여 특징점의 좌표를 변환하는 코드
len_coord = len(origin_coordinate)

coord_list = [[] for _i in range(len(origin_coordinate))]

for i in range(len_coord):
    _images = images[i]
    _len_images = len(_images)
    x = origin_coordinate[i][0]
    y = origin_coordinate[i][1]
    
    # 두 번째 차원의 리스트 초기화
    coord_list[i] = [[] for _ in range(_len_images)]

    img0 = _images[0]
    for j in range(_len_images):
        if j != _len_images - 1:
            img1 = _images[j+1]

            # LightGlue
            results_lightglue = match_lightglue(img0, img1, cfg.lightglue)
            target_keypoint = results_lightglue["points0"].cpu().numpy()
            frame_keypoint = results_lightglue["points1"].cpu().numpy()

            homography, _ = CSRansac.csransac(target_keypoint, frame_keypoint)
            projected_pts = CSRansac.perspective_transform(np.array([x, y]), homography)

            coord_list[i][j].append(projected_pts)

In [34]:
# 각 행의 이름을 지정하기 위한 딕셔너리 생성
data_dict = {}
for i, row in enumerate(coord_list):
    key = f"video_{i + 1}"  # 각 행의 이름 생성 (row_1, row_2, ...)
    data_dict[key] = row

filename = "test_coord_list.json"
file_path = os.path.join(aircraft_datasets, filename)
with open(file_path, "w") as f:
    json.dump(data_dict, f, indent=4)

In [24]:
#안정화된 이미지를 기준으로 호모그래피 행렬을 구하고, 호모그래피 행렬을 이용하여 특징점의 좌표를 변환하는 코드
len_coord = len(origin_coordinate)

stable_coord_list = [[] for _i in range(len(origin_coordinate))]

for i in range(len_coord):
    _stablized_images = stablized_images[i]
    _len_stablized_images= len(_stablized_images)
    x = origin_coordinate[i][0]
    y = origin_coordinate[i][1]
    
    stable_coord_list[i] = [[] for _j in range(_len_stablized_images)]
    
    img0 = _stablized_images[0]
    for j in range(_len_stablized_images):
        if j != _len_stablized_images - 1:
            img1 = _stablized_images[j+1]
        
            # LightGlue
            results_lightglue = match_lightglue(img0, img1, cfg.lightglue)
            target_keypoint = results_lightglue["points0"].cpu().numpy()
            frame_keypoint = results_lightglue["points1"].cpu().numpy()
            
            homography, _ = CSRansac.csransac(target_keypoint, frame_keypoint)
            projected_pts = CSRansac.perspective_transform(np.array([x, y]), homography)
            
            stable_coord_list[i][j].append(projected_pts)

In [27]:
# 각 행의 이름을 지정하기 위한 딕셔너리 생성
stabilized_data_dict = {}
for i, row in enumerate(stable_coord_list):
    key = f"video_{i + 1}"  # 각 행의 이름 생성 (row_1, row_2, ...)
    stabilized_data_dict[key] = row

filename = "test_stabilized_coord_list.json"
file_path = os.path.join(aircraft_datasets, filename)
with open(file_path, "w") as f:
    json.dump(stabilized_data_dict, f, indent=4)

In [57]:
# for i in range(len(stable_coord_list[1])):
#     print(stable_coord_list[1][i])
    
# print(origin_coordinate[1])

[(320.0, 265.24536)]
[(321.3410281362208, 263.99715252606813)]
[(320.4579105263708, 265.4017293860557)]
[(322.7506653506821, 264.25106224484193)]
[(321.609030711605, 265.4011166248745)]
[(323.12295443314474, 263.3662179142266)]
[(321.92834221101714, 266.09945325926964)]
[(320.6351819674695, 265.0440713348464)]
[(324.1646619726952, 265.24504070673254)]
[(320.53197908289206, 262.3684231784588)]
[(318.1839405928217, 264.97535375167956)]
[(321.90913215191125, 262.8424213453671)]
[(318.62339802120675, 265.1963577480488)]
[(324.5378721923802, 265.54382478622756)]
[(320.5521185349833, 269.3236167708138)]
[(323.44450240311977, 264.22264029653314)]
[(324.43737944471843, 263.80273119510247)]
[(324.83807732026924, 266.0588801538426)]
[(322.99441337436355, 264.52479156216754)]
[(323.8203022643705, 264.70925823895067)]
[(323.086254304936, 264.3741331927215)]
[(324.72769568889413, 265.00859704745596)]
[(323.7577533327805, 265.48692988427825)]
[(326.6186091680363, 264.2118011462358)]
[(329.3827139416

In [10]:
# #프레임을 동영상으로 만드는 코드

# # 폴더 경로 설정
# folder = 'video'

# # 동영상 저장 경로 설정
# output_video_path = 'result_origin.mp4'

# # 동영상 속성 설정
# fourcc = cv2.VideoWriter_fourcc(*'XVID')  # 코덱 설정 (XVID를 사용하면 AVI 형식으로 저장)
# fps = 30.0  # 초당 프레임 수
# frame_width = 640  # 프레임 너비
# frame_height = 480  # 프레임 높이

# out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# x = 637 // 2
# y = 367 // 2

# i = 0
# for name in os.listdir(folder):
#     img = cv2.imread(os.path.join(folder, name))
#     if i == 0:
#         cv2.circle(img, (x, y), 3, (0, 0, 255), -1)
#     elif i == len_img - 1:
#         break
#     else:
#         x = round(coord_list[i][0])
#         y = round(coord_list[i][1])
#         cv2.circle(img, (x, y), 3, (0, 0, 255), -1)

#     i = i + 1
    
#     # 프레임을 동영상에 추가
#     out.write(img)

# # 동영상 저장 종료
# out.release()


## Error Estimate

In [9]:
float_origin_coordinate = []
lables = os.path.join(aircraft_datasets + "/label")
# 원점 좌표값 불러오기
for label in os.listdir(lables):
    label_path = os.path.join(lables, label)
    with open(label_path, "r") as f:
        json_file = json.load(f)
        coord = json_file["targetAnnotation"]
        float_origin_coordinate.append(coord)
    
        
print(float_origin_coordinate)
print(len(float_origin_coordinate))
print(type(float_origin_coordinate[0][0]))

[[0.4987062, 0.563651], [0.5, 0.5525945], [0.5382264, 0.5333939999999999], [0.4899627, 0.5360328999999999], [0.5085652, 0.3501746], [0.4936557, 0.42185229999999996], [0.5085613, 0.3501674], [0.4881113, 0.6383890999999999], [0.5, 0.5525804999999999], [0.5179487, 0.056047599999999975], [0.4945675, 0.4230996], [0.5148086, 0.12296450000000003], [0.5, 0.703283], [0.5064632, 0.33616650000000003], [0.483354, 0.5286341], [0.5019736, 0.5184847], [0.5200818, 0.4917138], [0.5094502, 0.4245869], [0.4976398, 0.5230427], [0.5015073, 0.5329699], [0.5019613, 0.44938769999999995], [0.4991458, 0.4703159], [0.499146, 0.3768104], [0.5018758, 0.4492454], [0.5019183, 0.4493161], [0.4943362, 0.4793417], [0.4940712, 0.4821516], [0.5014035, 0.6505963], [0.5014858, 0.41379449999999995], [0.4936377, 0.48229120000000003], [0.5013987, 0.5366961], [0.5012838, 0.5364107], [0.5012822, 0.5364124], [0.5010208, 0.6041888], [0.5011398, 0.5360265], [0.5, 0.6081664], [0.5, 0.53695], [0.5, 1.01116514], [0.4968961, 0.5822069

In [50]:
disappear_error = 0
num_error = 0
pixel_error = 0

for i in range(len_coord):
    origin_x = float_origin_coordinate[i][0]
    origin_y = float_origin_coordinate[i][1]
    
    for j in range(len(coord_list[i])-1):
        _coord = coord_list[i][j]
        
        x = _coord[0][0]
        y = _coord[0][1]
        
        x = x / 640
        y = y / 480
        
        x = round(x, 4)
        y = round(y, 4)
        
        # disappear_error
        if x < 0 or x > 1 or y < 0 or y > 1:
            disappear_error += 1
        
        distance = math.sqrt((origin_x - x)**2 + (origin_y - y)**2)
        
        # num_error
        if distance > 0.1:
            num_error += 1
        
        # pixel_error
        if distance > pixel_error:
            pixel_error = distance

print("disappear_error:", disappear_error)
print("num_error:", num_error)
print("pixel_error:", pixel_error)


disappear_error: 264
num_error: 873
pixel_error: 6.437033580597833


In [51]:
#stable_coord_list, origin_coordinate
num_error = 0
pixel_error = 0
disappear_error = 0

for i in range(len_coord):
    origin_x = float_origin_coordinate[i][0]
    origin_y = float_origin_coordinate[i][1]
    
    for j in range(len(stable_coord_list[i])-1):
        _stable_coord = stable_coord_list[i][j]
        
        x = _stable_coord[0][0]
        y = _stable_coord[0][1]
        
        x = x / 640
        y = y / 480
        
        x = round(x, 4)
        y = round(y, 4)
        
        # disappear_error
        if x < 0 or x > 1 or y < 0 or y > 1:
            disappear_error += 1
        
        distance = math.sqrt((origin_x - x)**2 + (origin_y - y)**2)
        
        # num_error
        if distance > 0.1:
            num_error += 1
        
        # pixel_error
        if distance > pixel_error:
            pixel_error = distance

print("disappear_error:", disappear_error)
print("num_error:", num_error)
print("pixel_error:", pixel_error)

disappear_error: 263
num_error: 830
pixel_error: 4.203426288478166


In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 'mps', 'cpu'

extractor = SuperPoint(max_num_keypoints=2048).eval().to(device)  # load the extractor
#matcher = LightGlue(features='superpoint', depth_confidence=0.9, width_confidence=0.95).eval().to(device)
matcher = LightGlue(features='superpoint', depth_confidence=-1, width_confidence=-1).eval().to(device)
#matcher.compile(mode='reduce-overhead')

In [15]:
#원본 이미지를 기준으로 호모그래피 행렬을 구하고, 호모그래피 행렬을 이용하여 특징점의 좌표를 변환하는 코드
len_coord = len(origin_coordinate)

coord_list = [[] for _i in range(len(origin_coordinate))]

disappear_errors = []
misannotate_errors = []
pixel_errors = []

# 10번 반복하여 측정한 에러를 구함
for k in range(1):
    # 좌표의 개수(동영상의 개수)만큼 반복
    for i in range(len_coord):
        _images = images[i]
        _len_images = len(_images)
        x = origin_coordinate[i][0]
        y = origin_coordinate[i][1]
        
        # 두 번째 차원의 리스트 초기화
        coord_list[i] = [[] for _ in range(_len_images)]

        img0 = _images[0] # 첫 번째 이미지를 target 이미지로 설정
        for j in range(_len_images):
            if j != _len_images - 1:
                img1 = _images[j+1]

                # # LightGlue
                # results_lightglue = match_lightglue(img0, img1, cfg.lightglue)
                # target_keypoint = results_lightglue["points0"].cpu().numpy()
                # frame_keypoint = results_lightglue["points1"].cpu().numpy()

                # homography, _ = CSRansac.csransac(target_keypoint, frame_keypoint)
                # projected_pts = CSRansac.perspective_transform(np.array([x, y]), homography)

                # coord_list[i][j].append(projected_pts)
                
                image0 = load_image(img0, grayscale=True)
                feats0 = extractor.extract(image0.to(device))
                image1 = load_image(img1, grayscale=True)
                feats1 = extractor.extract(image1.to(device))

                matches01 = matcher({"image0": feats0, "image1": feats1})
                feats0, feats1, matches01 = [
                    rbd(x) for x in [feats0, feats1, matches01]
                ]  # remove batch dimension

                kpts0, kpts1, matches = feats0["keypoints"], feats1["keypoints"], matches01["matches"]
                m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]

                homography, mask = CSRansac.csransac(m_kpts0.cpu().numpy(), m_kpts1.cpu().numpy())
                print(mask)
                projected_pts = CSRansac.perspective_transform(np.array([x, y]), homography)
                
                coord_list[i][j].append(projected_pts)
                
    #에러 측정            
    disappear_error = 0
    num_error = 0
    pixel_error = 0

    for i in range(len_coord):
        origin_x = float_origin_coordinate[i][0]
        origin_y = float_origin_coordinate[i][1]
        
        for j in range(len(coord_list[i])-1):
            _coord = coord_list[i][j]
            
            x = _coord[0][0]
            y = _coord[0][1]
            
            x = x / 640
            y = y / 480
            
            x = round(x, 4)
            y = round(y, 4)
            
            # disappear_error
            if x < 0 or x > 1 or y < 0 or y > 1:
                disappear_error += 1
                continue
            
            distance = math.sqrt((origin_x - x)**2 + (origin_y - y)**2)
            
            # num_error
            if distance > 0.1:
                num_error += 1
            
            # pixel_error
            if distance > pixel_error:
                pixel_error = distance
                
    print("disappear_error:", disappear_error)
    print("num_error:", num_error)
    print("pixel_error:", pixel_error)

    disappear_errors.append(disappear_error)
    misannotate_errors.append(num_error)
    pixel_errors.append(pixel_error)

NameError: name 'origin_coordinate' is not defined

In [13]:
#안정화된 이미지를 기준으로 호모그래피 행렬을 구하고, 호모그래피 행렬을 이용하여 특징점의 좌표를 변환하는 코드
len_coord = len(origin_coordinate)

stable_coord_list = [[] for _i in range(len(origin_coordinate))]

stable_disappear_errors = []
stable_misannotate_errors = []
stable_pixel_errors = []

# 10번 반복하여 측정한 에러를 구함
for k in range(10):
    # 좌표의 개수(동영상의 개수)만큼 반복
    for i in range(len_coord):
        _stablized_images = stablized_images[i]
        _len_stablized_images= len(_stablized_images)
        x = origin_coordinate[i][0]
        y = origin_coordinate[i][1]
        
        stable_coord_list[i] = [[] for _j in range(_len_stablized_images)]
        
        img0 = _stablized_images[0]
        for j in range(_len_stablized_images):
            if j != _len_stablized_images - 1:
                img1 = _stablized_images[j+1]
            
                # LightGlue
                results_lightglue = match_lightglue(img0, img1, cfg.lightglue)
                target_keypoint = results_lightglue["points0"].cpu().numpy()
                frame_keypoint = results_lightglue["points1"].cpu().numpy()
                
                homography, _ = CSRansac.csransac(target_keypoint, frame_keypoint)
                projected_pts = CSRansac.perspective_transform(np.array([x, y]), homography)
                
                stable_coord_list[i][j].append(projected_pts)
                
    #에러 측정            
    stable_disappear_error = 0
    stable_num_error = 0
    stable_pixel_error = 0

    for i in range(len_coord):
        origin_x = float_origin_coordinate[i][0]
        origin_y = float_origin_coordinate[i][1]
        
        for j in range(len(stable_coord_list[i])-1):
            _stable_coord = stable_coord_list[i][j]
            
            x = _stable_coord[0][0]
            y = _stable_coord[0][1]
            
            x = x / 640
            y = y / 480
            
            x = round(x, 4)
            y = round(y, 4)
            
            # disappear_error
            if x < 0 or x > 1 or y < 0 or y > 1:
                stable_disappear_error += 1
                continue
            
            distance = math.sqrt((origin_x - x)**2 + (origin_y - y)**2)
            
            # num_error
            if distance > 0.1:
                stable_num_error += 1
            
            # pixel_error
            if distance > stable_pixel_error:
                stable_pixel_error = distance

    print("stable_disappear_error:", stable_disappear_error)
    print("stable_num_error:", stable_num_error)
    print("stable_pixel_error:", stable_pixel_error)
    print()

    stable_disappear_errors.append(stable_disappear_error)
    stable_misannotate_errors.append(stable_num_error)
    stable_pixel_errors.append(stable_pixel_error)

stable_disappear_error: 264
stable_num_error: 835
stable_pixel_error: 0.49876986491813036

stable_disappear_error: 256
stable_num_error: 798
stable_pixel_error: 0.46954623553951536

stable_disappear_error: 265
stable_num_error: 823
stable_pixel_error: 0.5616874140210104

stable_disappear_error: 259
stable_num_error: 808
stable_pixel_error: 0.46664061495931747

stable_disappear_error: 248
stable_num_error: 812
stable_pixel_error: 0.5707254981421541



KeyboardInterrupt: 

## check speed

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 'mps', 'cpu'

extractor = SuperPoint(max_num_keypoints=2048).eval().to(device)  # load the extractor
#matcher = LightGlue(features='superpoint', depth_confidence=0.9, width_confidence=0.95).eval().to(device)
matcher = LightGlue(features='superpoint', depth_confidence=-1, width_confidence=-1).eval().to(device)
#matcher.compile(mode='reduce-overhead')

In [13]:
# video_frames 폴더에서 프레임 파일 리스트 가져오기
video_frames = os.listdir('video')

# 프레임 수 초기화
frame_count = 0

# 프레임 별 처리 시간 리스트 초기화
frame_processing_times = []

x = 637 // 2
y = 367 // 2
image0 = load_image("img1.png", grayscale=True)

# 각 프레임 처리
for frame in video_frames:
    start_time = time.time()
    
    #image0 = load_image("img1.png", grayscale=True)
    feats0 = extractor.extract(image0.to(device))
    image1 = load_image(os.path.join('video', frame), grayscale=True)
    feats1 = extractor.extract(image1.to(device))

    matches01 = matcher({"image0": feats0, "image1": feats1})
    feats0, feats1, matches01 = [
        rbd(x) for x in [feats0, feats1, matches01]
    ]  # remove batch dimension

    kpts0, kpts1, matches = feats0["keypoints"], feats1["keypoints"], matches01["matches"]
    m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]

    homography, _ = CSRansac.csransac(m_kpts0.cpu().numpy(), m_kpts1.cpu().numpy())
    projected_pts = CSRansac.perspective_transform(np.array([x, y]), homography)

    # 현재 시간 측정
    current_time = time.time()

    # 프레임 처리 시간 계산
    frame_processing_time = current_time - start_time
    frame_processing_times.append(frame_processing_time)

    # 이전 프레임 처리 시간 업데이트
    prev_frame_time = current_time

    # FPS 계산
    fps = 1.0 / frame_processing_time

    # 프레임 수 증가
    frame_count += 1

    # 이미지 및 매칭 시각화 코드 (생략)

# 전체 처리 시간 계산
total_processing_time = sum(frame_processing_times)

# 전체 프레임 수와 전체 처리 시간을 사용하여 평균 FPS 계산
average_fps = frame_count / total_processing_time

print(f"Total Frames Processed: {frame_count}")
print(f"Average FPS: {average_fps:.2f}")

Total Frames Processed: 368
Average FPS: 7.01
