Playing a bit with the data before writing a proper utils file.

In [1]:
import os
import torch
from tqdm import tqdm

In [2]:
def get_camera_params_for_file(filepath):
    """
    Parses camera parameters from a given RealEstate10K file.

    The first line of the file is taken as the video URL. Every following
    non-blank line describes one frame as whitespace-separated fields:
    field 0 is the timestamp, fields 1-4 are f_x, f_y, c_x, c_y, fields 5-6
    are skipped by this parser, and the remaining 12 fields are the row-major
    3x4 extrinsics matrix. Blank or whitespace-only frame lines (for example
    extra newlines at the end of the file) are ignored.

    Args:
        filepath (str): Path for the txt file.

    Returns:
        tuple:
            - video_url (str): URL of the video.
            - K (torch.Tensor): 3x3 camera intrinsics matrix.
            - timestamps (list[int]): List of timestamps (in microseconds) for each frame.
            - extrinsics (list[torch.Tensor]): List of 3x4 camera extrinsics matrices (one per frame).

    Raises:
        IndexError: If the file is empty or contains no frame lines.
        ValueError: If a timestamp or numeric field cannot be parsed.
        RuntimeError: If a frame does not carry exactly 12 extrinsics values
            (raised by the reshape to 3x4).
    """

    with open(filepath, 'r') as file:
        lines = file.readlines()

    # video URL
    video_url = lines[0].strip()

    # Tokenize every frame line once; blank lines are dropped so that stray
    # newlines do not crash the parse with an IndexError.
    frames = [line.split() for line in lines[1:] if line.strip()]

    # camera intrinsics (done just on first frame since it is constant across time)
    f_x, f_y, c_x, c_y = map(float, frames[0][1:5])
    K = torch.tensor([[f_x, 0, c_x],
                      [0, f_y, c_y],
                      [0, 0, 1]], dtype=torch.float32)
    # The values are normalized: the top left corner of the image is (0, 0)
    # and the bottom right corner is (1, 1). For an image of size (w, h) the
    # pixel-space intrinsics are w * f_x, h * f_y, w * c_x, h * c_y.

    # timestamps and camera extrinsics, one entry per frame
    timestamps = [int(frame[0]) for frame in frames]
    extrinsics = [
        torch.tensor([float(value) for value in frame[7:]],
                     dtype=torch.float32).reshape(3, 4)
        for frame in frames
    ]

    return video_url, K, timestamps, extrinsics

In [None]:
def parse_directory(directory):
    """
    Parses all RealEstate10K files in a given directory.

    Only entries whose name ends with ".txt" are parsed. Files are processed
    in sorted filename order so the returned list is reproducible across runs
    and machines.

    Args:
        directory (str): Path for the directory.

    Returns:
        list: A list of tuples where each tuple contains: (video_url, K, timestamps, extrinsics).
    """

    # os.listdir returns entries in arbitrary order, hence the explicit sort.
    txt_files = sorted(name for name in os.listdir(directory) if name.endswith(".txt"))

    camera_params = []
    for name in tqdm(txt_files, desc="Processing RealEstate10K files", unit="file"):
        filepath = os.path.join(directory, name)
        # The parser already returns the (video_url, K, timestamps, extrinsics) tuple.
        camera_params.append(get_camera_params_for_file(filepath))

    return camera_params

In [4]:
# camera_params_test = parse_directory("RealEstate10K/RealEstate10K/test")
# print(f"Number of videos in test set: {len(camera_params_test)}")

In [5]:
# camera_params_train = parse_directory("RealEstate10K/RealEstate10K/train")
# print(f"Number of videos in train set: {len(camera_params_train)}")