In [1]:
import os
import numpy as np
import cv2
import h5py
import torch
import matplotlib.pyplot as plt
import random
%matplotlib inline

In [2]:
def _rebase_image_path(rel_path, hdf5_folder: str) -> str:
    """Rebuild one stored image path so that it is located under ``hdf5_folder``.

    Args:
        rel_path: Path as stored in the HDF5 file, either ``str`` or UTF-8
            encoded ``bytes``, using ``/`` as separator.
        hdf5_folder: Folder the returned path is joined onto.

    Returns:
        ``hdf5_folder`` joined with the stored path, after removing every
        ``..``, ``sun3d_extracted`` and empty segment.
    """
    # Decode if the path is a byte string
    if isinstance(rel_path, bytes):
        rel_path = rel_path.decode('utf-8')

    # Empty segments come from a leading '/' or a doubled '//'. If one ends up
    # first, the rebuilt path starts with '/' and os.path.join would silently
    # discard hdf5_folder, so they are dropped together with the other
    # location-specific segments.
    kept_parts = [
        part for part in rel_path.split('/')
        if part not in ('', '..', 'sun3d_extracted')
    ]
    return os.path.join(hdf5_folder, '/'.join(kept_parts))


def load_pairs_and_unique_paths_from_hdf5(hdf5_file_path: str, hdf5_folder: str) -> tuple:
    """Load every entry of the ``pairs`` group of an HDF5 file.

    Args:
        hdf5_file_path: Path of the HDF5 file to open (read-only).
        hdf5_folder: Folder that the stored image paths are re-rooted onto.

    Returns:
        A ``(loaded_pairs, unique_paths)`` tuple where ``loaded_pairs`` is a
        list with one dict per pair (keys ``'img_paths'``, ``'points1'``,
        ``'pos_points2'``, ``'neg_points2'``; the image paths are a tuple of
        strings, the point sets are ``torch`` tensors) and ``unique_paths`` is
        the set of all image paths seen across the pairs.

    Raises:
        KeyError: If the file has no ``pairs`` group or a pair lacks one of
            the expected datasets.
    """
    point_keys = ('points1', 'pos_points2', 'neg_points2')
    loaded_pairs = []
    unique_paths = set()

    with h5py.File(hdf5_file_path, 'r') as hdf:
        pairs_group = hdf['pairs']
        for pair_name in pairs_group:
            pair_group = pairs_group[pair_name]

            # `[()]` reads the whole dataset into memory
            img_paths = tuple(
                _rebase_image_path(path, hdf5_folder)
                for path in pair_group['img_paths'][()]
            )
            unique_paths.update(img_paths)

            pair = {'img_paths': img_paths}
            for key in point_keys:
                pair[key] = torch.tensor(pair_group[key][()])
            loaded_pairs.append(pair)

    return loaded_pairs, unique_paths

In [3]:
# Locate the SUN3D training data two levels above the working directory;
# the HDF5 file with the pairs sits directly inside that folder.
current_directory = os.getcwd()
output_path = os.path.join(
    os.path.join(current_directory, os.pardir, os.pardir),
    'datasets',
    'sun3d_training',
)
hdf5_file_path = os.path.join(output_path, 'pairs.hdf5')

In [4]:
# Read all pairs from the HDF5 file, re-rooting their image paths onto the
# dataset folder, and collect the set of distinct image paths.
loaded_pairs, unique_paths = load_pairs_and_unique_paths_from_hdf5(
    hdf5_file_path=hdf5_file_path,
    hdf5_folder=output_path,
)

In [5]:
def write_unique_paths_to_file(unique_paths: set, file_path: str):
    """Write the given paths to a text file, one per line, in sorted order.

    Args:
        unique_paths: Collection of path strings to write.
        file_path: Destination file; it is created or overwritten.

    The file is written as UTF-8 regardless of the platform's default
    encoding, so paths containing non-ASCII characters are stored the same way
    on every machine.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(path + '\n' for path in sorted(unique_paths))

In [6]:
# Store the listing of distinct image paths inside the dataset folder.
txt_file_path = os.path.join(output_path, 'unique_image_paths.txt')
write_unique_paths_to_file(unique_paths=unique_paths, file_path=txt_file_path)