In [1]:
from scipy.sparse import csr_matrix, lil_matrix
from tqdm.auto import tqdm
import numpy as np
import os

np.__version__

'1.18.4'

In [14]:
# Identifiers of the available runs; `current_run` selects the one to process.
run_1 = '2014-12-09-13-21-02'
run_2 = '2014-12-10-18-10-50'
run_3 = '2015-05-19-14-06-38'
current_run = run_1

# NOTE(review): absolute, machine-specific location — adjust when running elsewhere.
exp_dir = f"/projects/c/place_recognition/output/{current_run}/results"
if not os.path.isdir(exp_dir):
    raise IOError(f"Results dir {exp_dir} does not exist")
print(f"Processing {exp_dir}")

# Exactly one entry in the results directory is expected to carry this prefix.
filename_prefix = 'htmap_loops_'
possible_filenames = [name for name in os.listdir(exp_dir) if name.startswith(filename_prefix)]
if not possible_filenames:
    raise IOError(f"No file with prefix {filename_prefix} found")
if len(possible_filenames) > 1:
    raise IOError(f"Multiple files with prefix {filename_prefix} found: {possible_filenames}")

filename = possible_filenames[0]
filepath = os.path.join(exp_dir, filename)
if not os.path.exists(filepath):
    raise IOError(f"File {filepath} not found")
print(f"Processing file {filename}")

Processing /projects/c/place_recognition/output/2014-12-09-13-21-02/results
Processing file htmap_loops_45.txt


## Without multiprocessing (sequential version, commented out)

In [3]:
# %%time
# image_match_list = []
# with open(filepath, 'r') as loops_file:
#     lines = loops_file.readlines()
#     for i, line in enumerate(tqdm(lines)):
#         match_indices = np.where(np.asarray(line.split('\t')[:-1]).astype('bool') == True)
#         image_match_list.append(match_indices)

# len(image_match_list)

## With multiprocessing

In [None]:
%%time
# Start from an empty list; the name is reassigned later in this cell with the
# per-line results returned by the worker pool.
image_match_list = []

from multiprocessing import Pool
def get_match_indices(line):
    """Return the column indices whose flag is non-zero in one tab-separated row.

    Each token is parsed explicitly as an integer and compared against zero.
    Casting a string array straight to ``bool`` is not used because its result
    depends on the NumPy version: from NumPy 2.0 on, any non-empty string
    (including ``'0'``) is truthy, which would report every column as a match.

    Args:
        line: One row of the file: integer flags separated by tabs, optionally
            followed by a trailing tab and/or a line terminator.

    Returns:
        1-D integer ``numpy.ndarray`` with the positions of the non-zero flags
        (empty when the row has no tokens or no flag is set).

    Raises:
        ValueError: If a non-empty token is not an integer literal.
    """
    # Strip the line terminator and discard empty tokens (e.g. the one produced
    # by a trailing tab) instead of blindly dropping the last token, so a row
    # without a trailing tab does not lose its final column.
    tokens = [tok for tok in line.rstrip('\r\n').split('\t') if tok.strip()]
    flags = np.array([int(tok) != 0 for tok in tokens], dtype=bool)
    return np.flatnonzero(flags)

# Load every row first so the file handle is released before the workers start.
with open(filepath, 'r') as loops_file:
    lines = list(loops_file)

# One task per row; imap yields results in input order, so entry i of the
# resulting list belongs to row i of the file. tqdm only reports progress.
with Pool() as pool:
    match_iter = pool.imap(get_match_indices, lines)
    image_match_list = list(tqdm(match_iter, total=len(lines)))

len(image_match_list)

In [5]:
# Write one output row per input row: the tab-separated match indices,
# or the sentinel '-1' when the row has no matches.
output_path = os.path.join(exp_dir, 'htmap_image_matches.txt')
with open(output_path, 'w') as f:
    for match_list in image_match_list:
        row = '\t'.join(str(idx) for idx in match_list) if len(match_list) else '-1'
        f.write(row + '\n')