In [27]:
import numpy as np
import pandas as pd
import json
import glob
import os
from tqdm.auto import tqdm
from pathlib import Path

In [28]:
# Datasets this notebook knows about; the index below picks the active one.
dataset_names = ['new_college', 'city_centre']
active_dataset_idx = 1
dataset_name = dataset_names[active_dataset_idx]

# PascalCase spelling of the active name ('city_centre' -> 'CityCentre'),
# used further down to build the loop-matrix file name.
title_cased_name = dataset_name.title()
dataset_name_pascal = title_cased_name.replace("_", "")
dataset_name

'city_centre'

In [29]:
# Root of the shared storage, with '~' expanded to the current user's home directory.
cephshare = Path('~/cephshare').expanduser()

# Ground-truth directory of the selected dataset (kept as a plain string,
# since later cells build paths from it with f-strings and os.path.join).
dataset_subdir = f'compsci/public/new_college/{dataset_name}'
gt_dir = f'{cephshare}/{dataset_subdir}'

# Show the resolved path and whether it exists on disk.
(gt_dir, os.path.exists(gt_dir))

('/home/saravanabalagi/cephshare/compsci/public/new_college/city_centre', True)

# Load Ground Truth Loops

In [30]:
# The loop matrix file name is the PascalCase dataset name followed by 'TextFormat.txt'.
gt_loops_matrix_filename = f'{dataset_name_pascal}TextFormat.txt'
gt_loops_matrix_file = f'{gt_dir}/{gt_loops_matrix_filename}'

# Show the resolved path and whether it exists on disk.
(gt_loops_matrix_file, os.path.exists(gt_loops_matrix_file))

('/home/saravanabalagi/cephshare/compsci/public/new_college/city_centre/CityCentreTextFormat.txt',
 True)

In [31]:
# Parse the comma-separated matrix as integers, then view every non-zero entry as True.
# presumably entry (i, j) marks frame j as a loop closure of frame i — confirm against the dataset docs
loops_matrix = np.genfromtxt(gt_loops_matrix_file, delimiter=',', dtype=int).astype(bool)

(loops_matrix.shape, loops_matrix.dtype)

((2474, 2474), dtype('bool'))

In [32]:
# For each row of the boolean matrix, gather the column indices that are set.
# `loops[k]` is an index array; it is empty when row k contains no True entry.
loops = [np.where(matrix_row)[0] for matrix_row in loops_matrix]

# Show the number of rows plus a small slice as a sanity check.
(len(loops), loops[200:210])

(2474,
 [array([], dtype=int64),
  array([], dtype=int64),
  array([], dtype=int64),
  array([], dtype=int64),
  array([], dtype=int64),
  array([], dtype=int64),
  array([], dtype=int64),
  array([], dtype=int64),
  array([], dtype=int64),
  array([], dtype=int64)])

# Load Ground Truth Poses

In [33]:
# The per-image coordinates file sits directly inside the ground-truth directory.
gt_poses_filename = 'ImageCollectionCoordinates.txt'
gt_poses_file = os.path.join(gt_dir, gt_poses_filename)

# Show the resolved path and whether it exists on disk.
(gt_poses_file, os.path.exists(gt_poses_file))

('/home/saravanabalagi/cephshare/compsci/public/new_college/city_centre/ImageCollectionCoordinates.txt',
 True)

In [34]:
# Read the whitespace-separated coordinates file (no header row): an image name
# followed by two numeric columns per line.
# `sep=r'\s+'` is the documented equivalent of `delim_whitespace=True`, which is
# deprecated since pandas 2.2 and emits a FutureWarning there.
# NOTE(review): 'latitude' values such as 303.86 fall outside [-90, 90], so these
# are presumably local metric coordinates rather than degrees — confirm before
# relying on the column names.
gt = pd.read_csv(gt_poses_file, sep=r'\s+', header=None, names=['img', 'latitude', 'longitude'])
# Disabled variant, presumably for an input whose 'imgs' column holds JSON — kept for reference:
# gt['imgs'] = gt['imgs'].apply(lambda x: int(json.loads(x)['stereoCentre'].split('.jpg')[0]))
# gt = gt.rename(columns={'imgs': 'img'})
# Add 'img' as a second index level next to the positional one, while keeping it as a column.
gt = gt.set_index(['img'], append=True, drop=False)

gt

Unnamed: 0_level_0,Unnamed: 1_level_0,img,latitude,longitude
Unnamed: 0_level_1,img,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0001.jpg,0001.jpg,303.864302,-57.550852
1,0002.jpg,0002.jpg,303.057736,-57.596078
2,0003.jpg,0003.jpg,302.223256,-57.581016
3,0004.jpg,0004.jpg,301.449563,-57.618403
4,0005.jpg,0005.jpg,300.541200,-57.707595
...,...,...,...,...
2469,2470.jpg,2470.jpg,190.059861,-147.638236
2470,2471.jpg,2471.jpg,189.469524,-147.432693
2471,2472.jpg,2472.jpg,188.714516,-146.758052
2472,2473.jpg,2473.jpg,188.045171,-146.442022


In [35]:
# Attach the per-image loop index arrays as a 'loops' column (row k receives loops[k]).
gt = gt.assign(loops=loops)
gt

Unnamed: 0_level_0,Unnamed: 1_level_0,img,latitude,longitude,loops
Unnamed: 0_level_1,img,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0001.jpg,0001.jpg,303.864302,-57.550852,[]
1,0002.jpg,0002.jpg,303.057736,-57.596078,[]
2,0003.jpg,0003.jpg,302.223256,-57.581016,[]
3,0004.jpg,0004.jpg,301.449563,-57.618403,[]
4,0005.jpg,0005.jpg,300.541200,-57.707595,[]
...,...,...,...,...,...
2469,2470.jpg,2470.jpg,190.059861,-147.638236,"[252, 253, 254, 255, 256, 257, 258, 259, 260, ..."
2470,2471.jpg,2471.jpg,189.469524,-147.432693,"[251, 252, 253, 254, 255, 256, 257, 258, 259, ..."
2471,2472.jpg,2472.jpg,188.714516,-146.758052,"[250, 251, 252, 253, 254, 255, 256, 257, 258, ..."
2472,2473.jpg,2473.jpg,188.045171,-146.442022,"[249, 250, 251, 252, 253, 254, 255, 256, 257, ..."


## Combine consecutive rows

HTMap combines `image_left` and `image_right` into one image, so consecutive rows are merged in pairs:
- `0201.jpg` + `0202.jpg` = `0201_0202.jpg`
- `[9, 10, 11, 12, 13, 14, 15]` + `[10, 11, 12, 13, 14, 15, 16]`
    - Combine `[9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15, 16]`
    - Divide by 2 `[4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0]`
    - Apply Floor `[4, 5, 5, 6, 6, 7, 7, 5, 5, 6, 6, 7, 7, 8]`
    - Pick Unique `[4, 5, 6, 7, 8]`

In [36]:
# Worked example: merge the loop lists of two consecutive frames step by step.
combine_idx_0, combine_idx_1 = 200, 201
# NOTE: `gt` carries a two-level (position, img) index, so `gt.loops[k]` prints as a
# one-row Series rather than as the bare index array.
print(gt.loops[combine_idx_0], gt.loops[combine_idx_1], "\n")

# Step 1: concatenate both index arrays.
combine_loops_eg = np.append(loops[combine_idx_0], loops[combine_idx_1])
print(combine_loops_eg)

# Step 2: halve every index (two source frames map onto one combined frame).
halved_eg = combine_loops_eg / 2
print(halved_eg)

# Step 3: cast to int, dropping the fractional part.
truncated_eg = np.array(halved_eg, dtype=int)
print(truncated_eg)

# Step 4: keep each combined index once.
print(np.unique(truncated_eg))

img
0201.jpg    []
Name: loops, dtype: object img
0202.jpg    []
Name: loops, dtype: object 

[]
[]
[]
[]


In [37]:
# Merge every pair of consecutive frames (rows 0+1, 2+3, ...) into one combined entry
# holding the combined image name and the combined loop indices.
gt_values = gt.values

# Look the column positions up by name instead of hard-coding 0 and 3, so that adding
# or reordering columns in `gt` cannot silently select the wrong field.
gt_column_names = list(gt.columns)
img_col = gt_column_names.index('img')
loops_col = gt_column_names.index('loops')

# Pairing needs an even number of frames; report that clearly up front instead of
# failing with an IndexError on the dangling last row.
if len(gt_values) % 2 != 0:
    raise ValueError(f'Expected an even number of frames to pair up, got {len(gt_values)}')

gt_combined_values = []
for i in range(0, len(gt_values), 2):
    row_current = gt_values[i]
    row_next = gt_values[i + 1]
    # '0201.jpg' + '0202.jpg' -> '0201_0202.jpg'
    img_combined = f'{os.path.splitext(row_current[img_col])[0]}_{row_next[img_col]}'
    # Source frame index j maps to combined index j // 2: pool both lists, map them,
    # then de-duplicate (np.unique also returns the result sorted).
    loops_combined = np.append(row_current[loops_col], row_next[loops_col])
    loops_combined = np.unique(loops_combined // 2)
    gt_combined_values.append([img_combined, loops_combined])

len(gt_combined_values), gt_combined_values[100:105]

(1237,
 [['0201_0202.jpg', array([], dtype=int64)],
  ['0203_0204.jpg', array([], dtype=int64)],
  ['0205_0206.jpg', array([], dtype=int64)],
  ['0207_0208.jpg', array([], dtype=int64)],
  ['0209_0210.jpg', array([], dtype=int64)]])

In [38]:
# Tabulate the merged pairs: one row per combined image with its loop index array.
combined_columns = ['img', 'loops']
gt_combined = pd.DataFrame(data=gt_combined_values, columns=combined_columns)
gt_combined

Unnamed: 0,img,loops
0,0001_0002.jpg,[]
1,0003_0004.jpg,[]
2,0005_0006.jpg,[]
3,0007_0008.jpg,[]
4,0009_0010.jpg,[]
...,...,...
1232,2465_2466.jpg,"[128, 129, 130, 131, 132, 133, 134, 135, 136, ..."
1233,2467_2468.jpg,"[127, 128, 129, 130, 131, 132, 133, 134, 135, ..."
1234,2469_2470.jpg,"[126, 127, 128, 129, 130, 131, 132, 133, 134, ..."
1235,2471_2472.jpg,"[125, 126, 127, 128, 129, 130, 131, 132, 133, ..."


# Write GT Loops to file

## Write to CSV

- Add comments about representation
- Add settings used
- Add headers

### Use space saving representation

- Long lists take up a lot of space when every element is serialized individually.
- If a list is known to consist of runs of consecutive values, it is more compact to store it as a list of ranges.

Example: 
```
[[0, 1, 2, 3, 4], [7, 8, 9], [11]]
```
can be represented as a list of ranges:
```
0:4;7:9;11
```

In [39]:
from utils import list_minify

# Write the combined ground-truth loops to '<dataset_name>/loops.csv':
# a '#'-prefixed comment header, a column header row, then one row per combined image.
loops_file = f'{dataset_name}/loops.csv'
# open(..., 'w') does not create missing parent directories, so make sure the
# per-dataset output directory exists before writing (no-op when it already does).
os.makedirs(dataset_name, exist_ok=True)
with open(loops_file, 'w') as f:
    # NOTE(review): the header names only the poses file as the source, although the
    # loop indices themselves are read from `gt_loops_matrix_file` — consider mentioning both.
    comment = f"""\
# CSV Comma Separated; Lists Semicolon Separated; Ranges Colon Separated Both Inclusive
# Ground Truth Loop Closure File, generated from {gt_poses_file}
# Combined 0001.jpg and 0002.jpg for HTMap to 0001_0002.jpg
# Combined their corresponding loops [10 11 12 13 14 15 16] and [11 12 13 14 15 16 17] to [5, 6, 7, 8]
# 
img,index,loops
"""
    # The header already ends with a newline, so suppress print's own line ending.
    print(comment, end="", file=f)
    for i, (t, loops_row) in enumerate(zip(gt_combined.img.values, gt_combined.loops.values)):
        # presumably renders the index array in the compact range notation described
        # in the header (e.g. 0:4;7:9;11) — see utils.list_minify to confirm
        loops_row_str = list_minify(loops_row)
        print(f'{t},{i},{loops_row_str}', file=f)