# 0: Import packages

In [None]:
from PIL import Image, ImageFile #pip install Pillow==9.4.0
import sys
import os
import numpy as np
import logging
import glob
import subprocess
import json
import pandas as pd

from ImageCrop import ImagePreprocessor
from SpotterWrapper import Spotter, PolygonVisualizer
from IPython.display import display
from shapely.geometry import Polygon

logging.basicConfig(level=logging.INFO)
Image.MAX_IMAGE_PIXELS=None
ImageFile.LOAD_TRUNCATED_IMAGES = True


# 1: Specify filepaths

In [None]:
# Name folders for raw data and processed data
map_data_topfolder = 'raw_maps_20231024'
map_strec_topfolder = 'processed/strec'

for fp in [map_strec_topfolder]:
    if not os.path.isdir(fp):
        os.makedirs(fp)

# IMPORTANT! Locate spotter directory and detectron weights
git_clone_location = 'C:/repo/'
spotter_directory = git_clone_location + 'mapkurator-spotter/spotter-v2'
model_weights = git_clone_location + 'detectron2-master/detectron2/checkpoint/model_v2_en.pth'
spotter_config = spotter_directory + '/configs/PALEJUN/Finetune/Rumsey_Polygon_Finetune.yaml'

# 2: Crop all jpeg maps in (user defined) map_data_topfolder

In [None]:
def pyramid_scan(img_path, output_dir, save_each_layer=False):
    image = Image.open(img_path)
    image_preprocessor = ImagePreprocessor(image, overlapping_tolerance=0.3, num_layers=5, min_patch_resolution=512, max_patch_resolution=4096)
    image_preprocessor.process()
    print("preprocessing done")
    spotter = Spotter(spotter_config, model_weights, confidence_thresh=0.8, draw_thresh=0.85)
    all_layer_results = []

    base_image_batch, base_offset_xs, base_offset_ys = image_preprocessor.get_image_patches(0)
    vis = PolygonVisualizer()
    vis.canvas_from_patches(base_image_batch, base_offset_xs, base_offset_ys)

    for i in range(image_preprocessor.num_layers):
        # If you want to save for each layer, uncomment the following line
        # image_preprocessor.save_patches(os.path.join(output_dir, f'layer_{i}_patches'), layer=i)

        image_batch, offset_xs, offset_ys = image_preprocessor.get_image_patches(i)
        spotter.load_batch(image_batch, offset_xs, offset_ys)
        results = spotter.inference_batch()
        all_layer_results.extend(results)

        #all_layer_offset_xs.extend(offset_xs)
        #all_layer_offset_ys.extend(offset_ys)

        if save_each_layer == True:
            vis.draw(results).save(os.path.join(output_dir, f'combined_tagged_{i}.png'))
            vis.save_json(results, os.path.join(output_dir, f'combined_tagged_{i}.json'))
        else:
            pass

    vis.draw(all_layer_results).save(os.path.join(output_dir, f'combined_tagged_all_layers.png'))
    vis.save_json(all_layer_results, os.path.join(output_dir, f'combined_tagged_all_layers.json'))

# Run crop on all maps
for map_data_subfolder in next(os.walk(map_data_topfolder))[1]:
    jpeg_list = glob.glob(map_data_topfolder + '/' + map_data_subfolder + '/*.jpeg')
    if len(jpeg_list) != 1:
        print(map_data_subfolder + " failed. Please ensure there is exactly 1 file with extension .jpeg in the folder.")
    else:
        map_image = jpeg_list[0].split("\\")[1]
        if map_data_subfolder in ['1846_vandevelde', '1874_saunders', '1845_kiepert']: # '1858_vandevelde', '1874_saunders', '1845_kiepert']: #,,]: #'1858_vandevelde', '1847_tobler', '1845_kiepert'
            img_path = map_data_topfolder + '/' + map_data_subfolder + "/" + map_image
            map_name = os.path.basename(img_path).split('.')[0] # get the map name without extension
            output_dir = os.path.join(map_strec_topfolder, map_name)
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)
            pyramid_scan(img_path, output_dir, save_each_layer=False)
            logging.info('Done cropping %s' %img_path )

# 3: Label Combination

In [3]:
from PIL import Image, ImageFile
import json 
import pandas as pd
from collections import Counter
from shapely.geometry import Polygon, MultiPolygon
from itertools import combinations

import numpy as np
import importlib
import Clustering
import TextRectify
import TextAmalgamate
import ExtractHandling
import json
import pickle

importlib.reload(Clustering)
importlib.reload(TextRectify)
importlib.reload(TextAmalgamate)
importlib.reload(ExtractHandling)

map_name_in_strec = 'kiepert_1845'

## 3.1 Text Rectification

In [4]:
do_cluster_pre_merge = True

with open(f'processed/strec/{map_name_in_strec}/combined_tagged_all_layers.json', 'r', encoding='utf-8') as f:

    clustered = Clustering.cluster_polygons(json.load(f))

    # visualize clusters
    #image = Clustering.visualize_polygons(clustered, 'processed/strec/kiepert_1845/raw.jpeg')
    #image.save('processed/strec/kiepert_1845/combined_tagged_all_layers_clustering.png')

for label, cluster in clustered.items():
    texts = []
    scores = []
    for polygon in cluster:
        texts.append(polygon['text'])
        scores.append(polygon['score'])

    rectifier = TextRectify.TextRectifier(0.95, 0.5, 10, True, True)

    rectifier.feed_data(texts, scores)

    rectifier.fit()

    rectified, mask = rectifier.get_rectified_text()

    if rectified is None:
        rectified = max(texts, key=len)

    for i in range(len(cluster)):
        cluster[i]['text'] = rectified[i]
        cluster[i]['keep'] = mask[i]

image = Clustering.visualize_polygons(clustered, f'processed/strec/{map_name_in_strec}/raw.jpeg')
image.save(f'processed/strec/{map_name_in_strec}/combined_tagged_all_layers_rectified.png')

polygon_x = {}
polygon_y = {}
texts = {}
scores = {}
i = 0
for label, cluster in clustered.items():
    for polygon in cluster:
        if do_cluster_pre_merge:
            if polygon['keep']:
                polygon_x[str(i)] = polygon['polygon_x']
                polygon_y[str(i)] = polygon['polygon_y']
                texts[str(i)] = polygon['text']
                scores[str(i)] = polygon['score']
                i += 1
        else:
            polygon_x[str(i)] = polygon['polygon_x']
            polygon_y[str(i)] = polygon['polygon_y']
            texts[str(i)] = polygon['text']
            scores[str(i)] = polygon['score']
            i += 1

json_data = {'polygon_x': polygon_x, 'polygon_y': polygon_y, 'text': texts, 'score': scores}

with open(f'processed/strec/{map_name_in_strec}/combined_tagged_all_layers_rectified_premerge.json', 'w', encoding='utf-8') as f:
    json.dump(json_data, f, ensure_ascii=False, indent=4)

## 3.2 Text Amalgamation

In [5]:
# Amalgamation stage - assumes there exists "combined_tagged_all_layers_rectified_premerge.json" in map_name_in_strec processed folder.
df = ExtractHandling.prepare_labels_for_amalgamation(map_name_in_strec)
df = TextAmalgamate.amalgamate_labels_wrapper(df, 0.75, .5)

# Save amalgamated labels
with open(f'processed/strec/{map_name_in_strec}/amalgamate.pickle', 'wb') as handle:
    pickle.dump(df, handle, protocol=pickle.HIGHEST_PROTOCOL)

1507 labels.
1427 labels.
1402 labels.
1399 labels.
1396 labels.
1395 labels.
Amalgamation completed with 1395 labels.


## 3.3 ?

In [6]:
result = list(df["labels"])
polygons = []
texts = []
PCA_features = []

for i in range(len(result)):
    poly = result[i][0]
    polygons.append(poly)
    texts.append(result[i][1])

In [None]:
#reload SpotterWrapper module
import importlib
import SpotterWrapper
import Grouping

importlib.reload(SpotterWrapper)
importlib.reload(Grouping)

PCA_features = Grouping.calc_PCA_feats(polygons, do_separation=True, enhance_coords=True)

print("PCA features calculated.")

vis = SpotterWrapper.PolygonVisualizer()
canvas = Image.open(f'processed/strec/{map_name_in_strec}/raw.jpeg')
vis.canvas_from_image(canvas)

vis.draw_poly(polygons, texts, PCA_features)

vis.save(f'processed/strec/{map_name_in_strec}/output.jpeg')

# 4: Evaluation

In [200]:
from PIL import Image, ImageFile
import pandas as pd
from itertools import combinations
import scipy
import numpy as np

import Evaluation
importlib.reload(Evaluation)
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 4.1: Isolate crops to be used for evaluation

In [2]:

def visualize_crop(map_name_in_strec, raw_or_spotter, left_x, right_x, top_y, bottom_y):
    if raw_or_spotter == "raw":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/raw.jpeg') 
    elif raw_or_spotter == "spotter_0":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_0.png')
    elif raw_or_spotter == "spotter_1":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_1.png')
    elif raw_or_spotter == "spotter_2":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_2.png')
    elif raw_or_spotter == "all":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_all_layers.png')
    elif raw_or_spotter == "rectified":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_all_layers_rectified.png')
    width, height = map_img.size
    print("full map is " + str(width) + " pixels wide by " + str(height) + " pixels high.\n displaying crop:")
    display(map_img.crop((left_x, top_y, right_x, bottom_y, )))

left_x = 2475
right_x = 3550
top_y = 4820
bottom_y = 5850

#visualize_crop("kiepert_1845", "all", left_x, right_x, top_y, bottom_y)

In [201]:
kiepert_gt_patch_1 = [2475, 3550, 4820, 5850]
saunders_gt_patch_1 = [3150, 4150, 2250, 3250]
saunders_gt_patch_2 = [6750, 7750, 2250, 3250]
saunders_gt_patch_3 = [5400, 6400, 4500, 5500]
saunders_gt_patch_4 = [7650, 8650, 5400, 6400]
saunders_gt_patch_5 = [7650, 8650, 3150, 4150]

## 4.2 Precision and Recall: IoU after 1:1 Matching

In [202]:
# FUNCTIONS

## Patch-level geographic pairings for non-multiline-toponyms against our pyramid pipeline
pyramid_detected_kiepert, num_gt_kiepert, pyramid_IoU_pairs_kiepert = Evaluation.geographic_evaluation("kiepert_1845", "components", kiepert_gt_patch_1)
pyramid_detected_saunders1, num_gt_saunders1, pyramid_IoU_pairs_saunders1 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_1)
pyramid_detected_saunders2, num_gt_saunders2, pyramid_IoU_pairs_saunders2 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_2)
pyramid_detected_saunders3, num_gt_saunders3, pyramid_IoU_pairs_saunders3 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_3)
pyramid_detected_saunders4, num_gt_saunders4, pyramid_IoU_pairs_saunders4 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_4)
pyramid_detected_saunders5, num_gt_saunders5, pyramid_IoU_pairs_saunders5 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_5)

## Aggregating pyramid numbers and pairs to map-level figures
num_gt_kiepert = num_gt_kiepert
num_gt_saunders = num_gt_saunders1 + num_gt_saunders2 + num_gt_saunders3 + num_gt_saunders4 + num_gt_saunders5
pyramid_detected_kiepert = pyramid_detected_kiepert
pyramid_detected_saunders = pyramid_detected_saunders1 + pyramid_detected_saunders2 + pyramid_detected_saunders3 + pyramid_detected_saunders4 + pyramid_detected_saunders5
pyramid_IoU_pairs_kiepert = pyramid_IoU_pairs_kiepert
pyramid_IoU_pairs_saunders = np.concatenate((pyramid_IoU_pairs_saunders1, pyramid_IoU_pairs_saunders2, pyramid_IoU_pairs_saunders3, pyramid_IoU_pairs_saunders4, pyramid_IoU_pairs_saunders5))

## Patch-level geographic pairings for non-multiline-toponyms against baseline
baseline_detected_kiepert, num_gt_kiepert, baseline_IoU_pairs_kiepert = Evaluation.geographic_evaluation("kiepert_1845", "components", [2475, 3550, 4820, 5850], "combined_tagged_0.json")
baseline_detected_saunders1, num_gt_saunders1, baseline_IoU_pairs_saunders1 = Evaluation.geographic_evaluation("saunders_1874", "components", [3150, 4150, 2250, 3250], "combined_tagged_0.json")
baseline_detected_saunders2, num_gt_saunders2, baseline_IoU_pairs_saunders2 = Evaluation.geographic_evaluation("saunders_1874", "components", [6750, 7750, 2250, 3250], "combined_tagged_0.json")
baseline_detected_saunders3, num_gt_saunders3, baseline_IoU_pairs_saunders3 = Evaluation.geographic_evaluation("saunders_1874", "components", [5400, 6400, 4500, 5500], "combined_tagged_0.json")
baseline_detected_saunders4, num_gt_saunders4, baseline_IoU_pairs_saunders4 = Evaluation.geographic_evaluation("saunders_1874", "components", [7650, 8650, 5400, 6400], "combined_tagged_0.json")
baseline_detected_saunders5, num_gt_saunders5, baseline_IoU_pairs_saunders5 = Evaluation.geographic_evaluation("saunders_1874", "components", [7650, 8650, 3150, 4150], "combined_tagged_0.json")

## Aggregate baseline numbers and pairs to map-level figures
baseline_detected_kiepert = baseline_detected_kiepert
baseline_detected_saunders = baseline_detected_saunders1 + baseline_detected_saunders2 + baseline_detected_saunders3 + baseline_detected_saunders4 + baseline_detected_saunders5
baseline_IoU_pairs_kiepert = baseline_IoU_pairs_kiepert
baseline_IoU_pairs_saunders = np.concatenate((baseline_IoU_pairs_saunders1, baseline_IoU_pairs_saunders2, baseline_IoU_pairs_saunders3, baseline_IoU_pairs_saunders4, baseline_IoU_pairs_saunders5))

retaining 49 labels fully inside crop area
retaining 43 labels that have alphabetic characters


  df = pickle.load(open('processed/strec/' + map_name_in_strec + '/refined_labels.pickle', 'rb'))


retaining 85 labels fully inside crop area
retaining 80 labels that have alphabetic characters
retaining 6 labels fully inside crop area
retaining 6 labels that have alphabetic characters


  df = pickle.load(open('processed/strec/' + map_name_in_strec + '/refined_labels.pickle', 'rb'))


retaining 17 labels fully inside crop area
retaining 11 labels that have alphabetic characters
retaining 11 labels fully inside crop area
retaining 10 labels that have alphabetic characters


  df = pickle.load(open('processed/strec/' + map_name_in_strec + '/refined_labels.pickle', 'rb'))


retaining 41 labels fully inside crop area
retaining 38 labels that have alphabetic characters
retaining 47 labels fully inside crop area
retaining 46 labels that have alphabetic characters


  df = pickle.load(open('processed/strec/' + map_name_in_strec + '/refined_labels.pickle', 'rb'))


retaining 108 labels fully inside crop area
retaining 108 labels that have alphabetic characters
retaining 25 labels fully inside crop area
retaining 23 labels that have alphabetic characters


  df = pickle.load(open('processed/strec/' + map_name_in_strec + '/refined_labels.pickle', 'rb'))


retaining 71 labels fully inside crop area
retaining 65 labels that have alphabetic characters
retaining 33 labels fully inside crop area
retaining 33 labels that have alphabetic characters


  df = pickle.load(open('processed/strec/' + map_name_in_strec + '/refined_labels.pickle', 'rb'))


retaining 76 labels fully inside crop area
retaining 72 labels that have alphabetic characters
retaining 49 labels fully inside crop area
retaining 43 labels that have alphabetic characters
retaining 32 labels fully inside crop area
retaining 32 labels that have alphabetic characters
retaining 6 labels fully inside crop area
retaining 6 labels that have alphabetic characters
retaining 7 labels fully inside crop area
retaining 6 labels that have alphabetic characters
retaining 11 labels fully inside crop area
retaining 10 labels that have alphabetic characters
retaining 7 labels fully inside crop area
retaining 5 labels that have alphabetic characters
retaining 47 labels fully inside crop area
retaining 46 labels that have alphabetic characters
retaining 9 labels fully inside crop area
retaining 8 labels that have alphabetic characters
retaining 25 labels fully inside crop area
retaining 23 labels that have alphabetic characters
retaining 0 labels fully inside crop area
retaining 33 lab

In [205]:
# Gimme them numbers :)
print("\nkiepert baseline\n")
Evaluation.prec_rec(baseline_IoU_pairs_kiepert, baseline_detected_kiepert, num_gt_kiepert)
print("\nkiepert pyramid\n")
Evaluation.prec_rec(pyramid_IoU_pairs_kiepert, pyramid_detected_kiepert, num_gt_kiepert)
print("\nsaunders baseline\n")
Evaluation.prec_rec(baseline_IoU_pairs_saunders, baseline_detected_saunders, num_gt_saunders)
print("\nsaunders pyramid\n")
Evaluation.prec_rec(pyramid_IoU_pairs_saunders, pyramid_detected_saunders, num_gt_saunders)


kiepert baseline

Avg of Geographic Precision: 0.19116442583187376
Avg of Geographic Recall: 0.14226189829348745
Avg of Text Precision: 0.08627091726132564
Avg of Text Recall: 0.06420161284563769

kiepert pyramid

Avg of Geographic Precision: 0.30606272939358103
Avg of Geographic Recall: 0.5694190314299181
Avg of Text Precision: 0.27410302587350277
Avg of Text Recall: 0.5099591179041911

saunders baseline

Avg of Geographic Precision: 0.06051490020519661
Avg of Geographic Recall: 0.00974392460931132
Avg of Text Precision: 0.08252460898511729
Avg of Text Recall: 0.013287860768790072

saunders pyramid

Avg of Geographic Precision: 0.23311081069940068
Avg of Geographic Recall: 0.5808015114035915
Avg of Text Precision: 0.16913448253428082
Avg of Text Recall: 0.4214028632633776


# 5 Extra - fixing image labels
## outputting crops with sourceable ids

In [161]:
import Evaluation
import ExtractHandling
import Grouping
import importlib
importlib.reload(Grouping)
%load_ext autoreload
from PIL import Image

for map_name_in_strec in ["saunders_1874", "vandevelde_1846"]: #"kiepert_1845"
    gt_labels_full = Evaluation.load_ground_truth_labels(map_name_in_strec, "components")
    gt_labels_full = ExtractHandling.cast_coords_as_Polygons(gt_labels_full)

    # the original index in the dataframe should be the indentifying number
    for index, row in gt_labels_full.iterrows():
        img = Grouping.polygon_crop(row['label_polygons'], Image.open("processed/strec/" + map_name_in_strec + "/raw.jpeg"))
        img.save("dependencies/ground_truth_labels/cropped/" + map_name_in_strec + "_" + str(index) + ".jpeg")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload




## mapping old labels to new crop names

In [162]:
import json
with open("dependencies/ground_truth_labels/ground_truth_labels_backup_preidchg.json", "r", encoding="utf-8") as f:
    data_list = json.load(f)["data"]
data_list[0]

['dependencies\\ground_truth_labels\\cropped\\kiepert_1845_0.jpeg',
 'dependencies\\ground_truth_labels\\cropped\\kiepert_1845_1.jpeg',
 1]

In [163]:
# kiepert (old -> new)
kiepert_crop_dict = {}
kiepert_crop_dict[0] = 1
kiepert_crop_dict[1] = 2
kiepert_crop_dict[2] = 4
kiepert_crop_dict[4] = 6
kiepert_crop_dict[7] = 10
kiepert_crop_dict[9] = 13
kiepert_crop_dict[10] = 14
kiepert_crop_dict[12] = 17
kiepert_crop_dict[13] = 18
kiepert_crop_dict[19] = 23
kiepert_crop_dict[21] = 25
kiepert_crop_dict[29] = 35
kiepert_crop_dict[30] = 37
kiepert_crop_dict[31] = 38
kiepert_crop_dict[32] = 39
kiepert_crop_dict[35] = 43
kiepert_crop_dict[37] = 45
kiepert_crop_dict[40] = 49
kiepert_crop_dict[44] = 53
kiepert_crop_dict[49] = 59
kiepert_crop_dict[53] = 64
kiepert_crop_dict[56] = 68
kiepert_crop_dict[59] = 71
kiepert_crop_dict[61] = 0

# saunders (old -> new)
saunders_crop_dict = {}
saunders_crop_dict[1] = 4
saunders_crop_dict[3] = 13
saunders_crop_dict[4] = 18
saunders_crop_dict[7] = 22
saunders_crop_dict[11] = 28
saunders_crop_dict[18] = 38
saunders_crop_dict[20] = 47
saunders_crop_dict[30] = 61
saunders_crop_dict[35] = 70
saunders_crop_dict[38] = 75
saunders_crop_dict[39] = 76
saunders_crop_dict[40] = 77
saunders_crop_dict[46] = 84
saunders_crop_dict[54] = 120
saunders_crop_dict[57] = 125
saunders_crop_dict[59] = 132
saunders_crop_dict[67] = 9
saunders_crop_dict[69] = 12
saunders_crop_dict[70] = 14
saunders_crop_dict[71] = 15
saunders_crop_dict[72] = 16
saunders_crop_dict[73] = 17
saunders_crop_dict[86] = 110
saunders_crop_dict[94] = 118
saunders_crop_dict[97] = 128

# vandevelde (old -> new)
vandevelde_crop_dict = {}
vandevelde_crop_dict[0] = 0
vandevelde_crop_dict[2] = 10
vandevelde_crop_dict[4] = 13
vandevelde_crop_dict[7] = 33
vandevelde_crop_dict[8] = 35
vandevelde_crop_dict[9] = 37
vandevelde_crop_dict[10] = 40
vandevelde_crop_dict[12] = 5 
vandevelde_crop_dict[13] = 6
vandevelde_crop_dict[16] = 15
vandevelde_crop_dict[17] = 16
vandevelde_crop_dict[20] = 21
vandevelde_crop_dict[23] = 24
vandevelde_crop_dict[24] = 25
vandevelde_crop_dict[27] = 30
vandevelde_crop_dict[28] = 31
vandevelde_crop_dict[34] = 44
vandevelde_crop_dict[37] = 47
vandevelde_crop_dict[48] = 58
vandevelde_crop_dict[49] = 59
vandevelde_crop_dict[51] = 61
vandevelde_crop_dict[60] = 70
vandevelde_crop_dict[61] = 71
vandevelde_crop_dict[62] = 72

# global nested dict
id_updater_dict = {}
id_updater_dict['kiepert_1845'] = kiepert_crop_dict
id_updater_dict['saunders_1874'] = saunders_crop_dict
id_updater_dict['vandevelde_1846'] = vandevelde_crop_dict

In [164]:
def modify_font_labels(file_path):
    #print(file_path)
    for search_string in ['kiepert_1845', 'saunders_1874', 'vandevelde_1846']:
        if search_string in file_path:
            id_index = file_path.find(search_string) + len(search_string) + 1
            id_str = file_path[id_index:file_path.find('.jpeg')]
            if id_str.isdigit():
                new_id = id_updater_dict[search_string][int(id_str)]
                return file_path.replace(search_string + "_" + id_str, search_string + "_" + str(new_id))

fixed_ids = [[modify_font_labels(lbl[0]), modify_font_labels(lbl[1]), lbl[2]] for lbl in data_list]

json_dict = {}
json_dict['data'] = fixed_ids

with open('dependencies/ground_truth_labels/ground_truth_labels_fixed.json', 'w') as f:
    json.dump(json_dict, f)

## Padding with group labels

In [165]:
import Evaluation
import ExtractHandling
import Grouping
import importlib
importlib.reload(Evaluation)
%load_ext autoreload
from PIL import Image
from itertools import combinations

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [166]:
group_padding = []
for map_name_in_strec in ["kiepert_1845", "saunders_1874", "vandevelde_1846"]:
    gt_labels_full = Evaluation.load_ground_truth_labels(map_name_in_strec, "components")
    gt_labels_full = ExtractHandling.cast_coords_as_Polygons(gt_labels_full)
    gt_labels_group = gt_labels_full[(gt_labels_full['multiline_g'].notnull()) & (gt_labels_full['multiline_g'] != "")]
    gt_labels_group['file_path'] = "dependencies/ground_truth_labels/cropped/" + map_name_in_strec + "_" + gt_labels_group.index.astype(str) + ".jpeg"
    for group_id in gt_labels_group['multiline_g'].value_counts().index:
        tmp = gt_labels_group[gt_labels_group['multiline_g'] == str(group_id)].copy()
        fps = tmp['file_path'].to_list()
        result = []
        for combo in combinations(fps, 2):
            result.append([combo[0], combo[1], 1])
        group_padding.extend(result)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gt_labels_group['file_path'] = "dependencies/ground_truth_labels/cropped/" + map_name_in_strec + "_" + gt_labels_group.index.astype(str) + ".jpeg"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gt_labels_group['file_path'] = "dependencies/ground_truth_labels/cropped/" + map_name_in_strec + "_" + gt_labels_group.index.astype(str) + ".jpeg"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas

In [167]:
def deduplicate_list_of_lists(lst):
    seen = set()
    result = []
    for sublist in lst:
        tuple_sublist = tuple(sublist)
        rever_sublist = (tuple_sublist[1], tuple_sublist[0], tuple_sublist[2])
        # If the tuple is not in the set, add it to the set and result list
        if tuple_sublist not in seen and rever_sublist not in seen:
            seen.add(tuple_sublist)
            seen.add(rever_sublist)
            result.append(sublist)
    return result

fixed_n_padded_ids = deduplicate_list_of_lists(fixed_ids + group_padding)

json_dict['data'] = fixed_n_padded_ids

with open('dependencies/ground_truth_labels/ground_truth_labels_fixed.json', 'w') as f:
    json.dump(json_dict, f)