# 0: Import packages

In [None]:
from PIL import Image, ImageFile #pip install Pillow==9.4.0
import sys
import os
import numpy as np
import logging
import glob
import subprocess
import json
import pandas as pd

from ImageCrop import ImagePreprocessor
from SpotterWrapper import Spotter, PolygonVisualizer
from IPython.display import display
from shapely.geometry import Polygon

logging.basicConfig(level=logging.INFO)
Image.MAX_IMAGE_PIXELS=None
ImageFile.LOAD_TRUNCATED_IMAGES = True


# 1: Specify filepaths

In [None]:
# Name folders for raw data and processed data
map_data_topfolder = 'raw_maps_20231024'
map_strec_topfolder = 'processed/strec'

for fp in [map_strec_topfolder]:
    if not os.path.isdir(fp):
        os.makedirs(fp)

# IMPORTANT! Locate spotter directory and detectron weights
git_clone_location = 'C:/repo/'
spotter_directory = git_clone_location + 'mapkurator-spotter/spotter-v2'
model_weights = git_clone_location + 'detectron2-master/detectron2/checkpoint/model_v2_en.pth'
spotter_config = spotter_directory + '/configs/PALEJUN/Finetune/Rumsey_Polygon_Finetune.yaml'

# 2: Crop all jpeg maps in (user defined) map_data_topfolder

In [None]:
def pyramid_scan(img_path, output_dir, save_each_layer=False):
    image = Image.open(img_path)
    image_preprocessor = ImagePreprocessor(image, overlapping_tolerance=0.3, num_layers=5, min_patch_resolution=512, max_patch_resolution=4096)
    image_preprocessor.process()
    print("preprocessing done")
    spotter = Spotter(spotter_config, model_weights, confidence_thresh=0.7, draw_thresh=0.85)
    all_layer_results = []

    base_image_batch, base_offset_xs, base_offset_ys = image_preprocessor.get_image_patches(0)
    vis = PolygonVisualizer()
    vis.canvas_from_patches(base_image_batch, base_offset_xs, base_offset_ys)

    for i in range(image_preprocessor.num_layers):
        # If you want to save for each layer, uncomment the following line
        # image_preprocessor.save_patches(os.path.join(output_dir, f'layer_{i}_patches'), layer=i)

        image_batch, offset_xs, offset_ys = image_preprocessor.get_image_patches(i)
        spotter.load_batch(image_batch, offset_xs, offset_ys)
        results = spotter.inference_batch()
        all_layer_results.extend(results)

        #all_layer_offset_xs.extend(offset_xs)
        #all_layer_offset_ys.extend(offset_ys)

        if save_each_layer == True:
            vis.draw(results).save(os.path.join(output_dir, f'combined_tagged_{i}.png'))
            vis.save_json(results, os.path.join(output_dir, f'combined_tagged_{i}.json'))
        else:
            pass

    vis.draw(all_layer_results).save(os.path.join(output_dir, f'combined_tagged_all_layers.png'))
    vis.save_json(all_layer_results, os.path.join(output_dir, f'combined_tagged_all_layers.json'))

# Run crop on all maps
for map_data_subfolder in next(os.walk(map_data_topfolder))[1]:
    jpeg_list = glob.glob(map_data_topfolder + '/' + map_data_subfolder + '/*.jpeg')
    if len(jpeg_list) != 1:
        print(map_data_subfolder + " failed. Please ensure there is exactly 1 file with extension .jpeg in the folder.")
    else:
        map_image = jpeg_list[0].split("\\")[1]
        if map_data_subfolder in ['1846_vandevelde', '1874_saunders', '1845_kiepert']: # '1858_vandevelde', '1874_saunders', '1845_kiepert']: #,,]: #'1858_vandevelde', '1847_tobler', '1845_kiepert'
            img_path = map_data_topfolder + '/' + map_data_subfolder + "/" + map_image
            map_name = os.path.basename(img_path).split('.')[0] # get the map name without extension
            output_dir = os.path.join(map_strec_topfolder, map_name)
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)
            pyramid_scan(img_path, output_dir, save_each_layer=False)
            logging.info('Done cropping %s' %img_path )

# 3: Label Combination

In [1]:
from PIL import Image, ImageFile
import json 
import pandas as pd
from collections import Counter
from shapely.geometry import Polygon, MultiPolygon
from itertools import combinations

import numpy as np
import importlib
import Clustering
import TextRectify
import TextAmalgamate
import ExtractHandling
import json
import pickle
import SpotterWrapper
import Grouping
import BezierSplineMetric
import FontSimilarity
import SequenceRecovery
import SubwordDeduplication as sd
import NestedWordFlattening as nwf
import RumseyMetric
importlib.reload(SpotterWrapper)
importlib.reload(Grouping)
importlib.reload(Clustering)
importlib.reload(TextRectify)
importlib.reload(TextAmalgamate)
importlib.reload(ExtractHandling)
importlib.reload(BezierSplineMetric)
importlib.reload(FontSimilarity)
importlib.reload(SequenceRecovery)

map_name_in_strec = 'vandevelde_1846' # 'kiepert_1845', 'saunders_1874', 'vandevelde_1846'

Using device: cuda
Using device: cuda
Using device: cuda
Using device: cuda


## 3.1 Subword Deduplication

In [11]:
sd.subword_deduplication(map_name_in_strec, do_cluster_pre_merge=True)

971 polygons kept.


## 3.2 Nested Word Flattening

In [3]:
def testing_vis(df, map_name_in_strec, suffix):
    polygons = [el[0] for el in df['labels']]
    texts = [el[1] for el in df['labels']]
    vis = SpotterWrapper.PolygonVisualizer()
    canvas = Image.open(f'processed/strec/{map_name_in_strec}/raw.jpeg')
    vis.canvas_from_image(canvas)
    vis.draw_poly(polygons, texts, PCA_feature_list=None, BSplines=None, random_color=True)
    vis.save(f'processed/strec/{map_name_in_strec}/testing_{suffix}.jpeg')

In [12]:
# Amalgamation stage - assumes there exists "combined_tagged_all_layers_rectified_premerge.json" in map_name_in_strec processed folder.
df = ExtractHandling.prepare_labels_for_amalgamation(map_name_in_strec)
#df_orig = TextAmalgamate.amalgamate_labels_wrapper(df, 0.75, .5)
df_new = pd.DataFrame({"labels": nwf.nwf_wrapper(df['labels'].tolist(), 0.75, 0.5)})

# Save amalgamated labels
with open(f'processed/strec/{map_name_in_strec}/deduplicated_flattened_labels.pickle', 'wb') as handle:
    pickle.dump(df_new, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Save visualization
testing_vis(df_new, map_name_in_strec, "orig_nwf")

Started NWF with 971 labels.
Retained 865.


## 3.3 Multi-Word Sequence Recovery

### 3.3.1 Prepare by calculating spline and font metrics

In [13]:
df = pickle.load(open('processed/strec/' + map_name_in_strec + '/deduplicated_flattened_labels.pickle', 'rb'))
df['polygons'] = df['labels'].apply(lambda x: x[0])
df['texts'] = df['labels'].apply(lambda x: x[1])

# Uncomment to draw splines later
## BezierSplineMetric.draw_splines(map_name_in_strec, polygons, texts, PCA_features, all_splines)

# reset index so list-based operations match df index
df = df.reset_index(drop=True).copy()

# pca for principal directions
df['PCA_features'] = Grouping.calc_PCA_feats(df['polygons'], do_separation=True, enhance_coords=True)

# find neighbors for spline and font metric consideration
df = BezierSplineMetric.calc_neighbours(df, radius_multiplier = 40)

# calculate spline metric between identified neighbors
df = BezierSplineMetric.spline_metric(df)

# calculate font metric between identified neighbors - long due to need to work with images
df = FontSimilarity.calc_font_similarities(df, map_name_in_strec)

# calculate rumsey metric for combination
df = RumseyMetric.calc_rumsey_metric(df)

with open(f'processed/strec/{map_name_in_strec}/seq_rec_prepared_labels.pickle', 'wb') as handle:
    pickle.dump(df, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Optional - draw splines
#df = pickle.load(open(f'processed/strec/{map_name_in_strec}/seq_rec_prepared_labels.pickle', 'rb'))
#BezierSplineMetric.draw_splines(map_name_in_strec, df['polygons'].tolist(), df['texts'].tolist(), df['PCA_features'].tolist(), df['all_splines'].explode().dropna().tolist(), spline_metric_threshold = 0.01)

### 3.3.2 Iterative Sequence Recovery

#### A. Rumsey's Method

In [15]:
# load in seq rec prepared
df = pickle.load(open(f'processed/strec/{map_name_in_strec}/seq_rec_prepared_labels.pickle', 'rb'))
## Drop PCA_features - no longer needed, makes me feel good to discard stuff i don't need
df.drop('PCA_features', axis=1, inplace=True)
df.drop('all_splines', axis=1, inplace=True)

# calculate using rumsey metric
df = SequenceRecovery.sl_sequence_recovery_wrapper(df, use_rumsey_metric=True)

# map to dataframe
new_texts = []
new_labels = []
for index, row in df.iterrows():
    sorted_text = sorted(row['text_list'], key=lambda x: x[0][0])
    new_texts.append(" ".join([_text[1] for _text in sorted_text]))
    new_labels.append((row['labels'][0], row['texts']))
df['labels'] = new_labels
df['texts'] = new_texts

# save
with open(f'processed/strec/{map_name_in_strec}/fully_processed_labels_rumsey.pickle', 'wb') as handle:
    pickle.dump(df, handle, protocol=pickle.HIGHEST_PROTOCOL)

865 labels.
584 labels.
Sequence Recovery completed with 584 labels.


#### B. Our Method - Old, Issues with Two Recovered Sequences Combining

In [2]:
# load in seq rec prepared
df = pickle.load(open(f'processed/strec/{map_name_in_strec}/seq_rec_prepared_labels.pickle', 'rb'))
## Drop PCA_features - no longer needed, makes me feel good to discard stuff i don't need
df.drop('PCA_features', axis=1, inplace=True)
df.drop('all_splines', axis=1, inplace=True)

# calculate using our metric
df = SequenceRecovery.sl_sequence_recovery_wrapper(df, font_threshold=.5, bezier_threshold=1.5, use_rumsey_metric=False)

# map to dataframe
new_texts = []
new_labels = []
for index, row in df.iterrows():
    sorted_text = sorted(row['text_list'], key=lambda x: x[0][0])
    new_texts.append(" ".join([_text[1] for _text in sorted_text]))
    new_labels.append((row['labels'][0], row['texts']))

df['labels'] = new_labels
df['texts'] = new_texts

# save
with open(f'processed/strec/{map_name_in_strec}/fully_processed_labels.pickle', 'wb') as handle:
    pickle.dump(df, handle, protocol=pickle.HIGHEST_PROTOCOL)

865 labels.
604 labels.
502 labels.
465 labels.
450 labels.
443 labels.
441 labels.
Sequence Recovery completed with 441 labels.


#### C. Our Method - New

In [56]:
import numpy as np
from shapely.geometry import Polygon, MultiPolygon
from collections import Counter
from itertools import combinations
from igraph import Graph

# draw edges between nodes where bezier and font values pass the threshold
def calculate_edges(g, font_threshold, bezier_threshold):
    node_indices = range(len(g.vs))
    all_pairwise_combs = list(combinations(node_indices, 2))
    for pair in all_pairwise_combs:
        i = pair[0]
        j = pair[1]
        if i != j and not g.are_connected(i, j) and (j in g.vs[i]['neighbours']):
            if g.vs[i]['font_similarities'][j] > font_threshold and g.vs[i]['bezier_costs'][j] < bezier_threshold:
                g.add_edge(i, j)
    return g

def remap_dictionary_of_indices(dict, indices_to_map):
    for from_num in indices_to_map:
        if from_num in dict.keys():
            to_num = indices_to_map[from_num]
            dict[to_num] = dict.pop(from_num)
    return dict

def remap_set_of_indices(set, indices_to_map):
    return {indices_to_map[element] if element in indices_to_map.keys() else element for element in set}

def update_ind_map(local_dict, global_dict, from_ind, to_ind):
    local_dict[from_ind] = to_ind
    global_dict[from_ind] = to_ind
    for key, value in global_dict.items():
        if value == from_ind:
            dict[key] = to_ind
    return local_dict, global_dict

# combine two labels
def combine_labels(v1, v2, global_indices_to_map):

    v1_ind = v1['index']
    v2_ind = v2['index']
    index_new = min(v1_ind, v2_ind)
    local_indices_to_map = {}
    if index_new == v1_ind:
        local_indices_to_map, global_indices_to_map = update_ind_map(local_indices_to_map, global_indices_to_map, v2_ind, v1_ind)
    else:
        local_indices_to_map, global_indices_to_map = update_ind_map(local_indices_to_map, global_indices_to_map, v1_ind, v2_ind)

    poly1 = v1['label'][0]
    poly2 = v2['label'][0]
    poly_new = poly1.union(poly2) # returns multipolygon object with disjoint polygons if polygons are disjoint

    text_new = ''

    label_new = (poly_new, text_new)

    bezier_costs1 = v1['bezier_costs']
    bezier_costs2 = v2['bezier_costs']
    bezier_costs_new = {key: min(bezier_costs1.get(key, float('inf')), bezier_costs2.get(key, float('inf'))) for key in set(bezier_costs1) | set(bezier_costs2)}
    bezier_costs_new = remap_dictionary_of_indices(bezier_costs_new, local_indices_to_map)

    font_similarities1 = v1['font_similarities']
    font_similarities2 = v2['font_similarities']
    font_similarities_new = {key: max(font_similarities1.get(key, 0), font_similarities2.get(key, 0)) for key in set(font_similarities1) | set(font_similarities2)}
    font_similarities_new = remap_dictionary_of_indices(font_similarities_new, local_indices_to_map)

    neighbours1 = set(v1['neighbours'])
    neighbours2 = set(v2['neighbours'])
    if neighbours1 is None:
        neighbours1 = []
    if neighbours2 is None:
        neighbours2 = []
    neighbours_new = neighbours1.union(neighbours2)
    neighbours_new.discard(v1_ind)
    neighbours_new.discard(v2_ind)
    neighbours_new = list(neighbours_new)

    return index_new, label_new, bezier_costs_new, font_similarities_new, neighbours_new, global_indices_to_map

# combine two nodes by adding to new graph and using combine_labels() function above for attribute
def subgraph_contractor(subgraph, edges_calculated, font_threshold, bezier_threshold, global_indices_to_map):
    if edges_calculated:
        pass
    else:
        subgraph = calculate_edges(subgraph, font_threshold, bezier_threshold)
    edges = subgraph.get_edgelist()
    uncontracted_vertices = {i for i in range(len(subgraph.vs))}
    subgraph_new = Graph()
    for edge in edges:
        if edge[0] in uncontracted_vertices and edge[1] in uncontracted_vertices:
            index_new, label_new, bezier_costs_new, font_similarities_new, neighbours_new, global_indices_to_map = combine_labels(subgraph.vs[edge[0]], subgraph.vs[edge[1]], global_indices_to_map)
            subgraph_new.add_vertex(index = index_new, label = label_new, bezier_costs = bezier_costs_new, font_similarities = font_similarities_new, neighbours = neighbours_new)
            uncontracted_vertices.remove(edge[0])
            uncontracted_vertices.remove(edge[1])
    for vertex in uncontracted_vertices:
        tmp_v = subgraph.vs[vertex]
        subgraph_new.add_vertex(index = tmp_v['index'], label = tmp_v['label'], bezier_costs = tmp_v['bezier_costs'], font_similarities = tmp_v['font_similarities'], neighbours = tmp_v['neighbours'])
    return subgraph_new, False, global_indices_to_map

# wrapper for continued combination until weak connected subgraph cannot be further contracted 
def subgraph_contractor_wrapper(subgraph, font_threshold, bezier_threshold, global_indices_to_map):
    edges_calculated = True
    base_len = 0
    contracted_len = len(subgraph.vs)
    while base_len != contracted_len:
        base_len = len(subgraph.vs)
        subgraph, edges_calculated, global_indices_to_map = subgraph_contractor(subgraph, edges_calculated, font_threshold, bezier_threshold, global_indices_to_map)
        contracted_len = len(subgraph.vs)
    return subgraph, global_indices_to_map

# prepare subgraphs for flattening base on IoMs
def sl_seq_req(indices, labels, bezier_costs, font_similarities, neighbours, font_threshold, bezier_threshold):

    # create graph from labels, extract weak connected components (for isolated seq req)
    label_dict_for_graph = dict(zip(['index','label', 'font_similarities', 'bezier_costs', 'neighbours'], [indices, labels, font_similarities, bezier_costs, neighbours]))
    g = Graph()
    g.add_vertices(len(labels),attributes=label_dict_for_graph)
    print(g.vs[1]['bezier_costs'])
    g = calculate_edges(g, font_threshold, bezier_threshold)
    connected_subgraphs = g.decompose()
    global_indices_to_map = {}

    # seq req weak connected components
    for i, subgraph in enumerate(connected_subgraphs, start=1):
        if subgraph.vcount() > 1:
            subgraph, global_indices_to_map = subgraph_contractor_wrapper(subgraph, font_threshold, bezier_threshold, global_indices_to_map)

    print(str(len(connected_subgraphs) + " labels after curr round of sequence recovery."))
    iter_indices = []
    iter_labels = []
    iter_bezier_costs = []
    iter_font_similarities = []
    iter_neighbours = []
    for i, subgraph in enumerate(connected_subgraphs, start=1):
        iter_indices.extend([node['index'] for node in subgraph.vs])
        iter_labels.extend([node['label'] for node in subgraph.vs])
        iter_bezier_costs.extend([remap_dictionary_of_indices(node['bezier_costs'], global_indices_to_map) for node in subgraph.vs])
        iter_font_similarities.extend([remap_dictionary_of_indices(node['font_similarities'], global_indices_to_map) for node in subgraph.vs])
        iter_neighbours.extend([remap_set_of_indices(node['neighbours'], global_indices_to_map) for node in subgraph.vs])
    return iter_labels, iter_bezier_costs, iter_font_similarities, iter_neighbours

# wrapper for nwf (apply nwf to weak connected subgraphs until no more connected components manifest) 
def sl_seq_req_wrapper(labels, bezier_costs, font_similarities, neighbours, font_threshold, bezier_threshold):
    print("Started SL SR with " + str(len(labels)) + " labels.")
    base_len = 0
    sreq_len = len(labels)
    indices = [i for i in range(len(labels))]
    while base_len != sreq_len:
        base_len = len(labels)
        indices, labels, bezier_scores, font_scores, neighbours = sl_seq_req(indices, labels, bezier_costs, font_similarities, neighbours, font_threshold, bezier_threshold)
        sreq_len = len(labels)
    print("Retained " + str(len(labels)) + ".")
    return labels, bezier_scores, font_scores, neighbours

In [57]:
# load in seq rec prepared
df = pickle.load(open(f'processed/strec/{map_name_in_strec}/seq_rec_prepared_labels.pickle', 'rb'))
## Drop PCA_features - no longer needed, makes me feel good to discard stuff i don't need
df.drop('PCA_features', axis=1, inplace=True)
df.drop('all_splines', axis=1, inplace=True)

bezier_scores = df['bezier_scores'].tolist()
font_scores = df['font_scores'].tolist()
neighbours = df['neighbours'].tolist()
labels = df['labels'].tolist()
sl_seq_req_wrapper(labels, bezier_scores, font_scores, neighbours, 0.5, 1.5)

Started SL SR with 865 labels.
{0: 0.3199538701430699, 80: 10.353152165106398, 213: 40.49653968653606, 218: 192.441951782067, 644: 1.0780189356298586, 802: 81.26057037971508, 803: 36.78602254981498, 804: 22.500761619038922, 806: 180.67736955281836, 808: 222.90401705015285, 839: 161.316903212114}


TypeError: unsupported operand type(s) for +: 'int' and 'str'

# 4: Evaluation

In [17]:
from PIL import Image, ImageFile
import pandas as pd
from itertools import combinations
import scipy
import numpy as np
import importlib 

import Evaluation
importlib.reload(Evaluation)
%load_ext autoreload

## 4.1: Isolate crops to be used for evaluation

In [18]:

def visualize_crop(map_name_in_strec, raw_or_spotter, left_x, right_x, top_y, bottom_y):
    if raw_or_spotter == "raw":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/raw.jpeg') 
    elif raw_or_spotter == "spotter_0":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_0.png')
    elif raw_or_spotter == "spotter_1":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_1.png')
    elif raw_or_spotter == "spotter_2":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_2.png')
    elif raw_or_spotter == "all":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_all_layers.png')
    elif raw_or_spotter == "rectified":
        map_img = Image.open('processed/strec/' + map_name_in_strec + '/combined_tagged_all_layers_rectified.png')
    width, height = map_img.size
    print("full map is " + str(width) + " pixels wide by " + str(height) + " pixels high.\n displaying crop:")
    display(map_img.crop((left_x, top_y, right_x, bottom_y, )))

left_x = 2475
right_x = 3550
top_y = 4820
bottom_y = 5850

#visualize_crop("kiepert_1845", "all", left_x, right_x, top_y, bottom_y)

## 4.2 Precision and Recall: 1:1 Matching on Geometry, then IoU

In [19]:
kiepert_gt_patches = [[1750, 3750, 4775, 6200], [2250, 4050, 6050, 7500]] #,[2475, 3550, 4820, 5850]]
saunders_gt_patches = [[2350, 3850, 1750, 3250], [6450, 7500, 2200, 3250], [5400, 6400, 4500, 5500], [7650, 8650, 5400, 6400], [7650, 8650, 3150, 4150]] #
vandevelde_gt_patches = [[2850, 5250, 1450, 3850]]

# Gimme them numbers :)

kname = "kiepert_1845"
vname = "vandevelde_1846"
sname = "saunders_1874"

multiline_handling = "components" # "largest" for multiline gt

print("\nkiepert baseline (gt = " + multiline_handling + ")\n")
kbase_geo_prec, kbase_text_prec, kbase_geo_rec, kbase_text_rec, kbase_IoU_pairs, kbase_num_detected, kbase_num_gt = Evaluation.prec_rec(kname, multiline_handling, kiepert_gt_patches, "methods_0")
print("\nkiepert pyramid - subword dedup, nested word flattening (gt = " + multiline_handling + ")\n")
k12_geo_prec, k12_text_prec, k12_geo_rec, k12_text_rec, k12_IoU_pairs, k12_num_detected, k12_num_gt = Evaluation.prec_rec(kname, multiline_handling, kiepert_gt_patches, "methods_1_2")
print("\nkiepert pyramid - subword dedup, nested word flattening, Rumsey's sequence recovery (gt = " + multiline_handling + ")\n")
k123_geo_prec, k123_text_prec, k123_geo_rec, k123_text_rec, k123_IoU_pairs, k123_num_detected, k123_num_gt = Evaluation.prec_rec(kname, multiline_handling, kiepert_gt_patches, "methods_1_2_r")
print("\nkiepert pyramid - subword dedup, nested word flattening, Our's sequence recovery (gt = " + multiline_handling + ")\n")
k123_geo_prec, k123_text_prec, k123_geo_rec, k123_text_rec, k123_IoU_pairs, k123_num_detected, k123_num_gt = Evaluation.prec_rec(kname, multiline_handling, kiepert_gt_patches, "methods_1_2_3")

print("\nvandevelde baseline (gt = " + multiline_handling + ")\n")
vbase_geo_prec, vbase_text_prec, vbase_geo_rec, vbase_text_rec, vbase_IoU_pairs, vbase_num_detected, vbase_num_gt = Evaluation.prec_rec(vname, multiline_handling, vandevelde_gt_patches, "methods_0")
print("\nvandevelde pyramid - subword dedup, nested word flattening (gt = " + multiline_handling + ")\n")
v12_geo_prec, v12_text_prec, v12_geo_rec, v12_text_rec, v12_IoU_pairs, v12_num_detected, v12_num_gt = Evaluation.prec_rec(vname, multiline_handling, vandevelde_gt_patches, "methods_1_2")
print("\nvandevelde pyramid - subword dedup, nested word flattening, Rumsey's sequence recovery (gt = " + multiline_handling + ")\n")
v123_geo_prec, v123_text_prec, v123_geo_rec, v123_text_rec, v123_IoU_pairs, v123_num_detected, v123_num_gt = Evaluation.prec_rec(vname, multiline_handling, vandevelde_gt_patches, "methods_1_2_r")
print("\nvandevelde pyramid - subword dedup, nested word flattening, Our's sequence recovery (gt = " + multiline_handling + ")\n")
v123_geo_prec, v123_text_prec, v123_geo_rec, v123_text_rec, v123_IoU_pairs, v123_num_detected, v123_num_gt = Evaluation.prec_rec(vname, multiline_handling, vandevelde_gt_patches, "methods_1_2_3")

print("\nsaunders baseline (gt = " + multiline_handling + ")\n")
sbase_geo_prec, sbase_text_prec, sbase_geo_rec, sbase_text_rec, sbase_IoU_pairs, sbase_num_detected, sbase_num_gt = Evaluation.prec_rec(sname, multiline_handling, saunders_gt_patches, "methods_0")
print("\nsaunders pyramid - subword dedup, nested word flattening (gt = " + multiline_handling + ")\n")
s12_geo_prec, s12_text_prec, s12_geo_rec, s12_text_rec, s12_IoU_pairs, s12_num_detected, s12_num_gt = Evaluation.prec_rec(sname, multiline_handling, saunders_gt_patches, "methods_1_2")
print("\nsaunders pyramid - subword dedup, nested word flattening, Rumsey's sequence recovery (gt = " + multiline_handling + ")\n")
s123_geo_prec, s123_text_prec, s123_geo_rec, s123_text_rec, s123_IoU_pairs, s123_num_detected, s123_num_gt = Evaluation.prec_rec(sname, multiline_handling, saunders_gt_patches, "methods_1_2_r")
print("\nsaunders pyramid - subword dedup, nested word flattening, Our's sequence recovery (gt = " + multiline_handling + ")\n")
s123_geo_prec, s123_text_prec, s123_geo_rec, s123_text_rec, s123_IoU_pairs, s123_num_detected, s123_num_gt = Evaluation.prec_rec(sname, multiline_handling, saunders_gt_patches, "methods_1_2_3")


kiepert baseline (gt = components)

Avg of Geographic Precision: 0.1201465060772278
Avg of Text Precision: 0.10528688619228874
Avg of Geographic Recall: 0.6131614792906798
Avg of Text Recall: 0.5373261778089218

kiepert pyramid - subword dedup, nested word flattening (gt = components)

Avg of Geographic Precision: 0.23789807623121678
Avg of Text Precision: 0.21139102508949265
Avg of Geographic Recall: 0.546892129267165
Avg of Text Recall: 0.48595637951607507

kiepert pyramid - subword dedup, nested word flattening, Rumsey's sequence recovery (gt = components)

Avg of Geographic Precision: 0.3005359153606449
Avg of Text Precision: 0.26485513144958256
Avg of Geographic Recall: 0.4248956044753945
Avg of Text Recall: 0.3744503582563064

kiepert pyramid - subword dedup, nested word flattening, Our's sequence recovery (gt = components)

Avg of Geographic Precision: 0.4885405035688952
Avg of Text Precision: 0.5033158403198099
Avg of Geographic Recall: 0.6233102976568663
Avg of Text Recall: 0

## 5: Plot and Save results

In [34]:
import shapely as sh
save_map_name_in_strec = 'vandevelde_1846' # 'kiepert_1845', 'saunders_1874', 'vandevelde_1846'

# Plot the final image
Evaluation.plot_recovered_seq(save_map_name_in_strec, "methods_1_2_3")
Evaluation.plot_recovered_seq(save_map_name_in_strec, 'methods_1_2_r', '(rumsey)')

# Save final results into a json file
spotter_labels_full = ExtractHandling.load_processed_labels(save_map_name_in_strec, "methods_1_2_3")
polygon_list = spotter_labels_full['label_polygons'].tolist()
text_list = spotter_labels_full['annotation'].tolist()

polygons_json = []
for poly, text in zip(polygon_list, text_list):
    dict = {'polygon_x': [], 'polygon_y': [], 'text': text}
    if isinstance(poly, sh.geometry.polygon.Polygon):
        dict['polygon_x'] = list(poly.exterior.coords.xy[0])
        dict['polygon_y'] = list(poly.exterior.coords.xy[1])
            
    elif isinstance(poly, sh.geometry.multipolygon.MultiPolygon):
        for p in poly.geoms: # kaede added .geoms - package version differences
            polygon_x = p.exterior.coords.xy[0]
            polygon_y = p.exterior.coords.xy[1]

            for x, y in zip(polygon_x, polygon_y):
                dict['polygon_x'].append(x)
                dict['polygon_y'].append(y)

    polygons_json.append(dict)

with open(f'processed/strec/{save_map_name_in_strec}/final.json', 'w') as f:
    json.dump(polygons_json, f)