# 0: Import packages

In [None]:
from PIL import Image, ImageFile #pip install Pillow==9.4.0
import sys
import os
import numpy as np
import logging
import glob
import subprocess
import json
import pandas as pd

from ImageCrop import ImagePreprocessor
from SpotterWrapper import Spotter, PolygonVisualizer
from IPython.display import display
from shapely.geometry import Polygon

# Emit INFO-level log records (a logging.info call reports each processed map).
logging.basicConfig(level=logging.INFO)
# Lift Pillow's pixel-count limit so very large map scans can be opened.
Image.MAX_IMAGE_PIXELS=None
# Let Pillow load image files whose data is truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True


# 1: Specify filepaths

In [None]:
# Top-level folders: raw map scans (input) and spotter results (output).
map_data_topfolder = 'raw_maps_20231024'
map_strec_topfolder = 'processed/strec'

# Create the output folder(s) up front; already existing folders are left alone.
for fp in [map_strec_topfolder]:
    os.makedirs(fp, exist_ok=True)

# IMPORTANT! Point these at the local clones of the spotter and of detectron2.
git_clone_location = 'C:/repo/'
spotter_directory = f'{git_clone_location}mapkurator-spotter/spotter-v2'
model_weights = f'{git_clone_location}detectron2-master/detectron2/checkpoint/model_v2_en.pth'
spotter_config = f'{spotter_directory}/configs/PALEJUN/Finetune/Rumsey_Polygon_Finetune.yaml'

# 2: Crop all jpeg maps in (user defined) map_data_topfolder

In [None]:
def pyramid_scan(img_path, output_dir, save_each_layer=False):
    """Run the text spotter on every pyramid layer of one map image and save the results.

    The image is cut into patches by ``ImagePreprocessor``; each layer's patches are
    passed through ``Spotter`` and the detections of all layers are accumulated.
    The accumulated detections are drawn on a canvas built from the layer-0 patches
    and written to ``combined_tagged_all_layers.png`` / ``.json`` in ``output_dir``.

    Args:
        img_path: Path of the map image, opened with ``PIL.Image.open``.
        output_dir: Directory that receives the output files (must already exist).
        save_each_layer: When truthy, also write ``combined_tagged_<i>.png`` and
            ``combined_tagged_<i>.json`` holding only the detections of layer ``i``.

    Returns:
        None. All results are written to ``output_dir``.
    """
    image = Image.open(img_path)
    image_preprocessor = ImagePreprocessor(
        image,
        overlapping_tolerance=0.3,
        num_layers=5,
        min_patch_resolution=512,
        max_patch_resolution=4096,
    )
    image_preprocessor.process()
    print("preprocessing done")

    # NOTE(review): the spotter is constructed once per call, i.e. once per map.
    spotter = Spotter(spotter_config, model_weights, confidence_thresh=0.8, draw_thresh=0.85)

    # The drawing canvas is assembled from the patches of layer 0.
    base_image_batch, base_offset_xs, base_offset_ys = image_preprocessor.get_image_patches(0)
    vis = PolygonVisualizer()
    vis.canvas_from_patches(base_image_batch, base_offset_xs, base_offset_ys)

    all_layer_results = []
    for i in range(image_preprocessor.num_layers):
        # To dump the patches of each layer, uncomment the following line:
        # image_preprocessor.save_patches(os.path.join(output_dir, f'layer_{i}_patches'), layer=i)

        image_batch, offset_xs, offset_ys = image_preprocessor.get_image_patches(i)
        spotter.load_batch(image_batch, offset_xs, offset_ys)
        results = spotter.inference_batch()
        all_layer_results.extend(results)

        if save_each_layer:
            vis.draw(results).save(os.path.join(output_dir, f'combined_tagged_{i}.png'))
            vis.save_json(results, os.path.join(output_dir, f'combined_tagged_{i}.json'))

    vis.draw(all_layer_results).save(os.path.join(output_dir, 'combined_tagged_all_layers.png'))
    vis.save_json(all_layer_results, os.path.join(output_dir, 'combined_tagged_all_layers.json'))

# Run the pyramid scan on the selected map sub-folders of map_data_topfolder.
# Only the sub-folders listed here are processed; other names used during
# development were '1858_vandevelde' and '1847_tobler'.
maps_to_process = ['1846_vandevelde', '1874_saunders', '1845_kiepert']

for map_data_subfolder in next(os.walk(map_data_topfolder))[1]:
    jpeg_list = glob.glob(map_data_topfolder + '/' + map_data_subfolder + '/*.jpeg')
    if len(jpeg_list) != 1:
        print(map_data_subfolder + " failed. Please ensure there is exactly 1 file with extension .jpeg in the folder.")
        continue
    if map_data_subfolder not in maps_to_process:
        continue

    # os.path.basename copes with whichever separator glob returns on this platform;
    # splitting on a backslash only worked on Windows and raised IndexError elsewhere.
    map_image = os.path.basename(jpeg_list[0])
    img_path = map_data_topfolder + '/' + map_data_subfolder + "/" + map_image
    map_name = os.path.basename(img_path).split('.')[0]  # file name up to its first dot
    output_dir = os.path.join(map_strec_topfolder, map_name)
    os.makedirs(output_dir, exist_ok=True)
    pyramid_scan(img_path, output_dir, save_each_layer=False)
    logging.info('Done cropping %s', img_path)

# 3: Label Combination

In [64]:
from PIL import Image, ImageFile
import json 
import pandas as pd
from collections import Counter
from shapely.geometry import Polygon, MultiPolygon
from itertools import combinations

import numpy as np
import importlib
import Clustering
import TextRectify
import TextAmalgamate
import ExtractHandling
import json
import pickle
import SpotterWrapper
import Grouping
import BezierSplineMetric
import FontSimilarity

# Reload the local pipeline modules so that edits made on disk are picked up
# without restarting the kernel.
for _module in (
    SpotterWrapper,
    Grouping,
    Clustering,
    TextRectify,
    TextAmalgamate,
    ExtractHandling,
    BezierSplineMetric,
    FontSimilarity,
):
    importlib.reload(_module)


# Name of the map folder under processed/strec that the following cells work on.
map_name_in_strec = 'kiepert_1845'

Using device: cuda


## 3.1 Subword Deduplication

In [2]:
# When True, only polygons flagged 'keep' by the rectifier are exported to the pre-merge JSON.
do_cluster_pre_merge = True

# Group the spotter detections of all layers into clusters.
with open(f'processed/strec/{map_name_in_strec}/combined_tagged_all_layers.json', 'r', encoding='utf-8') as f:
    clustered = Clustering.cluster_polygons(json.load(f))

# Rectify the texts inside each cluster and record which polygons to keep.
for label, cluster in clustered.items():
    cluster_texts = [polygon['text'] for polygon in cluster]
    cluster_scores = [polygon['score'] for polygon in cluster]

    rectifier = TextRectify.TextRectifier(0.95, 0.5, 10, True, True)
    rectifier.feed_data(cluster_texts, cluster_scores)
    rectifier.fit()
    rectified, mask = rectifier.get_rectified_text()

    if rectified is None:
        # Fall back to the longest text of the cluster for every polygon.
        # It has to be one entry per polygon: assigning the bare string made
        # rectified[i] below pick single characters (or raise IndexError).
        longest_text = max(cluster_texts, key=len)
        rectified = [longest_text] * len(cluster)

    # NOTE(review): mask is used exactly as returned; confirm it is a per-polygon
    # sequence even when the rectifier returns no rectified text.
    for i in range(len(cluster)):
        cluster[i]['text'] = rectified[i]
        cluster[i]['keep'] = mask[i]

# Draw the rectified clusters on the raw map for visual inspection.
image = Clustering.visualize_polygons(clustered, f'processed/strec/{map_name_in_strec}/raw.jpeg')
image.save(f'processed/strec/{map_name_in_strec}/combined_tagged_all_layers_rectified.png')

# Flatten the clusters into the index-keyed layout of the pre-merge JSON.
# With do_cluster_pre_merge enabled only polygons flagged 'keep' are exported;
# otherwise every polygon is exported.
exported_polygons = [
    polygon
    for cluster in clustered.values()
    for polygon in cluster
    if not do_cluster_pre_merge or polygon['keep']
]

polygon_x = {str(idx): polygon['polygon_x'] for idx, polygon in enumerate(exported_polygons)}
polygon_y = {str(idx): polygon['polygon_y'] for idx, polygon in enumerate(exported_polygons)}
texts = {str(idx): polygon['text'] for idx, polygon in enumerate(exported_polygons)}
scores = {str(idx): polygon['score'] for idx, polygon in enumerate(exported_polygons)}

json_data = {'polygon_x': polygon_x, 'polygon_y': polygon_y, 'text': texts, 'score': scores}

premerge_path = f'processed/strec/{map_name_in_strec}/combined_tagged_all_layers_rectified_premerge.json'
with open(premerge_path, 'w', encoding='utf-8') as f:
    json.dump(json_data, f, ensure_ascii=False, indent=4)

## 3.2 Nested Word Flattening

In [3]:
# Amalgamation stage. Per the original note, this expects
# "combined_tagged_all_layers_rectified_premerge.json" to exist in the processed
# folder of map_name_in_strec.
labels_for_amalgamation = ExtractHandling.prepare_labels_for_amalgamation(map_name_in_strec)
df = TextAmalgamate.amalgamate_labels_wrapper(labels_for_amalgamation, 0.75, .5)

# Persist the amalgamated labels for the sequence-recovery stage.
amalgamated_path = f'processed/strec/{map_name_in_strec}/deduplicated_flattened_labels.pickle'
with open(amalgamated_path, 'wb') as pickle_file:
    pickle.dump(df, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

947 labels.
875 labels.
854 labels.
852 labels.
851 labels.
Amalgamation completed with 851 labels.


## 3.3 Multi-Word Sequence Recovery

In [76]:
# NOTE(review): this overrides the map selected at the start of section 3 ('kiepert_1845').
map_name_in_strec = "vandevelde_1846"

import pickle
# Load the labels written by the flattening stage. The file is closed right after
# loading. Only unpickle files produced by this pipeline: pickle can execute code.
with open('processed/strec/' + map_name_in_strec + '/deduplicated_flattened_labels.pickle', 'rb') as handle:
    df = pickle.load(handle)

# Each entry of 'labels' is indexed as (polygon, text); split it into two columns.
df['polygons'] = df['labels'].apply(lambda x: x[0])
df['texts'] = df['labels'].apply(lambda x: x[1])

# Uncomment to draw splines later
## BezierSplineMetric.draw_splines(map_name_in_strec, polygons, texts, PCA_features, all_splines)

# reset index so list-based operations match df index
df = df.reset_index(drop=True).copy()

# pca for principal directions
df['PCA_features'] = Grouping.calc_PCA_feats(df['polygons'], do_separation=True, enhance_coords=True)

# find neighbors for spline metric consideration
df = BezierSplineMetric.calc_neighbours(df, radius_multiplier = 40)

# calculate spline metric between identified neighbors
df = BezierSplineMetric.spline_metric(df)

# Drop PCA_features - no longer needed (reassigned instead of mutated in place)
df = df.drop('PCA_features', axis=1)

In [96]:
def combine_labels(label1_row, label2_row):
    """Merge two label rows into the values of a single combined row.

    Args:
        label1_row: Row with 'labels' (indexed as polygon, text), 'scores' (dict)
            and 'neighbours' (list or None).
        label2_row: Second row with the same fields.

    Returns:
        A list ``[(polygon, text), polygon, text, neighbours, scores]`` where the
        polygon is the union of both polygons, the text joins both texts with a
        space (the polygon with the smaller ``bounds[0]`` first), the neighbours
        are the de-duplicated union of both lists, and the scores hold, per key,
        the smaller of the available values.
    """
    poly1, text1 = label1_row['labels'][0], label1_row['labels'][1]
    poly2, text2 = label2_row['labels'][0], label2_row['labels'][1]
    scores1, scores2 = label1_row['scores'], label2_row['scores']

    # A missing neighbour list counts as empty.
    neighbours1 = [] if label1_row['neighbours'] is None else label1_row['neighbours']
    neighbours2 = [] if label2_row['neighbours'] is None else label2_row['neighbours']

    poly_new = poly1.union(poly2)

    # Order the words by the minimum x of their polygons; on a tie the first label leads.
    if poly2.bounds[0] < poly1.bounds[0]:
        text_new = text2 + " " + text1
    else:
        text_new = text1 + " " + text2

    neighbours_new = list(set(neighbours1 + neighbours2))

    # A key present in only one dict keeps its single value (the other side defaults to +inf).
    missing = float('inf')
    scores_new = {}
    for key in set(scores1) | set(scores2):
        scores_new[key] = min(scores1.get(key, missing), scores2.get(key, missing))

    return [(poly_new, text_new), poly_new, text_new, neighbours_new, scores_new]

def recover_sequence(df, R, to_combine):
    """Merge the candidate label pairs and drop the rows they were built from.

    Args:
        df: Label frame whose index holds the label ids.
        R: Dict of cached pair scores keyed by label id; entries of merged labels
            are removed from it.
        to_combine: Iterable of ``(id1, id2)`` pairs to merge, in order.

    Returns:
        ``(df, R)``: the updated frame and the same dict ``R``.

    Note:
        The first merge adds its new row to the frame passed in before a copy is
        made; callers should use the returned frame.
    """
    for pair in to_combine:
        first, second = pair[0], pair[1]
        if first not in df.index or second not in df.index:
            # One of the two labels was already merged into a sequence earlier in
            # this pass, so this pair can no longer be combined.
            continue

        new_label = combine_labels(df.loc[first], df.loc[second])
        # The merged label is appended under the next id after the last index entry.
        new_label_index = int(df.index[-1]) + 1
        df.loc[new_label_index] = new_label
        df = df.drop([first, second]).copy()

        # Forget cached scores of the merged labels; absent keys are simply ignored.
        R.pop(first, None)
        R.pop(second, None)
    return df, R

def update_R_matrix(df, font_threshold, bezier_threshold, R = None):
    """Score every not-yet-scored pair of labels and list the pairs to merge.

    Args:
        df: Label frame; pairs are formed from ``df.index`` in order.
        font_threshold: A pair qualifies only if its font score is above this value.
        bezier_threshold: A pair qualifies only if its spline distance is below this value.
        R: Optional cache ``{i: {j: (font_score, spline_distance_score)}}`` from a
            previous call. Pairs already present are neither rescored nor proposed again.

    Returns:
        ``(R, to_combine)``: the updated cache and the list of ``(i, j)`` pairs
        scored in this call that satisfy both thresholds.
    """
    if R is None:
        R = {}
    to_combine = []
    for i, j in combinations(df.index, 2):
        scores_for_i = R.setdefault(i, {})
        if j in scores_for_i:
            continue
        # Font similarity is currently disabled and fixed to 1.
        font_score = 1 #FontSimilarity.font_sim(crop1, crop2)
        spline_distance_score = BezierSplineMetric.get_distance_metric(df, i, j, infinitely_large_as=10000000)
        scores_for_i[j] = (font_score, spline_distance_score)
        if font_score > font_threshold and spline_distance_score < bezier_threshold:
            to_combine.append((i, j))
    return R, to_combine

def sl_sequence_recovery_wrapper(df, font_threshold, bezier_threshold):
    """Repeat pair scoring and merging until the number of labels stops changing.

    Args:
        df: Label frame to process.
        font_threshold: Passed on to ``update_R_matrix``.
        bezier_threshold: Passed on to ``update_R_matrix``.

    Returns:
        The frame after the last merging pass.
    """
    R = None
    count_before, count_after = 0, len(df)

    # Fixed-point iteration: stop once a pass leaves the label count unchanged.
    while count_after != count_before:
        count_before = count_after

        # Score new pairs and collect the candidates for merging.
        R, to_combine = update_R_matrix(df, font_threshold, bezier_threshold, R)
        print(f"{count_before} labels.")

        # Merge the candidates into sequences.
        df, R = recover_sequence(df, R, to_combine)
        count_after = len(df)

    print(f"Sequence Recovery completed with {count_before} labels.")
    return df

# Run single-line sequence recovery with font_threshold=0 and bezier_threshold=0.1.
df = sl_sequence_recovery_wrapper(df, 0, .1)

854
854 labels.
1 13
2 79
8 851
9 127
12 837
19 80
20 497
22 124
23 33
24 785
27 463
29 819
32 69
36 500
37 457
43 492
46 122
56 508
60 62
70 81
71 470
82 454
84 711
91 129
92 165
94 133
96 564
99 102
107 115
111 117
131 527
136 151
137 155
139 145
142 169
144 170
147 166
148 162
149 157
150 787
152 171
156 180
164 838
168 185
174 179
177 575
181 821
186 617
188 201
190 535
191 252
202 241
203 226
205 255
207 227
208 846
209 235
210 811
212 220
214 624
222 223
224 249
232 791
237 246
239 253
244 251
248 634
257 636
259 263
260 765
261 276
262 281
266 268
267 563
274 325
280 282
284 287
285 639
289 658
290 297
291 312
292 411
293 307
294 421
295 329
298 311
300 799
301 442
302 323
304 413
306 576
308 580
309 739
310 405
313 664
315 797
316 317
319 321
326 653
332 334
333 336
335 351
337 390
338 367
339 802
340 369
343 356
345 804
352 364
357 669
358 683
360 773
363 805
366 375
371 384
376 379
378 803
385 389
388 809
393 708
394 404
396 402
397 741
398 432
401 748
403 685
406 435
407 813

In [94]:
# Inspect the label row stored under index 851.
df.loc[851]

labels        (MULTIPOLYGON (((2773.2839355469 969.486633300...
polygons      MULTIPOLYGON (((2773.2839355469 969.4866333008...
texts                                               naby sammil
neighbours                                                 None
scores        {1: 0.1893620868648559, 776: 0.061774939910144...
Name: 851, dtype: object

In [14]:
# Example: font similarity between the image crops of two labels (positional rows 1 and 2).
raw_map_path = "processed/strec/" + map_name_in_strec + "/raw.jpeg"
crop1 = Grouping.polygon_crop(df.iloc[1]['polygons'], Image.open(raw_map_path))
crop2 = Grouping.polygon_crop(df.iloc[2]['polygons'], Image.open(raw_map_path))
FontSimilarity.font_sim(crop1, crop2)

### Iterative process for sequence recovery

In [None]:
# TODO: planned interface for the iterative sequence-recovery stage - not implemented yet.
# The owning module (written "?" below) and the thresholds eps11 ... eps24 are still
# undefined, so the calls are kept as comments; as code they are a SyntaxError and
# stop the notebook from running top to bottom.
#
# df = ?.prepare_labels_for_single_line_sequence_recovery(df)
# df = ?.single_line_sequence_recovery_wrapper(df, eps11, eps12, eps13)
# df = ?.multi_line_sequence_recovery_wrapper(df, eps21, eps22, eps23, eps24)

# 4: Evaluation

In [1]:
from PIL import Image, ImageFile
import pandas as pd
from itertools import combinations
import scipy
import numpy as np
import importlib 

import Evaluation
# Force a re-import of the local Evaluation module so edits on disk take effect.
importlib.reload(Evaluation)
# NOTE(review): this only loads the autoreload extension; no `%autoreload` mode is
# selected in this cell - confirm whether `%autoreload 2` was intended.
%load_ext autoreload

## 4.1: Isolate crops to be used for evaluation

In [3]:

def visualize_crop(map_name_in_strec, raw_or_spotter, left_x, right_x, top_y, bottom_y):
    """Display a rectangular crop of one of the images stored for a processed map.

    Args:
        map_name_in_strec: Folder name of the map under ``processed/strec``.
        raw_or_spotter: Which image to open:
            ``"raw"`` -> ``raw.jpeg``;
            ``"spotter_<N>"`` (N a non-negative integer) -> ``combined_tagged_<N>.png``;
            ``"all"`` -> ``combined_tagged_all_layers.png``;
            ``"rectified"`` -> ``combined_tagged_all_layers_rectified.png``.
        left_x, right_x, top_y, bottom_y: Crop bounds in pixels.

    Raises:
        ValueError: If ``raw_or_spotter`` is not one of the selectors above
            (previously this surfaced as an UnboundLocalError).
    """
    fixed_file_names = {
        "raw": "raw.jpeg",
        "all": "combined_tagged_all_layers.png",
        "rectified": "combined_tagged_all_layers_rectified.png",
    }
    layer_prefix = "spotter_"

    if isinstance(raw_or_spotter, str) and raw_or_spotter in fixed_file_names:
        file_name = fixed_file_names[raw_or_spotter]
    elif (
        isinstance(raw_or_spotter, str)
        and raw_or_spotter.startswith(layer_prefix)
        and raw_or_spotter[len(layer_prefix):].isdigit()
    ):
        # Any layer index is accepted, not only 0-2.
        file_name = 'combined_tagged_' + raw_or_spotter[len(layer_prefix):] + '.png'
    else:
        raise ValueError(
            "unknown raw_or_spotter value " + repr(raw_or_spotter)
            + "; expected 'raw', 'spotter_<N>', 'all' or 'rectified'"
        )

    # The image file is closed again once the crop has been displayed.
    with Image.open('processed/strec/' + map_name_in_strec + '/' + file_name) as map_img:
        width, height = map_img.size
        print("full map is " + str(width) + " pixels wide by " + str(height) + " pixels high.\n displaying crop:")
        # PIL expects the crop box as (left, upper, right, lower).
        display(map_img.crop((left_x, top_y, right_x, bottom_y)))

# Pixel bounds of the crop to preview (the same four values as kiepert_gt_patch_1).
left_x, right_x = 2475, 3550
top_y, bottom_y = 4820, 5850

#visualize_crop("kiepert_1845", "all", left_x, right_x, top_y, bottom_y)

In [2]:
# Ground-truth evaluation patches, one list of four pixel coordinates per patch.
# NOTE(review): the kiepert values match left_x, right_x, top_y, bottom_y of the crop
# preview cell, so the order is presumably [left_x, right_x, top_y, bottom_y] - confirm
# against Evaluation.geographic_evaluation.
kiepert_gt_patch_1 = [2475, 3550, 4820, 5850]
saunders_gt_patch_1 = [3150, 4150, 2250, 3250]
saunders_gt_patch_2 = [6750, 7750, 2250, 3250]
saunders_gt_patch_3 = [5400, 6400, 4500, 5500]
saunders_gt_patch_4 = [7650, 8650, 5400, 6400]
saunders_gt_patch_5 = [7650, 8650, 3150, 4150]

## 4.2 Precision and Recall: IoU after 1:1 Matching

In [3]:
# Patch-level evaluation of the pyramid pipeline and of the baseline
# ("combined_tagged_0.json") on the ground-truth patches defined above.
# Both runs use the same named patch constants so they cannot drift apart.

## Patch-level geographic pairings for non-multiline-toponyms against our pyramid pipeline
pyramid_detected_kiepert, num_gt_kiepert, pyramid_IoU_pairs_kiepert = Evaluation.geographic_evaluation("kiepert_1845", "components", kiepert_gt_patch_1)
pyramid_detected_saunders1, num_gt_saunders1, pyramid_IoU_pairs_saunders1 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_1)
pyramid_detected_saunders2, num_gt_saunders2, pyramid_IoU_pairs_saunders2 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_2)
pyramid_detected_saunders3, num_gt_saunders3, pyramid_IoU_pairs_saunders3 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_3)
pyramid_detected_saunders4, num_gt_saunders4, pyramid_IoU_pairs_saunders4 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_4)
pyramid_detected_saunders5, num_gt_saunders5, pyramid_IoU_pairs_saunders5 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_5)

## Aggregating pyramid numbers and pairs to map-level figures
## (kiepert has a single patch, so its patch-level values already are the map-level ones)
num_gt_saunders = num_gt_saunders1 + num_gt_saunders2 + num_gt_saunders3 + num_gt_saunders4 + num_gt_saunders5
pyramid_detected_saunders = pyramid_detected_saunders1 + pyramid_detected_saunders2 + pyramid_detected_saunders3 + pyramid_detected_saunders4 + pyramid_detected_saunders5
pyramid_IoU_pairs_saunders = np.concatenate((pyramid_IoU_pairs_saunders1, pyramid_IoU_pairs_saunders2, pyramid_IoU_pairs_saunders3, pyramid_IoU_pairs_saunders4, pyramid_IoU_pairs_saunders5))

## Patch-level geographic pairings for non-multiline-toponyms against baseline
## (these calls rebind num_gt_kiepert and num_gt_saunders1..5; num_gt_saunders keeps the sum computed above)
baseline_detected_kiepert, num_gt_kiepert, baseline_IoU_pairs_kiepert = Evaluation.geographic_evaluation("kiepert_1845", "components", kiepert_gt_patch_1, "combined_tagged_0.json")
baseline_detected_saunders1, num_gt_saunders1, baseline_IoU_pairs_saunders1 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_1, "combined_tagged_0.json")
baseline_detected_saunders2, num_gt_saunders2, baseline_IoU_pairs_saunders2 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_2, "combined_tagged_0.json")
baseline_detected_saunders3, num_gt_saunders3, baseline_IoU_pairs_saunders3 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_3, "combined_tagged_0.json")
baseline_detected_saunders4, num_gt_saunders4, baseline_IoU_pairs_saunders4 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_4, "combined_tagged_0.json")
baseline_detected_saunders5, num_gt_saunders5, baseline_IoU_pairs_saunders5 = Evaluation.geographic_evaluation("saunders_1874", "components", saunders_gt_patch_5, "combined_tagged_0.json")

## Aggregate baseline numbers and pairs to map-level figures
baseline_detected_saunders = baseline_detected_saunders1 + baseline_detected_saunders2 + baseline_detected_saunders3 + baseline_detected_saunders4 + baseline_detected_saunders5
baseline_IoU_pairs_saunders = np.concatenate((baseline_IoU_pairs_saunders1, baseline_IoU_pairs_saunders2, baseline_IoU_pairs_saunders3, baseline_IoU_pairs_saunders4, baseline_IoU_pairs_saunders5))

retaining 49 labels fully inside crop area
retaining 43 labels that have alphabetic characters
retaining 74 labels fully inside crop area
retaining 69 labels that have alphabetic characters
retaining 6 labels fully inside crop area
retaining 6 labels that have alphabetic characters
retaining 13 labels fully inside crop area
retaining 11 labels that have alphabetic characters
retaining 11 labels fully inside crop area
retaining 10 labels that have alphabetic characters
retaining 35 labels fully inside crop area
retaining 32 labels that have alphabetic characters
retaining 47 labels fully inside crop area
retaining 46 labels that have alphabetic characters
retaining 87 labels fully inside crop area
retaining 87 labels that have alphabetic characters
retaining 25 labels fully inside crop area
retaining 23 labels that have alphabetic characters
retaining 65 labels fully inside crop area
retaining 60 labels that have alphabetic characters
retaining 33 labels fully inside crop area
retaining

In [4]:
# Report precision and recall of the baseline and of the pyramid pipeline for each map.
for heading, iou_pairs, num_detected, num_gt in (
    ("kiepert baseline", baseline_IoU_pairs_kiepert, baseline_detected_kiepert, num_gt_kiepert),
    ("kiepert pyramid", pyramid_IoU_pairs_kiepert, pyramid_detected_kiepert, num_gt_kiepert),
    ("saunders baseline", baseline_IoU_pairs_saunders, baseline_detected_saunders, num_gt_saunders),
    ("saunders pyramid", pyramid_IoU_pairs_saunders, pyramid_detected_saunders, num_gt_saunders),
):
    print("\n" + heading + "\n")
    Evaluation.prec_rec(iou_pairs, num_detected, num_gt)


kiepert baseline

Avg of Geographic Precision: 0.19116442583187376
Avg of Geographic Recall: 0.14226189829348745
Avg of Text Precision: 0.08627091726132564
Avg of Text Recall: 0.06420161284563769

kiepert pyramid

Avg of Geographic Precision: 0.31668587247270935
Avg of Geographic Recall: 0.5081703535027197
Avg of Text Precision: 0.2951939227107476
Avg of Text Recall: 0.47368327132654847

saunders baseline

Avg of Geographic Precision: 0.06051490020519661
Avg of Geographic Recall: 0.00974392460931132
Avg of Text Precision: 0.08252460898511729
Avg of Text Recall: 0.013287860768790072

saunders pyramid

Avg of Geographic Precision: 0.21679753988318085
Avg of Geographic Recall: 0.4721776927964193
Avg of Text Precision: 0.191391017192887
Avg of Text Recall: 0.4168431476150166
