# 0: Import packages

In [None]:
from PIL import Image, ImageFile #pip install Pillow==9.4.0
import sys
import os
import numpy as np
import logging
import glob
import subprocess
import json
import pandas as pd

from ImageCrop import ImagePreprocessor
from SpotterWrapper import Spotter, PolygonVisualizer
from IPython.display import display
from shapely.geometry import Polygon

# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)
# A limit of None switches off Pillow's maximum-pixel-count check, so very
# large map scans can be opened.
Image.MAX_IMAGE_PIXELS=None
# Tell Pillow to load image files whose data is truncated instead of failing.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# 1: Specify filepaths

In [None]:
# Folder holding the raw maps, and folder receiving the spotter output
map_data_topfolder = 'raw_maps_20231024'
map_strec_topfolder = 'processed/strec'

# Make sure the output folder exists before anything is written to it
os.makedirs(map_strec_topfolder, exist_ok=True)

# IMPORTANT! Point these at the local clones of the spotter and detectron2
git_clone_location = 'C:/repo/'
spotter_directory = f'{git_clone_location}mapkurator-spotter/spotter-v2'
model_weights = f'{git_clone_location}detectron2-master/detectron2/checkpoint/model_v2_en.pth'
spotter_config = f'{spotter_directory}/configs/PALEJUN/Finetune/Rumsey_Polygon_Finetune.yaml'

# 2: Crop all jpeg maps in (user defined) map_data_topfolder

In [None]:
def pyramid_scan(img_path, output_dir, save_each_layer=False):
    """Run the text spotter on every pyramid layer of one map image.

    The image is split into patches by ``ImagePreprocessor`` (5 layers,
    patch resolution between 512 and 4096, overlap tolerance 0.3), each
    layer's patches are passed through ``Spotter`` and the detections of all
    layers are accumulated. The accumulated detections are drawn on a canvas
    built from the layer-0 patches and written to ``output_dir`` as
    ``combined_tagged_all_layers.png`` and ``combined_tagged_all_layers.json``.

    Args:
        img_path: Path of the map image to scan.
        output_dir: Existing directory that receives the output files.
        save_each_layer: When true, also write ``combined_tagged_<i>.png`` and
            ``combined_tagged_<i>.json`` for every layer ``i``.

    Returns:
        None. Results are written to disk.
    """
    # Keep the file open only for the duration of the scan; the original left
    # the handle open, which adds up when many large maps are processed.
    with Image.open(img_path) as image:
        image_preprocessor = ImagePreprocessor(image, overlapping_tolerance=0.3, num_layers=5, min_patch_resolution=512, max_patch_resolution=4096)
        image_preprocessor.process()
        print("preprocessing done")
        spotter = Spotter(spotter_config, model_weights, confidence_thresh=0.8, draw_thresh=0.85)
        all_layer_results = []

        # The drawing canvas is assembled from the patches of layer 0.
        base_image_batch, base_offset_xs, base_offset_ys = image_preprocessor.get_image_patches(0)
        vis = PolygonVisualizer()
        vis.canvas_from_patches(base_image_batch, base_offset_xs, base_offset_ys)

        for i in range(image_preprocessor.num_layers):
            # To also dump the patches of each layer, uncomment the following line
            # image_preprocessor.save_patches(os.path.join(output_dir, f'layer_{i}_patches'), layer=i)

            image_batch, offset_xs, offset_ys = image_preprocessor.get_image_patches(i)
            spotter.load_batch(image_batch, offset_xs, offset_ys)
            results = spotter.inference_batch()
            all_layer_results.extend(results)

            if save_each_layer:
                vis.draw(results).save(os.path.join(output_dir, f'combined_tagged_{i}.png'))
                vis.save_json(results, os.path.join(output_dir, f'combined_tagged_{i}.json'))

        vis.draw(all_layer_results).save(os.path.join(output_dir, 'combined_tagged_all_layers.png'))
        vis.save_json(all_layer_results, os.path.join(output_dir, 'combined_tagged_all_layers.json'))

# Run the pyramid scan on the selected maps.
# Every sub-folder of map_data_topfolder must hold exactly one .jpeg file;
# only the sub-folders named in maps_to_process are actually scanned.
maps_to_process = ['1846_vandevelde', '1874_saunders', '1845_kiepert']

for map_data_subfolder in next(os.walk(map_data_topfolder))[1]:
    jpeg_list = glob.glob(map_data_topfolder + '/' + map_data_subfolder + '/*.jpeg')
    if len(jpeg_list) != 1:
        print(map_data_subfolder + " failed. Please ensure there is exactly 1 file with extension .jpeg in the folder.")
        continue

    # os.path.basename works with either path separator; splitting on a
    # backslash only worked for the paths glob produces on Windows and raised
    # IndexError everywhere else.
    map_image = os.path.basename(jpeg_list[0])
    if map_data_subfolder not in maps_to_process:
        continue

    img_path = map_data_topfolder + '/' + map_data_subfolder + "/" + map_image
    map_name = os.path.basename(img_path).split('.')[0]  # file name up to the first dot
    output_dir = os.path.join(map_strec_topfolder, map_name)
    os.makedirs(output_dir, exist_ok=True)
    pyramid_scan(img_path, output_dir, save_each_layer=False)
    logging.info('Done cropping %s', img_path)

# 3: Label Combination

In [None]:
from PIL import Image, ImageFile
import json 
import pandas as pd
from collections import Counter
from shapely.geometry import Polygon, MultiPolygon
from itertools import combinations
import scipy
import numpy as np
import importlib
import Clustering
import TextRectify
import TextAmalgamate
import ExtractHandling
import json
import pickle

# Re-import the local helper modules so edits made to them are picked up
# without restarting the kernel.
for _helper_module in (Clustering, TextRectify, TextAmalgamate, ExtractHandling):
    importlib.reload(_helper_module)

# Name of the map's folder below processed/strec used by the following cells.
map_name_in_strec = 'saunders_1874'

## 3.1 Text Rectification

In [None]:
# When True, only polygons whose 'keep' flag is set are written to the
# pre-merge JSON; when False, every polygon is written.
do_cluster_pre_merge = True

# Read the spotter detections of all layers, then cluster the polygons.
spotter_json_path = f'processed/strec/{map_name_in_strec}/combined_tagged_all_layers.json'
with open(spotter_json_path, 'r', encoding='utf-8') as f:
    spotter_output = json.load(f)
clustered = Clustering.cluster_polygons(spotter_output)

# To inspect the raw clusters, draw them with
# Clustering.visualize_polygons(clustered, <path to raw.jpeg>) and save the image.

# Rectify the text of every cluster and flag which polygons to keep.
for label, cluster in clustered.items():
    texts = [polygon['text'] for polygon in cluster]
    scores = [polygon['score'] for polygon in cluster]

    rectifier = TextRectify.TextRectifier(0.95, 0.5, 10, True, True)
    rectifier.feed_data(texts, scores)
    rectifier.fit()
    rectified, mask = rectifier.get_rectified_text()

    # Fallback when no rectified text is available: use the longest raw text.
    if rectified is None:
        rectified = max(texts, key=len)
    # A single string must be applied to every polygon of the cluster.
    # Indexing it per polygon (as the original fallback did) would hand each
    # polygon one character of the text.
    if isinstance(rectified, str):
        rectified = [rectified] * len(cluster)
    # NOTE(review): assumes a missing mask means "keep every polygon" -
    # confirm against TextRectifier.get_rectified_text.
    if mask is None:
        mask = [True] * len(cluster)

    for i, polygon in enumerate(cluster):
        polygon['text'] = rectified[i]
        polygon['keep'] = mask[i]

image = Clustering.visualize_polygons(clustered, f'processed/strec/{map_name_in_strec}/raw.jpeg')
image.save(f'processed/strec/{map_name_in_strec}/combined_tagged_all_layers_rectified.png')

# Flatten the clusters into one list of polygons. With do_cluster_pre_merge
# set, polygons whose 'keep' flag is false are left out.
kept_polygons = [
    polygon
    for cluster in clustered.values()
    for polygon in cluster
    if not do_cluster_pre_merge or polygon['keep']
]

# Column-oriented layout: each field maps a running string index to its value.
polygon_x = {str(n): entry['polygon_x'] for n, entry in enumerate(kept_polygons)}
polygon_y = {str(n): entry['polygon_y'] for n, entry in enumerate(kept_polygons)}
texts = {str(n): entry['text'] for n, entry in enumerate(kept_polygons)}
scores = {str(n): entry['score'] for n, entry in enumerate(kept_polygons)}

json_data = {'polygon_x': polygon_x, 'polygon_y': polygon_y, 'text': texts, 'score': scores}

premerge_path = f'processed/strec/{map_name_in_strec}/combined_tagged_all_layers_rectified_premerge.json'
with open(premerge_path, 'w', encoding='utf-8') as f:
    json.dump(json_data, f, ensure_ascii=False, indent=4)

## 3.2 Text Amalgamation

In [None]:
# Amalgamation stage - requires "combined_tagged_all_layers_rectified_premerge.json"
# to exist in the processed folder of map_name_in_strec.
df = TextAmalgamate.amalgamate_labels_wrapper(
    ExtractHandling.prepare_labels_for_amalgamation(map_name_in_strec),
    0.75,
    .5,
)

# Persist the amalgamated labels next to the other outputs of this map
amalgamate_path = f'processed/strec/{map_name_in_strec}/amalgamate.pickle'
with open(amalgamate_path, 'wb') as pickle_file:
    pickle.dump(df, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

## 3.3 Grouping features (PCA) and visualization

In [None]:
# Each entry of the "labels" column is indexed as (polygon, text).
result = list(df["labels"])
polygons = [entry[0] for entry in result]
texts = [entry[1] for entry in result]
# Placeholder; filled by Grouping.calc_PCA_feats in the next cell.
PCA_features = []

In [68]:
# Re-import SpotterWrapper and Grouping so local edits take effect
import importlib
import SpotterWrapper
import Grouping

for _reloaded_module in (SpotterWrapper, Grouping):
    importlib.reload(_reloaded_module)

PCA_features = Grouping.calc_PCA_feats(polygons, do_separation=True, enhance_coords=True)
print("PCA features calculated.")

# Draw the amalgamated polygons, their texts and PCA features on the raw map
strec_map_dir = f'processed/strec/{map_name_in_strec}'
vis = SpotterWrapper.PolygonVisualizer()
canvas = Image.open(f'{strec_map_dir}/raw.jpeg')
vis.canvas_from_image(canvas)
vis.draw_poly(polygons, texts, PCA_features)
vis.save(f'{strec_map_dir}/output.jpeg')

  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]
  polygon_y = p.exterior.coords.xy[1]


PCA features calculated.


  vis.draw_poly(polygons, texts, PCA_features)
  for p in poly:


# 4: Evaluation

## 4.1: Isolate crops to be used for evaluation

In [None]:
from PIL import Image, ImageFile
import pandas as pd
from itertools import combinations
import scipy
import numpy as np

def visualize_crop(map_name_in_strec, raw_or_spotter, left_x, right_x, top_y, bottom_y):
    """Display a rectangular crop of one of a map's images.

    Args:
        map_name_in_strec: Name of the map's folder below ``processed/strec``.
        raw_or_spotter: Which image to crop: ``"raw"`` (``raw.jpeg``),
            ``"all"`` (``combined_tagged_all_layers.png``), ``"rectified"``
            (``combined_tagged_all_layers_rectified.png``) or
            ``"spotter_<n>"`` (``combined_tagged_<n>.png`` for layer ``n``).
        left_x, right_x, top_y, bottom_y: Crop window in pixels of the
            full-size image.

    Raises:
        ValueError: If ``raw_or_spotter`` is not one of the options above.
            Previously this case failed later with an UnboundLocalError.
    """
    fixed_sources = {
        "raw": "raw.jpeg",
        "all": "combined_tagged_all_layers.png",
        "rectified": "combined_tagged_all_layers_rectified.png",
    }
    layer_prefix = "spotter_"
    layer = ""
    if isinstance(raw_or_spotter, str) and raw_or_spotter.startswith(layer_prefix):
        layer = raw_or_spotter[len(layer_prefix):]

    if raw_or_spotter in fixed_sources:
        file_name = fixed_sources[raw_or_spotter]
    elif layer.isascii() and layer.isdigit():
        # Any layer index is accepted, not only 0-2.
        file_name = 'combined_tagged_' + layer + '.png'
    else:
        raise ValueError(
            "raw_or_spotter must be 'raw', 'all', 'rectified' or 'spotter_<layer>', got "
            + repr(raw_or_spotter)
        )

    # The context manager releases the file handle once the crop is shown.
    with Image.open('processed/strec/' + map_name_in_strec + '/' + file_name) as map_img:
        width, height = map_img.size
        print("full map is " + str(width) + " pixels wide by " + str(height) + " pixels high.\n displaying crop:")
        display(map_img.crop((left_x, top_y, right_x, bottom_y)))

# Crop window in full-map pixel coordinates. display_geographic_accuracy
# reads these four module-level names.
left_x, right_x = 2475, 3550
top_y, bottom_y = 4820, 5850

# Example: visualize_crop("kiepert_1845", "all", left_x, right_x, top_y, bottom_y)

In [None]:
# Ground-truth evaluation patches, one list of four pixel values per patch.
# NOTE(review): the order looks like [left_x, right_x, top_y, bottom_y] - the
# kiepert patch equals the left_x/right_x/top_y/bottom_y values set in the
# previous cell - confirm before relying on it.
kiepert_gt_patch_1 = [2475, 3550, 4820, 5850]
saunders_gt_patch_1 = [3150, 4150, 2250, 3250]
saunders_gt_patch_2 = [6750, 7750, 2250, 3250]
saunders_gt_patch_3 = [5400, 6400, 4500, 5500]
saunders_gt_patch_4 = [7650, 8650, 5400, 6400]
saunders_gt_patch_5 = [7650, 8650, 3150, 4150]

## 4.2 Precision and Recall: IoU after 1:1 Matching

In [None]:
# FUNCTIONS

## Load in (1) ground truth labels that were built using Via and (2) spotter labels
def load_ground_truth_labels(map_name_in_strec, multiline_handling, labels_on_fullsize_map=True):
    """Load the ground-truth label regions of one map into a DataFrame.

    Reads ``dependencies/ground_truth_labels/<map_name_in_strec>.json`` and
    takes the ``regions`` of the first top-level entry in that file.

    Args:
        map_name_in_strec: Map name; also the stem of the JSON file.
        multiline_handling: How regions sharing a ``multiline_g`` value are
            treated. ``'largest'`` keeps, per group, only the region with the
            longest annotation; ``'components'`` keeps every grouped region
            except that longest one. Regions without a ``multiline_g`` value
            are always kept. Any other value returns all regions unchanged.
        labels_on_fullsize_map: Currently unused; kept for call compatibility.

    Returns:
        DataFrame with columns ``all_points_x``, ``all_points_y``,
        ``annotation`` and ``multiline_g`` (plus ``annotation_length`` when
        one of the two multiline modes is applied).
    """
    gt_path = 'dependencies/ground_truth_labels/' + map_name_in_strec + '.json'
    # Read explicitly as UTF-8: annotations may contain non-ASCII characters
    # and the platform default encoding is not UTF-8 everywhere.
    with open(gt_path, encoding='utf-8') as f:
        gt_labels_tmp = json.load(f)

    first_entry = gt_labels_tmp[list(gt_labels_tmp.keys())[0]]
    gt_labels = pd.DataFrame(
        [
            {
                'all_points_x': obs['shape_attributes']['all_points_x'],
                'all_points_y': obs['shape_attributes']['all_points_y'],
                'annotation': obs['region_attributes']['annotation'],
                'multiline_g': obs['region_attributes'].get('multiline_g', None)
            }
            for obs in first_entry['regions']
        ],
        # Naming the columns keeps them present even when there are no regions.
        columns=['all_points_x', 'all_points_y', 'annotation', 'multiline_g'],
    )

    if multiline_handling in ('largest', 'components'):
        gt_labels['annotation_length'] = gt_labels['annotation'].apply(len)
        ungrouped = gt_labels[gt_labels['multiline_g'].isnull()]
        grouped = gt_labels.dropna(subset=['multiline_g'])
        # Index label of the longest annotation within each multiline group.
        longest_idx = grouped.groupby('multiline_g')['annotation_length'].idxmax()
        if multiline_handling == 'largest':
            selected = grouped.loc[longest_idx]
        else:
            selected = grouped.loc[~grouped.index.isin(longest_idx)]
        gt_labels = pd.concat([selected, ungrouped])
    return gt_labels

## Retain a subset of labels based on crop coordinates
def coords_fail_condition(list, direction_for_drop, value, baseline):
    """Return 1 if a label must be dropped for this coordinate rule, else 0.

    Args:
        list: Coordinates of one label along one axis. (The name shadows the
            builtin; it is kept so existing callers are unaffected.)
        direction_for_drop: ``'<'`` requires every coordinate to be strictly
            smaller than ``value``; ``'>'`` requires every coordinate to be
            strictly greater than ``value``.
        value: The boundary the coordinates are compared with.
        baseline: Drop flag from earlier rules; when it is already 1 the
            label stays dropped and no comparison is made.

    Returns:
        1 when ``baseline`` is 1 or any coordinate breaks the rule, otherwise
        0 (an empty coordinate list breaks no rule).

    Raises:
        ValueError: If ``direction_for_drop`` is neither ``'<'`` nor ``'>'``
            (previously an UnboundLocalError).
    """
    if baseline == 1:
        return 1
    if direction_for_drop == '<':
        rule_broken = any(not (coord < value) for coord in list)
    elif direction_for_drop == '>':
        rule_broken = any(not (coord > value) for coord in list)
    else:
        raise ValueError(
            "direction_for_drop must be '<' or '>', got " + repr(direction_for_drop)
        )
    return 1 if rule_broken else 0
        
def retain_crop_labels_only(df, left_x, right_x, top_y, bottom_y):
    """Keep only the labels whose points all lie strictly inside the crop.

    Args:
        df: DataFrame with ``all_points_x`` and ``all_points_y`` columns, each
            cell holding the coordinates of one label.
        left_x, right_x, top_y, bottom_y: Crop window; a label is retained
            only if every x is strictly between ``left_x`` and ``right_x`` and
            every y is strictly between ``top_y`` and ``bottom_y``.

    Returns:
        A new DataFrame with the retained rows and a ``drop`` column of zeros
        (kept for compatibility with the previous output). Unlike before, the
        input frame is not modified and the result is an independent copy, so
        callers can add columns to it safely. An empty input yields an empty
        result.
    """
    def _strictly_inside(xs, ys):
        # Same four strict comparisons as the coords_fail_condition rules.
        return (all(x > left_x and x < right_x for x in xs)
                and all(y > top_y and y < bottom_y for y in ys))

    keep_flags = [
        _strictly_inside(xs, ys)
        for xs, ys in zip(df['all_points_x'], df['all_points_y'])
    ]
    # A boolean Series (not a plain list) so an empty frame is still filtered
    # by rows instead of being read as an empty column selection.
    keep_mask = pd.Series(keep_flags, index=df.index, dtype=bool)
    cropped = df.loc[keep_mask].copy()
    cropped['drop'] = 0
    print("retaining " + str(len(cropped)) + " labels fully inside crop area")
    return cropped

## Calculate and Match IoUs
def calculate_IoU_matrix(spotter_polygons, gt_polygons):
    """Compute the pairwise intersection-over-union of two polygon sets.

    Args:
        spotter_polygons: Iterable of detected polygons. Each object must
            offer ``intersection(other)`` and ``union(other)`` returning an
            object with an ``area`` attribute.
        gt_polygons: Iterable of ground-truth polygons with the same interface.

    Returns:
        Float array of shape ``(len(spotter_polygons), len(gt_polygons))``;
        entry ``[i, j]`` is the IoU of spotter polygon ``i`` and ground-truth
        polygon ``j``, or 0 when their union has no area. The result is 2-D
        even when either input is empty, so ``shape[1]`` is always valid.
    """
    spotter_list = list(spotter_polygons)
    gt_list = list(gt_polygons)
    IoU_matrix = np.zeros((len(spotter_list), len(gt_list)), dtype=float)
    for row, sptr_poly in enumerate(spotter_list):
        for col, gt_poly in enumerate(gt_list):
            union_area = sptr_poly.union(gt_poly).area
            if union_area > 0:
                IoU_matrix[row, col] = sptr_poly.intersection(gt_poly).area / union_area
    return IoU_matrix

def _best_1to1_pair_ious(IoU_matrix):
    """Return the IoUs of the one-to-one matching with the largest IoU sum."""
    # Import the submodule explicitly: a bare "import scipy" does not
    # guarantee that scipy.optimize is loaded.
    from scipy.optimize import linear_sum_assignment

    matrix = np.asarray(IoU_matrix, dtype=float)
    if matrix.ndim != 2 or matrix.size == 0:
        # Nothing to match when either side has no polygons.
        return np.array([], dtype=float)
    row_ind, col_ind = linear_sum_assignment(matrix, maximize=True)
    return matrix[row_ind, col_ind]


def maximize_1to1_precision(IoU_matrix):
    """Match spotter and ground-truth polygons one-to-one for precision.

    Args:
        IoU_matrix: Array with one row per spotter polygon and one column per
            ground-truth polygon.

    Returns:
        Tuple ``(num_detected, IoU_pairs)``: the number of rows and the IoUs
        of the matched pairs (an empty array when nothing can be matched).
    """
    num_detected = np.asarray(IoU_matrix).shape[0]
    return num_detected, _best_1to1_pair_ious(IoU_matrix)


def maximize_1to1_recall(IoU_matrix):
    """Match spotter and ground-truth polygons one-to-one for recall.

    Args:
        IoU_matrix: Array with one row per spotter polygon and one column per
            ground-truth polygon.

    Returns:
        Tuple ``(num_gt, IoU_pairs)``: the number of columns (0 for a matrix
        that is not 2-D) and the IoUs of the matched pairs.
    """
    matrix = np.asarray(IoU_matrix)
    num_gt = matrix.shape[1] if matrix.ndim > 1 else 0
    return num_gt, _best_1to1_pair_ious(IoU_matrix)

## Full pipeline 
def display_geographic_accuracy(map_name_in_strec, multiline_handling, spotter_layer, crop=None):
    """Print the average IoU between spotter and ground-truth labels in a crop.

    Ground-truth and spotter labels are loaded, reduced to the labels lying
    fully inside the crop window, converted to polygons and matched
    one-to-one. The summed IoU of the matched pairs is then averaged over all
    spotter boxes and over all ground-truth boxes in the crop, so unmatched
    boxes count in the denominators.

    Args:
        map_name_in_strec: Map name passed to the label loaders.
        multiline_handling: Passed to ``load_ground_truth_labels``.
        spotter_layer: Passed to ``load_spotter_labels``.
        crop: Optional ``(left_x, right_x, top_y, bottom_y)`` window. When
            omitted, the module-level ``left_x``, ``right_x``, ``top_y`` and
            ``bottom_y`` are used, as before.

    Returns:
        The cropped ground-truth labels, as returned by
        ``cast_coords_as_Polygons``.

    NOTE(review): ``load_spotter_labels`` and ``cast_coords_as_Polygons`` are
    not defined in the visible part of the notebook; they must exist in the
    session before this is called.
    """
    if crop is None:
        crop = (left_x, right_x, top_y, bottom_y)
    crop_left, crop_right, crop_top, crop_bottom = crop

    print("\n------")
    print(map_name_in_strec + " | " + multiline_handling + " of multiline GT labels | spotter layer " + spotter_layer)
    print("------")
    gt_labels_full = load_ground_truth_labels(map_name_in_strec, multiline_handling)
    gt_labels_crop = retain_crop_labels_only(gt_labels_full, crop_left, crop_right, crop_top, crop_bottom)
    gt_labels_crop = cast_coords_as_Polygons(gt_labels_crop)
    gt_polys = gt_labels_crop['label_polygons']

    spotter_labels_full = load_spotter_labels(map_name_in_strec, spotter_layer)
    spotter_labels_crop = retain_crop_labels_only(spotter_labels_full, crop_left, crop_right, crop_top, crop_bottom)
    spotter_labels_crop = cast_coords_as_Polygons(spotter_labels_crop)
    spotter_polys = spotter_labels_crop['label_polygons']

    IoU_matrix = calculate_IoU_matrix(spotter_polys, gt_polys)
    num_detected, precision_pairs = maximize_1to1_precision(IoU_matrix)
    num_gt, recall_pairs = maximize_1to1_recall(IoU_matrix)

    # Report the averages the labels promise, not the raw (count, array)
    # tuples. An empty side gives 0.0 instead of dividing by zero.
    avg_over_detected = float(np.sum(precision_pairs)) / num_detected if num_detected else 0.0
    avg_over_gt = float(np.sum(recall_pairs)) / num_gt if num_gt else 0.0
    print("Avg of 1:1 IoUs: " + str(avg_over_detected) + " (over " + str(num_detected) + " spotter boxes)")
    print("Avg of m:1 IoUs: " + str(avg_over_gt) + " (over " + str(num_gt) + " ground truth boxes)")

    return gt_labels_crop

In [None]:
# Header explaining how the printed scores are to be read
for header_line in (
    "Average IoU for text bounding boxes; unmatched boxes included in denominators.",
    "m:1 -> multiple spotter text boxes can be matched with a single ground truth text box",
    "",
):
    print(header_line)

# Evaluate the rectified all-layer output against the kiepert_1845 ground truth.
# (Use "largest" instead of "components" to score only the longest line of
# each multiline ground-truth label.)
layer_s = 'combined_tagged_all_layers_rectified'
a = display_geographic_accuracy("kiepert_1845", "components", layer_s + ".json")

In [None]:
# Show the cropped ground-truth labels returned by display_geographic_accuracy.
a