# Install Python Dependencies
Run the following code cell to install the required python packages.
Note: your python environment should be python 3.8 or newer.

In [None]:
%pip install -r requirements.txt

# Next, download the annotated data from the ICDAR 2024 MapText competition.

In [None]:
!wget https://zenodo.org/records/11516933/files/rumsey_train.json
!wget https://zenodo.org/records/11516933/files/rumsey_val.json

# Download the Map Images
Optionally, you can download the source map images for these annotations. These images take up roughly 2 GB of disk storage, and you can download them by running the following cells:

In [None]:
!wget https://zenodo.org/records/11516933/files/train.zip
!unzip train.zip
!wget https://zenodo.org/records/11516933/files/val.zip
!unzip val.zip

In [6]:
# run this cell to delete the .zip files for the previous download
!rm -rf train.zip
!rm -rf val.zip

# Construct and Evaluate Linkage Graphs
First, fill in the variables in the cell below to select which linkage graph method you want to use. Then, run the cell to construct linkage graphs using that method, and evaluate the results.

In [None]:
import sys
import os
import json
import random
sys.path.append(os.getcwd() + "/scripts")
from map_graph import FeatureNode, prims_mst, distance_threshold_graph, MahalanobisMetric
from compare_linkages import map_list_compare_linkages, get_stats_from_results_file, LinkageMethod
from cross_validation_mahalanobis import train_mahalanobis_metric_for_fold
map_ids = []
# change MAP_SAMPLE_SIZE to change the number of maps you randomly select.
MAP_SAMPLE_SIZE = 700
# to use the validation maps instead, change "annotations_filepath" from
# "rumsey_train.json" to "rumsey_val.json"
annotations_filepath = "test_annotations.json"
with open(annotations_filepath, "r") as f:
    for map_annotation in json.load(f):
        map_ids.append(map_annotation["image"])
    map_sample = random.sample(map_ids, MAP_SAMPLE_SIZE)
    # you can change the linkage method here by changing the "linkage_method" variable
    # use the following variable for the linkage method presented in our paper
    linkage_method = LinkageMethod(prims_mst, FeatureNode.EdgeCostFunction([1, 1, 1]))
    # Uncomment the following lines to create MSTs using the Mahalanobis distance
    # note: training the Mahalanobis metric will take several minutes.
    """ M = train_mahalanobis_metric_for_fold([map_id for map_id in map_ids if map_id not in map_sample], "random_sample", annotations_filepath)
    linkage_method = LinkageMethod(prims_mst, MahalanobisMetric(M)) """
    # set the linkage_method as follows to create character distance threshold linkage graphs
    linkage_method = LinkageMethod(distance_threshold_graph, FeatureNode.distance)

    # name the output file for your method by setting the "name" variable.
    # this output file will be stored in a folder called "results"
    name = "train_edge_cost_heuristic"
    output_file_name = "results/" + name + ".json"
    # Create the "results" folder if it doesn't exist
    os.makedirs("results", exist_ok=True)
    #map_list_compare_linkages(map_sample, name, annotations_filepath, linkage_method, output_file_name)
    get_stats_from_results_file(output_file_name, annotations_filepath)

[0.7, 0.85, 0.90625, 0.896551724137931, 0.5813953488372093, 0.9523809523809523, 0.8301886792452831, 0.2, 0.8840579710144928, 0.9863013698630136, 0.9724770642201835, 0.8714285714285714, 0.9259259259259259, 1.0, 0.5925925925925926, 0.8974358974358975, 0.5357142857142857, 0.8695652173913043, 0.6046511627906976, 0.9518072289156626, 0.9117647058823529, 0.38095238095238093, 0.5714285714285714, 0.8, 0.9137931034482759, 0.8823529411764706, 0.9354838709677419, 0.8235294117647058, 0.9074074074074074, 0.6029411764705882, 0.943089430894309, 0.8987341772151899, 0.975609756097561, 0.8035714285714286, 0.6779661016949152, 0.8314606741573034, 0.9166666666666666, 0.7872340425531915, 0.9285714285714286, 0.9558823529411765, 0.8888888888888888, 0.9367088607594937, 0.9056603773584906, 0.5402298850574713, 0.9473684210526315, 0.925, 0.775, 0.8695652173913043, 0.9787234042553191, 0.7397260273972602, 0.9342105263157895, 0.8095238095238095, 0.7931034482758621, 0.8939393939393939, 0.926829268292683, 0.66666666666

# Visualizing Linkage Graphs
To visualize the linkage graphs drawn on various maps, you can run the following code cell.
Note: you will need to have downloaded the map images in order to create these visualizations.
__Once you run this cell, navigate to "scripts/annotated_linkage_results/ to view the visualization images.__

In [6]:
import sys
import os
import json
sys.path.append(os.getcwd() + "/scripts")
import draw_features_and_linkages as dfl
import map_graph
import multiword_name_extraction

# change this variable to choose which map file you want to visualize
map_filename = "rumsey/train/5797073_h2_w9.png"
mg = map_graph.MapGraph(map_filename)
# change the linkage method here to visualize different methods
map_graph.prims_mst(mg.nodes, map_graph.FeatureNode.EdgeCostFunction([1, 1, 1]))
map_annotations = multiword_name_extraction.extract_map_data_from_all_annotations(map_filename)
# equation_1.png is currently set to be a visualization of the MST method from our paper.
dfl.draw_features_and_linkages(map_filename, mg, "equation_1.png", map_dir=os.getcwd(), show_image=False)
mg = map_graph.MapGraph(map_filename)
map_graph.distance_threshold_graph(mg.nodes)
map_annotations = multiword_name_extraction.extract_map_data_from_all_annotations(map_filename)
# character_distance_threshold.png will create a visualization of the character distance threshold linkage graph.
dfl.draw_features_and_linkages(map_filename, mg, "character_distance_threshold.png",map_dir=os.getcwd(), show_image=False)

Text labels from correctly linked multiword phrases are highlighted in green.
Text labels from incorrectly linked multiword phrases are highlighted in red.
Text labels from single-word phrases are highlighted in blue.