# 0: Import packages

In [3]:
from PIL import Image, ImageFile #pip install Pillow==9.4.0
import sys
import os
import numpy as np
import logging
import glob
import subprocess

# Show INFO-level log records (the cropping step reports progress with logging.info).
logging.basicConfig(level=logging.INFO)
# Remove Pillow's pixel-count safety limit so very large map scans can be opened.
# NOTE(review): this also disables decompression-bomb protection; only use on trusted images.
Image.MAX_IMAGE_PIXELS=None
# Allow Pillow to load image files whose data is truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# 1: Specify filepaths

In [4]:
# Folder holding the raw maps (one sub-folder per map) and the folders that
# receive the processed outputs (image crops and spotter results).
map_data_topfolder = 'raw_maps_20231024'
map_crops_topfolder = 'processed/crops'
map_streg_topfolder = 'processed/streg'

# Create the output folders up front. exist_ok=True makes re-running this cell
# a no-op and avoids the check-then-create race of a separate isdir() test.
for fp in [map_crops_topfolder, map_streg_topfolder]:
    os.makedirs(fp, exist_ok=True)

# IMPORTANT! Locate spotter directory and detectron weights.
# NOTE: git_clone_location is specific to one machine; edit it to the folder
# that contains your own checkouts before running the notebook.
git_clone_location = 'C:/Users/HP/'
spotter_directory = f'{git_clone_location}mapkurator-spotter-main/spotter-v2'
model_weights = f'{git_clone_location}detectron2-main/detectron2/checkpoint/model_v2_en.pth'

# For assistance, see how the data should be organized
from IPython.display import Image as ipyim
#ipyim(filename="./directory_organization.jpg", width=400, height = 200)

# 2: Crop all jpeg maps in (user defined) map_data_topfolder

In [14]:
# Cropped image pyramid: sub-crops using the edge size at position i are cut
# from the crops made with the edge size at position i-1.
edge_size_set = [2500, 1000, 500]
# Each level must be smaller than the one before it; this only warns, it does not stop the run.
if not all(larger > smaller for larger, smaller in zip(edge_size_set, edge_size_set[1:])):
    print("Danger! Edge size set should be strictly decreasing.")
# Fraction of a tile's edge that neighbouring tiles share (the tile stride is (1 - overlap_pct) * edge).
overlap_pct = .1

In [None]:
def crop(img_path, output_dir, edge_size_set):
    """Cut an image into overlapping square tiles, recursing through a pyramid of edge sizes.

    The image is pasted onto a new RGB canvas whose sides are multiples of the
    tile stride, then cut into square tiles of ``edge_size_set[0]`` pixels.
    Neighbouring tiles are offset by ``(1 - overlap_pct) * edge_size``, where
    ``overlap_pct`` is read from module scope. Tile (row ``h``, column ``w``)
    is saved as ``<output_dir>/e<edge>_h<h>_w<w>/e<edge>_h<h>_w<w>.jpg``.
    When more edge sizes remain, the saved tile is cropped again with the
    remaining sizes, and those sub-tiles are written inside the tile's own folder.

    Args:
        img_path: Path of the image file to tile.
        output_dir: Directory under which one sub-folder per tile is created.
        edge_size_set: Non-empty sequence of tile edge sizes in pixels. The
            first entry is used for this call, the rest for the recursive calls.
    """
    edge_size = edge_size_set[0]
    shift_size = (1-overlap_pct) * edge_size

    # Open the source inside a context manager so its file handle is released
    # as soon as the pixels are copied; this function recurses once per tile,
    # so leaked handles would otherwise pile up quickly.
    with Image.open(img_path) as map_img:
        width, height = map_img.size

        # pad the image to a size divisible by shift_size
        num_tiles_w = int(np.ceil(1. * width / shift_size))
        num_tiles_h = int(np.ceil(1. * height / shift_size))
        enlarged_width = int(shift_size * num_tiles_w)
        enlarged_height = int(shift_size * num_tiles_h)

        enlarged_map = Image.new(mode="RGB", size=(enlarged_width, enlarged_height))
        # paste map_img onto enlarged_map (anchored at the top-left corner)
        enlarged_map.paste(map_img)

    for idx in range(0, num_tiles_h):
        for jdx in range(0, num_tiles_w):
            left = jdx * shift_size
            upper = idx * shift_size
            img_clip = enlarged_map.crop((left, upper, left + edge_size, upper + edge_size))

            out_name_sans_jpg = 'e' + str(edge_size) + '_h' + str(idx) + '_w' + str(jdx)
            tile_dir = os.path.join(output_dir, out_name_sans_jpg)
            os.makedirs(tile_dir, exist_ok=True)

            img_path_new = tile_dir + "/" + out_name_sans_jpg + '.jpg'
            img_clip.save(img_path_new)

            # Descend one pyramid level: sub-tiles of this tile go into the tile's own folder.
            if len(edge_size_set) > 1:
                crop(img_path_new, tile_dir, edge_size_set[1:])

# Run crop on the maps: every sub-folder of map_data_topfolder must hold exactly
# one .jpeg file, and only the sub-folders named in the list below are processed.
for map_data_subfolder in next(os.walk(map_data_topfolder))[1]:
    jpeg_list = glob.glob(map_data_topfolder + '/' + map_data_subfolder + '/*.jpeg')
    if len(jpeg_list) != 1:
        print(map_data_subfolder + " failed. Please ensure there is exactly 1 file with extension .jpeg in the folder.")
    else:
        # basename copes with both "/" and "\" separators, so this also works
        # where glob does not return Windows-style paths.
        map_image = os.path.basename(jpeg_list[0])
        if map_data_subfolder in ['1874_saunders','1858_vandevelde','1847_tobler']:
            img_path = map_data_topfolder + '/' + map_data_subfolder + "/" + map_image
            # Map name = file name without its extension; splitext keeps any
            # other dots that are part of the name.
            map_name = os.path.splitext(map_image)[0]
            output_dir = os.path.join(map_crops_topfolder, map_name)
            os.makedirs(output_dir, exist_ok=True)
            crop(img_path, output_dir, edge_size_set)
            logging.info('Done cropping %s', img_path)

# 3: Scan for text in all crops in (user defined) map_crops_topfolder

In [40]:
# Set up spotter arguments
gpu_id = 0  # NOTE(review): not referenced by the commands built in this notebook - confirm whether it is still needed
spotter_model = 'spotter-v2'  # NOTE(review): likewise unused in the visible cells
spotter_config = spotter_directory + '/configs/PALEJUN/Finetune/Base-SynthMap-Polygon.yaml'
inference_py_loc = spotter_directory + '/tools/inference.py'
# Forward slashes keep the working directory usable as a plain command-line argument.
curr_wd = os.getcwd().replace("\\","/")

## Run spotter on all files in processed/crops/
# if you remove --output_json, it will output a picture of the image with recognized text overlaid
# The command is an argument list rather than one string: without shell=True a
# single string is only split into arguments on Windows, and any path that
# contains a space would be broken apart.
command = [
    'python', inference_py_loc,
    '--config-file', spotter_config,
    '--model_weights', model_weights,
    '--output_json',
    '--inputs', 'True',
    '--curr_wd', curr_wd,
    '--map_crops_topfolder', map_crops_topfolder,
    '--map_streg_topfolder', map_streg_topfolder,
]
subprocess.run(command)

In [7]:
# Sample command for running the spotter on a single image
# (you must define filepath_to_jpg and filepath).
filepath_to_jpg = "processed/crops/saunders_1874/e2500_h0_w0/e2500_h0_w0.jpg"
filepath = "processed"
# Passed as an argument list so it runs without shell=True on every platform
# and tolerates spaces in any of the paths.
command = [
    'python', inference_py_loc,
    '--config-file', spotter_config,
    '--model_weights', model_weights,
    '--input', filepath_to_jpg,
    '--output', filepath,
]
subprocess.run(command)

CompletedProcess(args='python C:/Users/HP/mapkurator-spotter-main/spotter-v2/tools/inference.py --config-file C:/Users/HP/mapkurator-spotter-main/spotter-v2/configs/PALEJUN/Finetune/Base-SynthMap-Polygon.yaml --model_weights C:/Users/HP/detectron2-main/detectron2/checkpoint/model_v2_en.pth --input processed/crops/1871_whitney/h1_w0.jpg --output processed', returncode=0)