In [None]:
import sys

if ".." not in sys.path:
    sys.path.append("..")

import os
import PIL.Image as Image
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pickle

from collections import defaultdict

from common.task import path_join, load_ground_truths, get_best_bag_of_features_histograms

%load_ext autoreload
%autoreload 2
%matplotlib widget
%matplotlib inline

In [None]:
# Page identifier (file stem, no extension); the ground-truth file shares this stem.
img_path = '2700270'
# Parameters forwarded to get_best_bag_of_features_histograms in the next cell.
n_centroids = 100
step_size = 50

# Load the ground-truth entries for this page.
visual_words = load_ground_truths(f'{img_path}.gtp')
# Entry 3 is the query word used by the following cells.
visual_word = visual_words[3]
print(f"{visual_word=}")

In [None]:
# The query box is the first four fields of the ground-truth record
# (the visualization cell unpacks them as x1, y1, x2, y2).
query_box = visual_word[:4]
rv = get_best_bag_of_features_histograms(
    '2700270.png',
    query_box,
    n_centroids,
    step_size,
)

# Visualization

In [None]:
# Ground-truth record: four bounding-box coordinates plus one trailing field unused here.
x1, y1, x2, y2, _ = visual_word

# Read the page and convert it to a uint8 array inside the context manager,
# so the image file handle is closed as soon as the pixel data has been read.
with Image.open(path_join('pages', '2700270.png')) as document:
    doc_arr = np.asarray(document, dtype='uint8')

# Crop the query word: the first array axis is sliced by y, the second by x.
req_arr = doc_arr[y1:y2, x1:x2]

# Colormap passed by name: cm.get_cmap() was deprecated in Matplotlib 3.7
# and removed in 3.9, while a string name works on every version.
plt.imshow(req_arr, cmap="Greys_r")
plt.title("Request image")
plt.show()

In [None]:
# Draw the entries of rv, in the order returned, on a row_cols x row_cols grid.
row_cols = 4
fig = plt.figure(figsize=(10, 6))
fig.suptitle("Best matching windows in document")

# zip with range() caps the number of tiles at the grid capacity (row_cols**2).
for idx, info in zip(range(row_cols**2), rv):
    # Each entry carries its window as (left, top, right, bottom) slice bounds.
    wx, wy, wxx, wyy = info['window']
    ax = fig.add_subplot(row_cols, row_cols, idx + 1)
    subimg = doc_arr[wy:wyy, wx:wxx]
    # Colormap passed by name: cm.get_cmap() was removed in Matplotlib 3.9.
    ax.imshow(subimg, cmap='Greys_r')

plt.show()

# Evaluation

In [None]:
from common.evaluation import SegmentfreeWordSpottingEvaluator

# Author's index notes, kept from the original cell: 3 - and, 4 - instructions
# (presumably ground-truth indices and the words they hold; TODO confirm).

# Settings for this run. Note that img_path, n_centroids and step_size rebind
# the names set in the query cell further up.
img_path, n_centroids, step_size = '2700270.png', 100, 30
word_index, max_eval_length = 0, 10

sfwse = SegmentfreeWordSpottingEvaluator(img_path, n_centroids, step_size)

# crossvalidate() returns three values; keep the raw tuple in `result` as well.
prec, rec, mean_prec = result = sfwse.crossvalidate(word_index, max_eval_length)
print(f"{prec=} {rec=} {mean_prec=}")

In [None]:
from common.evaluation import SegmentfreeWordSpottingEvaluator

# Value handed to validate() below.
max_eval_length = 10

# Evaluator for the same page, configured by keyword.
sfwse = SegmentfreeWordSpottingEvaluator(img_path='2700270.png', n_centroids=100, step_size=30)

# validate() returns six values: three averages followed by three overall results.
(
    avg_prec,
    avg_rec,
    avg_mean_prec,
    overall_precision,
    overall_recall,
    overall_mean_prec,
) = sfwse.validate(max_eval_length)

print(f"{avg_prec=} {avg_rec=} {avg_mean_prec=}")
# One line per overall result, in the same order as before.
for report_line in (f"{overall_precision=}", f"{overall_recall=}", f"{overall_mean_prec=}"):
    print(report_line)