# Evaluation of methods
- Contains evaluations of: segmentation, postprocessing order removal, postprocessing cross-reference linking, and location linking to Wikidata

In [None]:
import os
os.chdir('../../')
print(os.getcwd())

from utils import json_helpers as jh
from utils.paths import *
import geopy.distance
#from scraping_and_segmenting_helpers import *

# Statistics paths (evaluation results).
# The *_FOLDER names are not defined in this notebook; presumably they come
# from `from utils.paths import *` -- verify there.
# The '.txt' paths below are opened directly with open() by the stats writers.
E1_SEGMENTER_STATS = f'{SEGMENTER_STATS_FOLDER}/stats_segmenter_e1.txt'
E2_SEGMENTER_STATS = f'{SEGMENTER_STATS_FOLDER}/stats_segmenter_e2.txt'

E1_ORDER_STATS = f'{ORDER_STATS_FOLDER}/e1_order_removal_precision.txt'
E2_ORDER_STATS = f'{ORDER_STATS_FOLDER}/e2_order_removal_precision.txt'

# The cross-reference stats paths carry no extension: they are read and written
# through jh.read_items / jh.write_items rather than opened directly.
E1_CROSS_STATS = f'{CROSS_STATS_FOLDER}/e1_links_recall'
E2_CROSS_STATS = f'{CROSS_STATS_FOLDER}/e2_links_recall'

E1_LOCATION_STATS = f'{LOCATIONS_STATS_FOLDER}/e1_location_recall.txt'
E2_LOCATION_STATS = f'{LOCATIONS_STATS_FOLDER}/e2_location_recall.txt'

# Test data paths (no extension; all of them are loaded with jh.read_items).
E1_SEGMENTER_TEST = f'{SEGMENTER_TEST_FOLDER}/e1_test_segmenter'
E2_SEGMENTER_TEST = f'{SEGMENTER_TEST_FOLDER}/e2_test_segmenter'

E1_ORDER_TEST = f'{ORDER_TEST_FOLDER}/e1_test_order'
E2_ORDER_TEST = f'{ORDER_TEST_FOLDER}/e2_test_order'

E1_CROSS_TEST = f'{CROSS_TEST_FOLDER}/e1_test_links'
E2_CROSS_TEST = f'{CROSS_TEST_FOLDER}/e2_test_links'

E1_LOCATION_TEST = f'{LOCATIONS_TEST_FOLDER}/e1_test_locations'
E2_LOCATION_TEST = f'{LOCATIONS_TEST_FOLDER}/e2_test_locations'


### Evaluation functions

In [None]:
def recall_segmentation(entries: list[dict]):
    """Return the fraction of entries whose 'entryid' is not the string "na".

    Args:
        entries: Test items; each must have an 'entryid' key.

    Returns:
        Number of entries with 'entryid' != "na" divided by len(entries).

    Raises:
        ZeroDivisionError: If `entries` is empty.
    """
    total = len(entries)
    found = 0
    for entry in entries:
        # "na" is the marker that is excluded from the true positives.
        if entry['entryid'] != "na":
            found += 1
    return found / total

def write_recall_to_stats(filename: str, recall: float, desc: str):
    """Append a recall entry to a stats file.

    The entry consists of a separator line, the description and the recall
    value. The file is opened in append mode, so earlier entries are kept and
    re-running a cell adds another entry.

    Args:
        filename: Path of the stats file to append to.
        recall: Recall value to record.
        desc: Human-readable description written above the value.
    """
    # Explicit UTF-8 (as write_to_stats does) so a description containing
    # non-ASCII characters does not depend on the platform's default encoding.
    with open(filename, 'a', encoding='utf-8') as file:
        file.write("------------\n")
        file.write(f'{desc}\n')
        file.write(f"Recall: {recall}\n")

def precision(entries: list[dict], key: str) -> float:
    """Return the sum of entry[key] over all entries divided by len(entries).

    Args:
        entries: Annotated items; each must contain `key`.
        key: Name of the field to sum. Presumably a boolean / 0-1 flag set
            during manual annotation -- TODO confirm.

    Raises:
        ZeroDivisionError: If `entries` is empty.
    """
    flagged = sum(entry[key] for entry in entries)
    total = len(entries)
    return flagged / total

def write_to_stats(precision: float, desc: str, eval_file: str):
    """Write "<desc>: <precision>" to `eval_file`, replacing its contents.

    The file is opened in write mode with UTF-8 encoding; no trailing newline
    is written.

    Args:
        precision: Value to record.
        desc: Label placed in front of the value.
        eval_file: Path of the file to (over)write.
    """
    line = f"{desc}: {precision}"
    with open(eval_file, mode="w", encoding='utf-8') as out:
        out.write(line)

## Evaluation of segmentations
* Uses the manually annotated test data.
* Writes the segmentation recall (number of sample articles found in the segmentations / total number of sample articles) to stats.

In [None]:
# Load the segmenter test items of both editions (edition 1 first, then 2)
e1_test_entries, e2_test_entries = (
    jh.read_items(test_path)
    for test_path in (E1_SEGMENTER_TEST, E2_SEGMENTER_TEST)
)

In [None]:
# Fraction of the test entries that were found (entryid != "na"), per edition
e1_recall = recall_segmentation(e1_test_entries)
e2_recall = recall_segmentation(e2_test_entries)

# write_recall_to_stats takes (filename, recall, desc); the description is
# mandatory, so omitting it raises a TypeError.
write_recall_to_stats(E1_SEGMENTER_STATS, e1_recall, "E1 recall for segmentation")
write_recall_to_stats(E2_SEGMENTER_STATS, e2_recall, "E2 recall for segmentation")

### Evaluation of postprocessing order removal
* Computes and saves precision of order removal method for each edition.

In [None]:
# Prerequisite: the order-removal samples must already be manually annotated,
# i.e. every item needs a 'valid_removal' value.
e1_removed_data, e2_removed_data = jh.read_items(E1_ORDER_TEST), jh.read_items(E2_ORDER_TEST)

# Precision per edition: sum of the 'valid_removal' values / number of samples
e1_removed_precision, e2_removed_precision = (
    precision(samples, 'valid_removal')
    for samples in (e1_removed_data, e2_removed_data)
)

# Each stats file is overwritten with a single "<label>: <value>" line
write_to_stats(
    precision=e1_removed_precision,
    desc="Order removal precision",
    eval_file=E1_ORDER_STATS,
)
write_to_stats(
    precision=e2_removed_precision,
    desc="Order removal precision",
    eval_file=E2_ORDER_STATS,
)

### Evaluation of postprocessing cross-reference linking
* Computes and saves recall, precision and F1-score of the cross-reference linking method for each edition.

In [None]:
# Prerequisite: manual annotation of the cross-reference samples.
# The samples are loaded here, but the ratio used below is taken from the
# stats file; the sample-based estimate is kept only as commented-out code.
e1_cross_samples = jh.read_items(E1_CROSS_TEST)
e2_cross_samples = jh.read_items(E2_CROSS_TEST)

#e1_cross_ratio = sum([entry['is_cross_ref'] for entry in e1_cross_samples]) / len(e1_cross_samples)
#e2_cross_ratio = sum([entry['is_cross_ref'] for entry in e2_cross_samples]) / len(e2_cross_samples)

# The stats files are read, extended in memory and written back at the end.
e1_stats = jh.read_items(E1_CROSS_STATS)
e2_stats = jh.read_items(E2_CROSS_STATS)

# Only the first item of each stats list is used and updated.
e1_row = e1_stats[0]
e2_row = e2_stats[0]

# --- Edition 1 ---
e1_nr_entries = e1_row['Nr_articles']
e1_cross_ratio = e1_row['Nr_cross_ref_total'] / e1_nr_entries
e1_nr_cross_linked = e1_row['Nr_cross_ref_linked']
e1_estimated = e1_cross_ratio * e1_nr_entries

e1_row['cross_ref_ratio'] = e1_cross_ratio
e1_row['Estimated_cross_refs'] = e1_estimated
e1_row['Recall'] = e1_nr_cross_linked / e1_estimated
# Precision is fixed to 1 (original note: per definition these will always
# be correct).
e1_row['Precision'] = 1
e1_row['F1-score'] = (
    2 * (e1_row['Precision'] * e1_row['Recall'])
    / (e1_row['Precision'] + e1_row['Recall'])
)

# --- Edition 2 ---
e2_nr_entries = e2_row['Nr_articles']
e2_cross_ratio = e2_row['Nr_cross_ref_total'] / e2_nr_entries
e2_nr_cross_linked = e2_row['Nr_cross_ref_linked']
e2_estimated = e2_cross_ratio * e2_nr_entries

e2_row['cross_ref_ratio'] = e2_cross_ratio
e2_row['Estimated_cross_refs'] = e2_estimated
e2_row['Recall'] = e2_nr_cross_linked / e2_estimated
e2_row['Precision'] = 1
e2_row['F1-score'] = (
    2 * (e2_row['Precision'] * e2_row['Recall'])
    / (e2_row['Precision'] + e2_row['Recall'])
)

# Persist the extended stats to the same paths they were read from.
jh.write_items(e1_stats, E1_CROSS_STATS)
jh.write_items(e2_stats, E2_CROSS_STATS)

## Evaluation of project (Locations)
- How many of the sample locations have been correctly linked to Wikidata?

In [None]:
def is_coords_correct(entry, distance):
    """Check whether an entry's predicted coordinates match the annotated ones.

    Args:
        entry: Mapping with the keys 'latitude', 'longitude',
            'correct_latitude' and 'correct_longitude'. A value of None
            means that the coordinate is missing.
        distance: Exclusive upper bound, in kilometres, on the geodesic
            distance between prediction and annotation.

    Returns:
        True if both coordinate pairs are missing, or if both are present and
        less than `distance` km apart. False if exactly one of the pairs is
        missing (or incomplete), or if the points are too far apart.
    """
    coords_pred = (entry['latitude'], entry['longitude'])
    coords_true = (entry['correct_latitude'], entry['correct_longitude'])

    # A pair with any None component cannot be passed to geodesic(), so it is
    # treated as missing.
    pred_missing = None in coords_pred
    true_missing = None in coords_true
    if pred_missing or true_missing:
        # Correct only when the prediction and the annotation agree that
        # there are no coordinates; a one-sided miss counts as wrong.
        return pred_missing and true_missing

    return geopy.distance.geodesic(coords_pred, coords_true).km < distance

In [None]:
# Distance threshold passed to is_coords_correct, which compares it against
# the geodesic distance in kilometres.
DISTANCE = 10

e1_test = jh.read_items(E1_LOCATION_TEST)
e2_test = jh.read_items(E2_LOCATION_TEST)

e1_nr_entries, e2_nr_entries = len(e1_test), len(e2_test)

# Edition recall for location articles.
# NOTE(review): an entry counts as retrieved when its 'entryid' is the empty
# string, whereas recall_segmentation counts entries whose 'entryid' is not
# "na" -- confirm that this is the intended convention for the location data.
e1_nr_retrieved = sum(1 for entry in e1_test if entry['entryid'] == "")
e2_nr_retrieved = sum(1 for entry in e2_test if entry['entryid'] == "")

e1_retrieval_recall = e1_nr_retrieved / e1_nr_entries
e2_retrieval_recall = e2_nr_retrieved / e2_nr_entries

# Edition recall for location coordinates: share of entries whose coordinates
# is_coords_correct accepts for the DISTANCE threshold.
e1_nr_correct_coords = sum(1 for entry in e1_test if is_coords_correct(entry, DISTANCE))
e2_nr_correct_coords = sum(1 for entry in e2_test if is_coords_correct(entry, DISTANCE))

e1_coords_recall = e1_nr_correct_coords / e1_nr_entries
e2_coords_recall = e2_nr_correct_coords / e2_nr_entries

# Append both recalls to each edition's stats file (articles first, then
# coordinates).
write_recall_to_stats(E1_LOCATION_STATS, e1_retrieval_recall, "E1 recall for location articles")
write_recall_to_stats(E1_LOCATION_STATS, e1_coords_recall, "E1 recall for location coordinates")

write_recall_to_stats(E2_LOCATION_STATS, e2_retrieval_recall, "E2 recall for location articles")
write_recall_to_stats(E2_LOCATION_STATS, e2_coords_recall, "E2 recall for location coordinates")