# Evaluation of `passim` results

## Running the evaluation

In [67]:
import sys
import pandas as pd
sys.path.append('../')
from lib.evaluation import TextReuseEvaluator

In [68]:
GT_DATASET_PATH = '../data/homeric_repetitions_dataset.tsv'
PASSIM_OUTPUT_DIR = '../data/passim/exp6/out.json/'

evaluator = TextReuseEvaluator()

In [69]:
evaluator.read_predictions(PASSIM_OUTPUT_DIR)
evaluator.read_groundtruth(GT_DATASET_PATH)

341 predicted clusters found
69 groundtruth clusters found


In [70]:
evaluator.evaluate()

# of ground-truth cluster: 69
# of matched clusters: 58
# of unmatched clusters: 11


# of partially matched clusters: 3
# of exactly matched clusters: 47
# of clusters with spurious passages: 8


In [71]:
evaluator.matches

[<Match: GT cluster[1] with predicted cluster[54] (spurious-match)>,
 <Match: GT cluster[2] with predicted cluster[54] (spurious-match)>,
 <Match: GT cluster[3] with predicted cluster[58] (exact-match)>,
 <Match: GT cluster[4] with predicted cluster[144] (partial-match)>,
 <Match: GT cluster[6] with predicted cluster[185] (exact-match)>,
 <Match: GT cluster[7] with predicted cluster[280] (exact-match)>,
 <Match: GT cluster[8] with predicted cluster[88] (spurious-match)>,
 <Match: GT cluster[9] with predicted cluster[88] (exact-match)>,
 <Match: GT cluster[12] with predicted cluster[181] (spurious-match)>,
 <Match: GT cluster[13] with predicted cluster[41] (exact-match)>,
 <Match: GT cluster[14] with predicted cluster[577] (exact-match)>,
 <Match: GT cluster[72] with predicted cluster[503] (partial-match)>,
 <Match: GT cluster[15] with predicted cluster[345] (exact-match)>,
 <Match: GT cluster[16] with predicted cluster[548] (exact-match)>,
 <Match: GT cluster[18] with predicted cluster

In [8]:
for cluster in evaluator.gt_clusters.values():
    if cluster.matched_predictions:
        for match in cluster.matched_predictions:
            match.inspect()

Match type: spurious-match
GT cluster 1
	('<Locus: Homer, Iliad 2.11-2.15>') θωρῆξαί ἑ κέλευε κάρη κομόωντας Ἀχαιοὺς πανσυδίῃ· νῦν γάρ κεν ἕλοι πόλιν εὐρυάγυιαν Τρώων· οὐ γὰρ ἔτʼ ἀμφὶς Ὀλύμπια δώματʼ ἔχοντες ἀθάνατοι φράζονται· ἐπέγναμψεν γὰρ ἅπαντας Ἥρη λισσομένη, Τρώεσσι δὲ κήδεʼ ἐφῆπται.
	('<Locus: Homer, Iliad 2.28-2.32>') θωρῆξαί σε κέλευσε κάρη κομόωντας Ἀχαιοὺς πανσυδίῃ· νῦν γάρ κεν ἕλοις πόλιν εὐρυάγυιαν Τρώων· οὐ γὰρ ἔτʼ ἀμφὶς Ὀλύμπια δώματʼ ἔχοντες ἀθάνατοι φράζονται· ἐπέγναμψεν γὰρ ἅπαντας Ἥρη λισσομένη, Τρώεσσι δὲ κήδεʼ ἐφῆπται
	('<Locus: Homer, Iliad 2.66-2.69>') θωρῆξαί σε κέλευσε κάρη κομόωντας Ἀχαιοὺς πανσυδίῃ· νῦν γάρ κεν ἕλοις πόλιν εὐρυάγυιαν Τρώων· οὐ γὰρ ἔτʼ ἀμφὶς Ὀλύμπια δώματʼ ἔχοντες ἀθάνατοι φράζονται· ἐπέγναμψεν γὰρ ἅπαντας Ἥρη λισσομένη, Τρώεσσι δὲ κήδεʼ ἐφῆπται


Predicted cluster 73
	('<Locus: Homer, Iliad 2.60-2.70>') θωρῆξαί σε κέλευσε κάρη κομόωντας Ἀχαιοὺς πανσυδίῃ· νῦν γάρ κεν ἕλοις πόλιν εὐρυάγυιαν Τρώων· οὐ γὰρ ἔτʼ ἀμφὶς Ὀλύμπια δώματʼ ἔχοντες ἀθάνατ

## Count clusters by work

In [72]:
PASSIM_OUTPUT_DIR = '../data/passim/exp6/out.json/'
CSV_OUTPUT_PATH = '../data/output/passim_clusters_exp6.csv'

In [73]:
evaluator = TextReuseEvaluator()
evaluator.read_predictions(PASSIM_OUTPUT_DIR)

341 predicted clusters found


In [74]:
groups = {
    "iliad": [],
    "odyssey": [],
    "mixed": []
}

In [75]:
for cluster in evaluator.predicted_clusters.values():
    works = set(sorted([p.locus.work.lower() for p in cluster.passages]))
    if len(works) > 1:
        groups['mixed'].append(cluster.id) 
    else:
        groups[list(works)[0]].append(cluster.id)

In [76]:
for group in groups:
    print(f'{group}: {len(groups[group])}')

iliad: 112
odyssey: 217
mixed: 12


In [77]:
passim_df = pd.read_csv(CSV_OUTPUT_PATH)

In [78]:
passim_df

Unnamed: 0.1,Unnamed: 0,cluster,id,label,dices_tags,dices_speech_id,text,speaker,addressee
0,0,84,187,"Homer, Odyssey 2.85-2.128",del|inf|req|vit,721,"κοῦροι ἐμοὶ μνηστῆρες, ἐπεὶ θάνε δῖος Ὀδυσσεύς...",Antinous,Telemachus
1,1,84,189,"Homer, Odyssey 2.96-2.102",inf,722,"κοῦροι ἐμοὶ μνηστῆρες, ἐπεὶ θάνε δῖος Ὀδυσσεύς...",Penelope,suitors of Penelope
2,2,84,586,"Homer, Odyssey 19.124-19.163",lam,1198,"κοῦροι, ἐμοὶ μνηστῆρες, ἐπεὶ θάνε δῖος Ὀδυσσεύ...",Penelope,Odysseus
3,3,84,587,"Homer, Odyssey 19.141-19.147",del|nar|que,1199,"κοῦροι, ἐμοὶ μνηστῆρες, ἐπεὶ θάνε δῖος Ὀδυσσεύ...",Penelope,suitors of Penelope
4,4,84,1298,"Homer, Odyssey 24.121-24.190",nar,1342,"κοῦροι ἐμοὶ μνηστῆρες, ἐπεὶ θάνε δῖος Ὀδυσσεύς...",Amphimedon,Agamemnon
...,...,...,...,...,...,...,...,...,...
754,754,690,1185,"Homer, Odyssey 20.199-20.225",lau|nar,1236,"χαῖρε, πάτερ ὦ ξεῖνε· γένοιτό τοι ἔς περ ὀπίσσ...",Philoetius,Odysseus
755,755,732,1091,"Homer, Iliad 7.67-7.91",del,182,"ἀνδρὸς μὲν τόδε σῆμα πάλαι κατατεθνηῶτος, ὅν π...",Hector,"Greeks, Trojans"
756,756,732,1092,"Homer, Iliad 7.89-7.90",inf,183,"ἀνδρὸς μὲν τόδε σῆμα πάλαι κατατεθνηῶτος, ὅν π...",someone,someone
757,757,744,886,"Homer, Iliad 4.7-4.19",cha|del,85,"πόλεμόν τε κακὸν καὶ φύλοπιν αἰνὴν ὄρσομεν, ἦ ...",Zeus,gods


In [80]:
for group in groups:
    filter = groups[group]
    filtered_df = passim_df[passim_df.cluster.isin(filter)]
    filtered_df.to_csv(f'../data/output/passim_clusters_exp6_{group}.csv')