# **Intrinsic evaluation of KPMiner, MPRank & EmbedRank**

In [2]:
from datasets import load_dataset

from util.evaluation_metrics import *
from util.evaluation_util import *

## **0. Preprocessing of dataset**

In [3]:
# Fetch the "raw" configuration of the midas/semeval2010 dataset.
semeval2010 = load_dataset("midas/semeval2010", "raw") 
# Keep only the "test" split; the helper yields the documents and their
# reference keyphrases as two separate objects (exact types: see util.evaluation_util).
test_docs, test_references = preprocess_dataset(semeval2010, "test")

  0%|          | 0/100 [00:00<?, ?it/s]

## **1. KPMiner**

**Note:** The exact-match and partial-match evaluation metrics yielded the same optimal hyperparameters, so a single set of extracted keyphrases is reused for all KPMiner evaluations below.

In [None]:
# df = pke.load_document_frequency_file("df-semeval2010.tsv.gz")
# n_to_alpha_sigma = {5: (2.2, 2.8), 10: (2.2, 2.8), 15: (2.2, 2.8)}
# extractor = pke.unsupervised.KPMiner()

# n_to_extr_test = dict()
# for n, (alpha, sigma) in n_to_alpha_sigma.items():
#     n_to_extr_test[n] = extract_keyphrases(
#         extractor=extractor, 
#         n=n, 
#         params=[(alpha, sigma)],
#         stemming=True,
#         docs=test_docs,
#         df=df
#         )
    
# # Serialize extracted keyphrases
# serialize(extracted=n_to_extr_test, path="extractions/kpminer/test.json")

In [5]:
# Load the previously serialized KPMiner extractions for the test set.
# The loaded object is iterated with .items() in the evaluation cells below,
# yielding one (n, extracted keyphrases) pair per cut-off n.
n_to_extr_test = deserialize(path="../extractions/kpminer/test.json")

### 1. Exact-match evaluation

In [6]:
# KPMiner, exact-match: print the mean F1 over the test references for each cut-off.
print("Results:")
for cutoff in n_to_extr_test:
    mean_f1 = average_metric(
        extracted=n_to_extr_test[cutoff],
        test_references=test_references,
        metric=exact_f1,
    )
    # Two decimals are enough for the comparison between extractors.
    score = round(mean_f1, 2)
    print(f"\tF1@{cutoff}: {score}\n")

Results:
	F1@5: 0.12

	F1@10: 0.16

	F1@15: 0.18



### 2. Partial-match evaluation

In [7]:
# KPMiner, partial-match: print the mean partial F1 over the test references for each cut-off.
print("Results:")
for cutoff in n_to_extr_test:
    mean_pf1 = average_metric(
        extracted=n_to_extr_test[cutoff],
        test_references=test_references,
        metric=partial_f1,
    )
    # Two decimals are enough for the comparison between extractors.
    score = round(mean_pf1, 2)
    print(f"\tpF1@{cutoff}: {score}\n")

Results:
	pF1@5: 0.22

	pF1@10: 0.27

	pF1@15: 0.28



### 3. Similarity

In [8]:
# KPMiner, similarity: print the mean similarity score over the test references
# for each cut-off n.
print("Results:")
for n, extracted in n_to_extr_test.items():
    avg_sim = average_metric(
        extracted=extracted,
        test_references=test_references,
        metric=similarity,
    )
    # The recorded outputs (e.g. 0.4699999988079071) suggest avg_sim is a
    # single-precision scalar, for which round(..., 2) does not give a clean
    # two-decimal value when printed. Converting to a built-in float first
    # makes the rounding take effect.
    print(f"\tSim@{n}: {round(float(avg_sim), 2)}\n")

Results:
	Sim@5: 0.4699999988079071

	Sim@10: 0.44999998807907104

	Sim@15: 0.4399999976158142



## **2. MPRank**

In [34]:
# n_to_alpha = {5: 1.2, 10: 0.9, 15: 1.2}
# extractor = pke.unsupervised.MultipartiteRank()

# n_to_extr_test = dict()
# for n, alpha in n_to_alpha.items():
#     n_to_extr_test[n] = extract_keyphrases(
#         extractor=extractor, 
#         n=n, 
#         params=[alpha],
#         stemming=True,
#         docs=test_docs
#         )
    
# # Serialize extracted keyphrases
# serialize(extracted=n_to_extr_test, path="extractions/mprank/test_exact.json")

### 1. Exact-match evaluation

In [9]:
# Load the previously serialized MPRank extractions (exact-match run) for the test set.
# The loaded object is iterated with .items() in the next cell, one entry per cut-off n.
n_to_extr_test = deserialize(path="../extractions/mprank/test_exact.json")

In [10]:
# MPRank, exact-match: print the mean F1 over the test references for each cut-off.
print("Results:")
for cutoff in n_to_extr_test:
    mean_f1 = average_metric(
        extracted=n_to_extr_test[cutoff],
        test_references=test_references,
        metric=exact_f1,
    )
    # Two decimals are enough for the comparison between extractors.
    score = round(mean_f1, 2)
    print(f"\tF1@{cutoff}: {score}\n")

Results:
	F1@5: 0.12

	F1@10: 0.15

	F1@15: 0.14



### 2. Partial-match evaluation

In [None]:
# n_to_alpha = {5: 1.3, 10: 1.3, 15: 1.3}
# extractor = pke.unsupervised.MultipartiteRank()

# n_to_extr_test = dict()
# for n, alpha in n_to_alpha.items():
#     n_to_extr_test[n] = extract_keyphrases(
#         extractor=extractor, 
#         n=n, 
#         params=[alpha],
#         stemming=True,
#         docs=test_docs
#         )

# # Serialize extracted keyphrases
# serialize(extracted=n_to_extr_test, path="extractions/mprank/test_partial.json")

In [12]:
# Load the previously serialized MPRank extractions (partial-match run) for the test set.
# The loaded object is iterated with .items() in the next cell, one entry per cut-off n.
n_to_extr_test = deserialize(path="../extractions/mprank/test_partial.json")

In [13]:
# MPRank, partial-match: print the mean partial F1 over the test references for each cut-off.
print("Results:")
for cutoff in n_to_extr_test:
    mean_pf1 = average_metric(
        extracted=n_to_extr_test[cutoff],
        test_references=test_references,
        metric=partial_f1,
    )
    # Two decimals are enough for the comparison between extractors.
    score = round(mean_pf1, 2)
    print(f"\tpF1@{cutoff}: {score}\n")

Results:
	pF1@5: 0.35

	pF1@10: 0.37

	pF1@15: 0.34



### 3. Similarity

In [None]:
# n_to_alpha = {5: 1.2, 10: 1.2, 15: 1.1}
# extractor = pke.unsupervised.MultipartiteRank()

# n_to_extr_test = dict()
# for n, alpha in n_to_alpha.items():
#     n_to_extr_test[n] = extract_keyphrases(
#         extractor=extractor, 
#         n=n, 
#         params=[alpha],
#         stemming=True,
#         docs=test_docs
#         )

# # Serialize extracted keyphrases
# serialize(extracted=n_to_extr_test, path="extractions/mprank/test_sim.json")

In [14]:
# Load the previously serialized MPRank extractions (similarity run) for the test set.
# The loaded object is iterated with .items() in the next cell, one entry per cut-off n.
n_to_extr_test = deserialize(path="../extractions/mprank/test_sim.json")

In [15]:
# MPRank, similarity: print the mean similarity score over the test references
# for each cut-off n.
print("Results:")
for n, extracted in n_to_extr_test.items():
    avg_sim = average_metric(
        extracted=extracted,
        test_references=test_references,
        metric=similarity,
    )
    # The recorded outputs (e.g. 0.5299999713897705) suggest avg_sim is a
    # single-precision scalar, for which round(..., 2) does not give a clean
    # two-decimal value when printed. Converting to a built-in float first
    # makes the rounding take effect.
    print(f"\tSim@{n}: {round(float(avg_sim), 2)}\n")

Results:
	Sim@5: 0.5299999713897705

	Sim@10: 0.5

	Sim@15: 0.4699999988079071



## **3. EmbedRank**

### 1. Exact-match evaluation

In [19]:
# # Deserialize extracted keyphrases
# n_to_extr_test_raw = deserialize(path="extractions/embedrank/raw/test_exact.json")

# # Stem extracted keyphrases
# n_to_extr_test = perform_stemming(extracted_raw=n_to_extr_test_raw)

# # Serialize stemmed extracted keyphrases
# serialize(extracted=n_to_extr_test, path="extractions/embedrank/stemmed/test_exact.json")

In [17]:
# Load the stemmed EmbedRank extractions (exact-match run) for the test set.
# The loaded object is iterated with .items() in the next cell, one entry per cut-off n.
n_to_extr_test = deserialize(path="../extractions/embedrank/stemmed/test_exact.json")

In [18]:
# EmbedRank, exact-match: print the mean F1 over the test references for each cut-off.
print("Results:")
for cutoff in n_to_extr_test:
    mean_f1 = average_metric(
        extracted=n_to_extr_test[cutoff],
        test_references=test_references,
        metric=exact_f1,
    )
    # Two decimals are enough for the comparison between extractors.
    score = round(mean_f1, 2)
    print(f"\tF1@{cutoff}: {score}\n")

Results:
	F1@5: 0.02

	F1@10: 0.03

	F1@15: 0.03



### 2. Partial-match evaluation

In [22]:
# # Deserialize extracted keyphrases
# n_to_extr_test_raw = deserialize(path="extractions/embedrank/raw/test_partial.json")

# # Stem extracted keyphrases
# n_to_extr_test = perform_stemming(extracted_raw=n_to_extr_test_raw)

# # Serialize stemmed extracted keyphrases
# serialize(extracted=n_to_extr_test, path="extractions/embedrank/stemmed/test_partial.json")

In [19]:
# Load the stemmed EmbedRank extractions (partial-match run) for the test set.
# The loaded object is iterated with .items() in the next cell, one entry per cut-off n.
n_to_extr_test = deserialize(path="../extractions/embedrank/stemmed/test_partial.json")

In [20]:
# EmbedRank, partial-match: print the mean partial F1 over the test references for each cut-off.
print("Results:")
for cutoff in n_to_extr_test:
    mean_pf1 = average_metric(
        extracted=n_to_extr_test[cutoff],
        test_references=test_references,
        metric=partial_f1,
    )
    # Two decimals are enough for the comparison between extractors.
    score = round(mean_pf1, 2)
    print(f"\tpF1@{cutoff}: {score}\n")

Results:
	pF1@5: 0.38

	pF1@10: 0.4

	pF1@15: 0.37



### 3. Similarity

In [25]:
# # Deserialize extracted keyphrases
# n_to_extr_test_raw = deserialize(path="extractions/embedrank/raw/test_sim.json")

# # Stem extracted keyphrases
# n_to_extr_test = perform_stemming(extracted_raw=n_to_extr_test_raw)

# # Serialize stemmed extracted keyphrases
# serialize(extracted=n_to_extr_test, path="extractions/embedrank/stemmed/test_sim.json")

In [21]:
# Load the stemmed EmbedRank extractions (similarity run) for the test set.
# The loaded object is iterated with .items() in the next cell, one entry per cut-off n.
n_to_extr_test = deserialize(path="../extractions/embedrank/stemmed/test_sim.json")

In [22]:
# EmbedRank, similarity: print the mean similarity score over the test references
# for each cut-off n.
print("Results:")
for n, extracted in n_to_extr_test.items():
    avg_sim = average_metric(
        extracted=extracted,
        test_references=test_references,
        metric=similarity,
    )
    # The recorded outputs (e.g. 0.41999998688697815) suggest avg_sim is a
    # single-precision scalar, for which round(..., 2) does not give a clean
    # two-decimal value when printed. Converting to a built-in float first
    # makes the rounding take effect.
    print(f"\tSim@{n}: {round(float(avg_sim), 2)}\n")

Results:
	Sim@5: 0.4699999988079071

	Sim@10: 0.4399999976158142

	Sim@15: 0.41999998688697815

