# **Hyperparameter tuning for KPMiner, MPRank & EmbedRank**

In [11]:
import os
from itertools import product
from datasets import load_dataset

from util.evaluation_metrics import *
from util.evaluation_util import *

In [4]:
# Make sure the folders that hold serialized extractions exist before any cell
# tries to write into them.
# `exist_ok=True` makes the call a no-op when the folder is already present, so
# no separate exists() check is needed, and the loop variable no longer rebinds
# the built-in `dir`.
for extraction_dir in ("../extractions/kpminer/", "../extractions/mprank/"):
    os.makedirs(extraction_dir, exist_ok=True)

## **0. Preprocessing of dataset**

In [2]:
# Fetch SemEval-2010 ("raw" configuration) through the `datasets` library and
# derive the training documents and their references from the "train" split.
# NOTE(review): the structure of `train_docs` / `train_references` is defined by
# the project helper `preprocess_dataset`, which is not shown here — confirm there.
semeval2010 = load_dataset("midas/semeval2010", "raw") # TODO: use the "extraction" version of the dataset next time
train_docs, train_references = preprocess_dataset(semeval2010, "train")

  0%|          | 0/144 [00:00<?, ?it/s]

## **1. KPMiner**

In [7]:
# KPMiner search grid: the evaluation cells score every (alpha, sigma) pair
# built from these two lists.
alphas = [2.2, 2.3, 2.4, 2.5]
sigmas = [2.8, 3.0, 3.2, 3.4]
# Document-frequency table; the extraction step passes it on as `df=df`.
# NOTE(review): `pke` is not imported in the visible import cell — presumably it
# comes in through one of the `from util... import *` lines. Confirm that, and
# confirm that `pke.pke` (rather than plain `pke`) is the intended module path.
df = pke.pke.load_document_frequency_file("df-semeval2010.tsv.gz")
# Extractor instance used for the KPMiner runs.
# NOTE: `alphas` and `extractor` are rebound in the MPRank section, so the
# KPMiner cells have to run before that section.
extractor = pke.unsupervised.KPMiner()

In [4]:
# n_to_extr_train = dict()
# for n in [5, 10, 15]:
#     n_to_extr_train[n] = extract_keyphrases(
#         extractor = extractor,
#         n=n,
#         params=list(product(alphas, sigmas)),
#         stemming=True,
#         docs=train_docs, 
#         df=df
#         )
    
# # Serialize extracted keyphrases
# serialize(extracted=n_to_extr_train, path="../extractions/kpminer/train.json")

In [5]:
# Load the cached KPMiner extractions, rebuilding the cache only when it is missing.
# Without the guard, a fresh checkout fails here under "Restart Kernel -> Run All"
# because the JSON file has never been written.
kpminer_train_path = "../extractions/kpminer/train.json"
if not os.path.exists(kpminer_train_path):
    # Slow path: run the extractor over the whole (alpha, sigma) grid for each cut-off n.
    # NOTE: relies on `extractor`, `alphas`, `sigmas` and `df` from the KPMiner
    # setup cell, so that cell must have been run last (not the MPRank one).
    print(f"{kpminer_train_path} not found - extracting keyphrases (this can take a long time).")
    fresh_extractions = {
        n: extract_keyphrases(
            extractor=extractor,
            n=n,
            params=list(product(alphas, sigmas)),
            stemming=True,
            docs=train_docs,
            df=df,
        )
        for n in [5, 10, 15]
    }
    serialize(extracted=fresh_extractions, path=kpminer_train_path)

# Always read back from disk so the evaluation cells see the same structure
# whether the cache was just written or already existed.
n_to_extr_train = deserialize(path=kpminer_train_path)

### 1. Exact-match evaluation

In [8]:
# Exact-match F1 for KPMiner: for each cut-off n, score every (alpha, sigma)
# pair against the training references and report the best-scoring pair.
# Assumes compute_scores returns a dict-like object keyed by parameter pair
# (the code only relies on `.get` and `[]`).
alpha_sigma_grid = tuple(product(alphas, sigmas))
for n, extracted in n_to_extr_train.items():
    scores = compute_scores(
        metric=exact_f1,
        params=list(alpha_sigma_grid),  # each call receives its own list
        extracted_keyphrases=extracted,
        references=train_references,
    )
    # max() walks the keys; the first key with the highest score wins.
    winner = max(scores, key=scores.get)
    alpha_at_best, sigma_at_best = winner[0], winner[1]
    print(f"The best score for n={n} was {scores[winner]} for alpha={alpha_at_best} and sigma={sigma_at_best}.")

The best score for n=5 was 0.10729850314581132 for alpha=2.2 and sigma=2.8.
The best score for n=10 was 0.13735311518983984 for alpha=2.2 and sigma=2.8.
The best score for n=15 was 0.14427517244925336 for alpha=2.2 and sigma=2.8.


### 2. Partial-match evaluation

In [9]:
# Partial-match F1 for KPMiner: for each cut-off n, score every (alpha, sigma)
# pair against the training references and report the best-scoring pair.
# Assumes compute_scores returns a dict-like object keyed by parameter pair
# (the code only relies on `.get` and `[]`).
alpha_sigma_grid = tuple(product(alphas, sigmas))
for n, extracted in n_to_extr_train.items():
    scores = compute_scores(
        metric=partial_f1,
        params=list(alpha_sigma_grid),  # each call receives its own list
        extracted_keyphrases=extracted,
        references=train_references,
    )
    # max() walks the keys; the first key with the highest score wins.
    winner = max(scores, key=scores.get)
    alpha_at_best, sigma_at_best = winner[0], winner[1]
    print(f"The best score for n={n} was {scores[winner]} for alpha={alpha_at_best} and sigma={sigma_at_best}.")

The best score for n=5 was 0.21563546341668094 for alpha=2.2 and sigma=2.8.
The best score for n=10 was 0.2589318774320937 for alpha=2.2 and sigma=2.8.
The best score for n=15 was 0.2653923980046201 for alpha=2.2 and sigma=2.8.


### 3. Similarity

In [10]:
# Similarity metric for KPMiner: for each cut-off n, score every (alpha, sigma)
# pair against the training references and report the best-scoring pair.
# The recorded run of this cell ended in a KeyboardInterrupt, so no scores are
# shown below it.
alpha_sigma_grid = tuple(product(alphas, sigmas))
for n, extracted in n_to_extr_train.items():
    scores = compute_scores(
        metric=similarity,
        params=list(alpha_sigma_grid),  # each call receives its own list
        extracted_keyphrases=extracted,
        references=train_references,
    )
    # max() walks the keys; the first key with the highest score wins.
    winner = max(scores, key=scores.get)
    alpha_at_best, sigma_at_best = winner[0], winner[1]
    print(f"The best score for n={n} was {scores[winner]} for alpha={alpha_at_best} and sigma={sigma_at_best}.")

KeyboardInterrupt: 

## **2. MPRank**

In [10]:
# MultipartiteRank search grid: one `alpha` value per configuration.
# NOTE: this rebinds `alphas` and `extractor` from the KPMiner section, so a
# KPMiner cell re-run after this one would pick up these values instead.
alphas = [0.9, 1.0, 1.1, 1.2, 1.3]
# NOTE(review): `pke` is not imported in the visible import cell — presumably it
# comes in through one of the `from util... import *` lines; confirm.
extractor = pke.unsupervised.MultipartiteRank()

In [11]:
# n_to_extr_train = dict()
# for n in [5, 10, 15]:
#     n_to_extr_train[n] = extract_keyphrases(
#         extractor = extractor,
#         n=n,
#         params=alphas,
#         stemming=True,
#         docs=train_docs
#         )
    
# # Serialize extracted keyphrases
# serialize(extracted=n_to_extr_train, path="../extractions/mprank/train.json")

In [12]:
# Load the cached MPRank extractions, rebuilding the cache only when it is missing.
# Without the guard, a fresh checkout fails here under "Restart Kernel -> Run All"
# because the JSON file has never been written.
mprank_train_path = "../extractions/mprank/train.json"
if not os.path.exists(mprank_train_path):
    # Slow path: run the extractor for every alpha and each cut-off n.
    # NOTE: relies on `extractor` and `alphas` from the MPRank setup cell.
    print(f"{mprank_train_path} not found - extracting keyphrases (this can take a long time).")
    fresh_extractions = {
        n: extract_keyphrases(
            extractor=extractor,
            n=n,
            params=alphas,
            stemming=True,
            docs=train_docs,
        )
        for n in [5, 10, 15]
    }
    serialize(extracted=fresh_extractions, path=mprank_train_path)

# Always read back from disk so the evaluation cells see the same structure
# whether the cache was just written or already existed.
n_to_extr_train = deserialize(path=mprank_train_path)

### 1. Exact-match evaluation

In [13]:
# Exact-match F1 for MPRank: for each cut-off n, score every alpha against the
# training references and report the best-scoring one.
# Assumes compute_scores returns a dict-like object keyed by alpha
# (the code only relies on `.get` and `[]`).
scoring_args = {"metric": exact_f1, "params": alphas, "references": train_references}
for n, extracted in n_to_extr_train.items():
    scores = compute_scores(extracted_keyphrases=extracted, **scoring_args)
    # max() walks the keys; the first key with the highest score wins.
    winner = max(scores, key=scores.get)
    report = f"The highest F-score for n={n} was {scores[winner]} for alpha={winner}."
    print(report)

The highest F-score for n=5 was 0.12679910784647197 for alpha=1.2.
The highest F-score for n=10 was 0.14485760385389593 for alpha=0.9.
The highest F-score for n=15 was 0.15068132628354636 for alpha=1.2.


### 2. Partial-match evaluation

In [14]:
# Partial-match F1 for MPRank: for each cut-off n, score every alpha against the
# training references and report the best-scoring one.
# Assumes compute_scores returns a dict-like object keyed by alpha
# (the code only relies on `.get` and `[]`).
scoring_args = {"metric": partial_f1, "params": alphas, "references": train_references}
for n, extracted in n_to_extr_train.items():
    scores = compute_scores(extracted_keyphrases=extracted, **scoring_args)
    # max() walks the keys; the first key with the highest score wins.
    winner = max(scores, key=scores.get)
    report = f"The highest F-score for n={n} was {scores[winner]} for alpha={winner}."
    print(report)

The highest F-score for n=5 was 0.3195913624415944 for alpha=1.3.
The highest F-score for n=10 was 0.3506735147909677 for alpha=1.3.
The highest F-score for n=15 was 0.34312121520284694 for alpha=1.3.


### 3. Similarity

In [15]:
# Similarity metric for MPRank: for each cut-off n, score every alpha against
# the training references and report the best-scoring one.
# The recorded run of this cell ended in a KeyboardInterrupt, so no scores are
# shown below it.
scoring_args = {"metric": similarity, "params": alphas, "references": train_references}
for n, extracted in n_to_extr_train.items():
    scores = compute_scores(extracted_keyphrases=extracted, **scoring_args)
    # max() walks the keys; the first key with the highest score wins.
    winner = max(scores, key=scores.get)
    report = f"The highest F-score for n={n} was {scores[winner]} for alpha={winner}."
    print(report)

KeyboardInterrupt: 

## **3. EmbedRank**

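**Note:** EmbedRank keyphrases must be extracted and serialized in a separate virtual environment first (because of dependency conflicts). The cells below expect the raw output at `../extractions/embedrank/raw/train.json` and the stemmed version at `../extractions/embedrank/stemmed/train.json`.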

### 0. Stemming of extracted keyphrases

In [16]:
# # Deserialize extracted keyphrases
# n_to_extr_train_raw = deserialize(path="../extractions/embedrank/raw/train.json")

# # Stem extracted keyphrases
# n_to_extr_train = perform_stemming(extracted_raw=n_to_extr_train_raw)

# # Serialized stemmed extracted keyphrases
# serialize(extracted=n_to_extr_train, path="../extractions/embedrank/stemmed/train.json")

In [17]:
# Read the stemmed EmbedRank extractions back from disk; from here on
# `n_to_extr_train` refers to EmbedRank rather than MPRank output.
embedrank_stemmed_path = "../extractions/embedrank/stemmed/train.json"
n_to_extr_train = deserialize(path=embedrank_stemmed_path)

In [18]:
# EmbedRank search grid: the beta values passed as `params` to the evaluation cells.
betas = [
    0.4,
    0.45,
    0.5,
    0.55,
    0.6,
]

### 1. Exact-match evaluation

In [19]:
# Exact-match F1 for EmbedRank: for each cut-off n, score every beta against the
# training references and report the best-scoring one.
# Assumes compute_scores returns a dict-like object keyed by beta
# (the code only relies on `.get` and `[]`).
scoring_args = {"metric": exact_f1, "params": betas, "references": train_references}
for n, extracted in n_to_extr_train.items():
    scores = compute_scores(extracted_keyphrases=extracted, **scoring_args)
    # max() walks the keys; the first key with the highest score wins.
    winner = max(scores, key=scores.get)
    report = f"The highest F-score for n={n} was {scores[winner]} for beta={winner}."
    print(report)

The highest F-score for n=5 was 0.03029879184903695 for beta=0.45.
The highest F-score for n=10 was 0.030325962305657126 for beta=0.45.
The highest F-score for n=15 was 0.031178201850416896 for beta=0.45.


### 2. Partial-match evaluation

In [20]:
# Partial-match F1 for EmbedRank: for each cut-off n, score every beta against
# the training references and report the best-scoring one.
# Assumes compute_scores returns a dict-like object keyed by beta
# (the code only relies on `.get` and `[]`).
scoring_args = {"metric": partial_f1, "params": betas, "references": train_references}
for n, extracted in n_to_extr_train.items():
    scores = compute_scores(extracted_keyphrases=extracted, **scoring_args)
    # max() walks the keys; the first key with the highest score wins.
    winner = max(scores, key=scores.get)
    report = f"The highest F-score for n={n} was {scores[winner]} for beta={winner}."
    print(report)

The highest F-score for n=5 was 0.38896559766120287 for beta=0.55.
The highest F-score for n=10 was 0.4104219635039519 for beta=0.55.
The highest F-score for n=15 was 0.3895602980733236 for beta=0.55.


### 3. Similarity

In [None]:
# Similarity-based evaluation of the EmbedRank extractions: for each cut-off n,
# score every beta against the training references and report the best one.
# The metric must be `similarity` to match this section's heading; passing
# `partial_f1` here would only repeat the partial-match section.
# NOTE(review): the similarity cells for KPMiner and MPRank were interrupted
# (KeyboardInterrupt) in the recorded run — presumably this metric is slow.
for n, extractions in n_to_extr_train.items():
    results = compute_scores(
        metric=similarity,
        params=betas,
        extracted_keyphrases=extractions,
        references=train_references
        )
    # Iterating the mapping yields its keys, so this is the best-scoring beta.
    best_beta = max(results, key=results.get)
    print(f"The highest F-score for n={n} was {results[best_beta]} for beta={best_beta}.")