In [1]:
import pandas as pd
# Evaluation Metrics (ROUGE)
from rouge_score import rouge_scorer
import numpy as np

### Reading all the prediction files for evaluating the results

In [2]:
# Load the saved predictions of every keyword-extraction algorithm. Each frame
# is later read through its 'Extracted_Keywords' column.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling —
# only load files produced by a trusted run of the extraction notebooks.

# RAKE predictions
rake_pred_df = pd.read_pickle('semeval_results/rake_pred_df.pkl')

# YAKE predictions
yake_pred_df = pd.read_pickle('semeval_results/yake_pred_df.pkl')

# TF-IDF predictions
tf_idf_pred_df = pd.read_pickle('semeval_results/tf_idf_pred_df.pkl')

# KeyBERT predictions
key_bert_pred_df = pd.read_pickle('semeval_results/key_bert_pred_df.pkl')

# LDA predictions
lda_pred_df = pd.read_pickle('semeval_results/lda_pred_df.pkl')

# PositionRank predictions
pr_pred_df = pd.read_pickle('semeval_results/pos_rank_pred_df.pkl')

### Reference dataset with labeled keywords
- This data frame is used to evaluate the results for the predicted keywords

In [3]:
# Reference frame holding the labeled keywords; the preview in the next cell
# shows the columns Doc_no, Title, Abstract and Keywords.
data = pd.read_pickle('data/Processed_SemEval.pkl')

In [4]:
# Show the reference frame (the rendered output elides the middle rows).
data

Unnamed: 0,Doc_no,Title,Abstract,Keywords
0,6,SBC gets more serious on regulatory compliance,eye past future sbc communication week created...,"telecommunication,sbc communications,regulator..."
1,7,Anti-spam suit attempts to hold carriers accou...,lawsuit alleges sprint violated utah new anti ...,"electronic mail,legislation,telecommunication,..."
2,8,New investors get steal of a deal [Global Cros...,hutchison telecommunication singapore technolo...,"telecommunication,hutchison telecommunications..."
3,9,Achieving competitive capabilities in e-services,implication internet service operation strateg...,"corporate modelling,electronic commerce,intern..."
4,11,Does social capital determine innovation? To w...,paper deal question social capital determine i...,"corporate modelling,decision theory,manufactur..."
...,...,...,...,...
495,946,Entanglement measures with asymptotic weak-mon...,propose entanglement measure asymptotic weak m...,"quantum computing,quantum interference phenome..."
496,947,The fully entangled fraction as an inclusive m...,characterizing entanglement simplest case qubi...,"information theory,quantum communication,quant..."
497,948,Pairwise thermal entanglement in the n-qubit (...,calculated concurrence pairwise thermal entang...,"heisenberg model,quantum computing,quantum int..."
498,949,"Reply to ""Comment on: Teleportation of an unkn...",letter ibid vol p main question consider gener...,"probability,quantum communication,quantum theo..."


In [5]:
# Document ids and abstracts. get_Rouge_scores reads these two names as
# notebook-level globals when it builds its per-document results frame.
doc_no = data['Doc_no']
abstracts = data['Abstract']
# Labeled keywords as a plain list, one comma-separated string per document
# (see the Keywords column in the preview above).
refrence_data = data['Keywords'].tolist()

In [6]:
# Containers for the results of all the algorithms.
#
# `algorithm` lists the display names in the order the result cells below are
# run; the comparison table pairs names and scores purely by position.
algorithm = [
    "RAKE",
    "YAKE",
    "TF-IDF",
    "KeyBERT",
    "LDA",
    "Position Rank",
]

# One corpus-level value per algorithm is appended to each list by the result
# cells. Re-run this cell before re-running them, otherwise scores accumulate.
algo_precision_scores, algo_recall_scores, algo_f1_scores = [], [], []

### ROUGE Metrics for Evaluation of the results

In [7]:
def get_avg_scores(result):
    """
    Average precision, recall and F1 over the three ROUGE variants of ONE document.

    Parameters
    ----------
    result : mapping
        Must have the keys 'rouge1', 'rouge2' and 'rougeL'. Each value is
        indexed positionally: [0] precision, [1] recall, [2] F-measure
        (the layout of the score tuples returned by `RougeScorer.score`).

    Returns
    -------
    tuple
        (precision, recall, f1), each the mean over the three ROUGE variants
        rounded to 2 decimal places.
    """
    rouge_types = ('rouge1', 'rouge2', 'rougeL')

    averaged = []
    # Position 0 = precision, 1 = recall, 2 = F-measure.
    for position in range(3):
        per_variant = [result[rouge][position] for rouge in rouge_types]
        averaged.append(np.round(np.average(per_variant), 2))

    precision_score, recall_score, f1_score = averaged
    return precision_score, recall_score, f1_score

In [8]:
def get_Rouge_scores(refrence_data, test_data):
    """
    Score each predicted keyword string against its reference with ROUGE.

    Parameters
    ----------
    refrence_data : iterable of str
        Reference keywords, one comma-separated string per document.
    test_data : iterable of str
        Predicted keywords, paired with `refrence_data` by position.

    Returns
    -------
    pandas.DataFrame
        Columns 'Doc_no', 'Abstract', 'Avg_precision_score',
        'Avg_recall_score' and 'Avg_f1_score', one row per scored pair.

    Notes
    -----
    'Doc_no' and 'Abstract' are taken from the notebook-level names `doc_no`
    and `abstracts`, not from the arguments. Both `zip` calls stop at the
    shortest input, so inputs of unequal length are truncated silently.
    """
    # Built once: the scorer configuration is identical for every document.
    scorer = rouge_scorer.RougeScorer(['rouge1','rouge2','rougeL'], use_stemmer=True)

    precision_score = []
    recall_score = []
    f1_score = []

    for refrence, test in zip(refrence_data, test_data):
        # Commas become spaces, so each keyword list is scored as a single
        # whitespace-separated text.
        refrence = refrence.replace(",", " ")
        test = test.replace(",", " ")

        # Argument order is (target, prediction).
        scores = scorer.score(refrence, test)

        # Collapse the three ROUGE variants into one value per metric.
        p_score, r_score, f_score = get_avg_scores(scores)
        precision_score.append(p_score)
        recall_score.append(r_score)
        f1_score.append(f_score)

    results_df = pd.DataFrame(
        zip(doc_no, abstracts, precision_score, recall_score, f1_score),
        columns=['Doc_no', 'Abstract','Avg_precision_score','Avg_recall_score','Avg_f1_score'],
    )
    return results_df

In [9]:
def get_final_results(result):
    """
    Average the per-document scores over the whole dataset.

    Parameters
    ----------
    result : pandas.DataFrame or mapping
        Must provide 'Avg_precision_score', 'Avg_recall_score' and
        'Avg_f1_score', each a sequence of per-document values.

    Returns
    -------
    tuple
        (precision, recall, f1): the unrounded mean of each column.
    """
    score_columns = ('Avg_precision_score', 'Avg_recall_score', 'Avg_f1_score')
    return tuple(np.average(result[column]) for column in score_columns)

### Results for the RAKE Algorithm

In [10]:
# RAKE predictions as a plain list, in frame row order.
# NOTE(review): assumes rows are in the same order as `data` — confirm.
rake_data = rake_pred_df['Extracted_Keywords'].tolist()

In [11]:
# Per-document averaged ROUGE scores for RAKE (reference first, prediction second).
rake_results = get_Rouge_scores(refrence_data, rake_data)

In [12]:
# Preview the first per-document RAKE score rows.
rake_results.head()

Unnamed: 0,Doc_no,Abstract,Avg_precision_score,Avg_recall_score,Avg_f1_score
0,6,eye past future sbc communication week created...,0.11,0.29,0.16
1,7,lawsuit alleges sprint violated utah new anti ...,0.36,0.44,0.4
2,8,hutchison telecommunication singapore technolo...,0.27,0.47,0.34
3,9,implication internet service operation strateg...,0.43,0.15,0.22
4,11,paper deal question social capital determine i...,0.34,0.17,0.22


In [13]:
# Corpus-level RAKE averages: print them and record one value per metric for
# the final comparison table.
# NOTE: re-running this cell appends the same scores a second time.
print("---------------- RAKE RESULTS ----------------")
final_res = get_final_results(rake_results)
for score_list, score in zip(
    (algo_precision_scores, algo_recall_scores, algo_f1_scores), final_res
):
    score_list.append(score)
print(f"1. Avg. Precision score on the whole document : {final_res[0]}")
print(f"2. Avg. Recall score on the whole document : {final_res[1]}")
print(f"3. Avg. F1 score on the whole document : {final_res[2]}")

---------------- RAKE RESULTS ----------------
1. Avg. Precision score on the whole document : 0.34786
2. Avg. Recall score on the whole document : 0.20124
3. Avg. F1 score on the whole document : 0.2417


### Results for the YAKE Algorithm

In [14]:
# YAKE predictions as a plain list, in frame row order.
# NOTE(review): assumes rows are in the same order as `data` — confirm.
yake_data = yake_pred_df['Extracted_Keywords'].tolist()

In [15]:
# Per-document averaged ROUGE scores for YAKE (reference first, prediction second).
yake_results = get_Rouge_scores(refrence_data, yake_data)

In [16]:
# Preview the first per-document YAKE score rows.
yake_results.head()

Unnamed: 0,Doc_no,Abstract,Avg_precision_score,Avg_recall_score,Avg_f1_score
0,6,eye past future sbc communication week created...,0.19,0.4,0.26
1,7,lawsuit alleges sprint violated utah new anti ...,0.36,0.39,0.38
2,8,hutchison telecommunication singapore technolo...,0.41,0.73,0.53
3,9,implication internet service operation strateg...,0.46,0.12,0.19
4,11,paper deal question social capital determine i...,0.27,0.09,0.14


In [17]:
# Corpus-level YAKE averages: print them and record one value per metric for
# the final comparison table.
# NOTE: re-running this cell appends the same scores a second time.
print("---------------- YAKE RESULTS ----------------")
final_res = get_final_results(yake_results)
for score_list, score in zip(
    (algo_precision_scores, algo_recall_scores, algo_f1_scores), final_res
):
    score_list.append(score)
print(f"1. Avg. Precision score on the whole document : {final_res[0]}")
print(f"2. Avg. Recall score on the whole document : {final_res[1]}")
print(f"3. Avg. F1 score on the whole document : {final_res[2]}")

---------------- YAKE RESULTS ----------------
1. Avg. Precision score on the whole document : 0.32258000000000003
2. Avg. Recall score on the whole document : 0.16119999999999998
3. Avg. F1 score on the whole document : 0.19832


### Results for TF-IDF

In [18]:
# TF-IDF predictions as a plain list, in frame row order.
# NOTE(review): assumes rows are in the same order as `data` — confirm.
tf_idf_data = tf_idf_pred_df['Extracted_Keywords'].tolist()

In [19]:
# Per-document averaged ROUGE scores for TF-IDF (reference first, prediction second).
tf_idf_results = get_Rouge_scores(refrence_data, tf_idf_data)

In [20]:
# Preview the first per-document TF-IDF score rows.
tf_idf_results.head()

Unnamed: 0,Doc_no,Abstract,Avg_precision_score,Avg_recall_score,Avg_f1_score
0,6,eye past future sbc communication week created...,0.11,0.25,0.15
1,7,lawsuit alleges sprint violated utah new anti ...,0.21,0.26,0.23
2,8,hutchison telecommunication singapore technolo...,0.23,0.47,0.31
3,9,implication internet service operation strateg...,0.57,0.17,0.27
4,11,paper deal question social capital determine i...,0.3,0.12,0.17


In [21]:
# Corpus-level TF-IDF averages: print them and record one value per metric for
# the final comparison table.
# NOTE: re-running this cell appends the same scores a second time.
print("---------------- TF-IDF RESULTS ----------------")
final_res = get_final_results(tf_idf_results)
for score_list, score in zip(
    (algo_precision_scores, algo_recall_scores, algo_f1_scores), final_res
):
    score_list.append(score)
print(f"1. Avg. Precision score on the whole document : {final_res[0]}")
print(f"2. Avg. Recall score on the whole document : {final_res[1]}")
print(f"3. Avg. F1 score on the whole document : {final_res[2]}")

---------------- TF-IDF RESULTS ----------------
1. Avg. Precision score on the whole document : 0.29417999999999994
2. Avg. Recall score on the whole document : 0.16390000000000002
3. Avg. F1 score on the whole document : 0.19407999999999997


### Results for KeyBERT Algorithm

In [22]:
# KeyBERT predictions as a plain list, in frame row order.
# NOTE(review): assumes rows are in the same order as `data` — confirm.
kb_data = key_bert_pred_df['Extracted_Keywords'].tolist()

In [23]:
# Per-document averaged ROUGE scores for KeyBERT (reference first, prediction second).
kb_results = get_Rouge_scores(refrence_data, kb_data)

In [24]:
# Preview the first per-document KeyBERT score rows.
kb_results.head()

Unnamed: 0,Doc_no,Abstract,Avg_precision_score,Avg_recall_score,Avg_f1_score
0,6,eye past future sbc communication week created...,0.25,0.54,0.34
1,7,lawsuit alleges sprint violated utah new anti ...,0.19,0.21,0.2
2,8,hutchison telecommunication singapore technolo...,0.05,0.08,0.06
3,9,implication internet service operation strateg...,0.47,0.14,0.21
4,11,paper deal question social capital determine i...,0.25,0.09,0.13


In [25]:
# Corpus-level KeyBERT averages: print them and record one value per metric for
# the final comparison table.
# NOTE: re-running this cell appends the same scores a second time.
print("---------------- KeyBERT RESULTS ----------------")
final_res = get_final_results(kb_results)
for score_list, score in zip(
    (algo_precision_scores, algo_recall_scores, algo_f1_scores), final_res
):
    score_list.append(score)
print(f"1. Avg. Precision score on the whole document : {final_res[0]}")
print(f"2. Avg. Recall score on the whole document : {final_res[1]}")
print(f"3. Avg. F1 score on the whole document : {final_res[2]}")

---------------- KeyBERT RESULTS ----------------
1. Avg. Precision score on the whole document : 0.311
2. Avg. Recall score on the whole document : 0.15896
3. Avg. F1 score on the whole document : 0.19533999999999999


### Results for LDA Algorithm

In [26]:
# LDA predictions as a plain list, in frame row order.
# NOTE(review): assumes rows are in the same order as `data` — confirm.
lda_data = lda_pred_df['Extracted_Keywords'].tolist()

In [27]:
# Per-document averaged ROUGE scores for LDA (reference first, prediction second).
lda_results = get_Rouge_scores(refrence_data, lda_data)

In [28]:
# Preview the first per-document LDA score rows.
lda_results.head()

Unnamed: 0,Doc_no,Abstract,Avg_precision_score,Avg_recall_score,Avg_f1_score
0,6,eye past future sbc communication week created...,0.1,0.29,0.14
1,7,lawsuit alleges sprint violated utah new anti ...,0.26,0.42,0.32
2,8,hutchison telecommunication singapore technolo...,0.16,0.43,0.23
3,9,implication internet service operation strateg...,0.51,0.21,0.29
4,11,paper deal question social capital determine i...,0.24,0.13,0.17


In [29]:
# Corpus-level LDA averages: print them and record one value per metric for
# the final comparison table.
# NOTE: re-running this cell appends the same scores a second time.
print("---------------- LDA RESULTS ----------------")
final_res = get_final_results(lda_results)
for score_list, score in zip(
    (algo_precision_scores, algo_recall_scores, algo_f1_scores), final_res
):
    score_list.append(score)
print(f"1. Avg. Precision score on the whole document : {final_res[0]}")
print(f"2. Avg. Recall score on the whole document : {final_res[1]}")
print(f"3. Avg. F1 score on the whole document : {final_res[2]}")

---------------- LDA RESULTS ----------------
1. Avg. Precision score on the whole document : 0.26926
2. Avg. Recall score on the whole document : 0.19878
3. Avg. F1 score on the whole document : 0.21084


### Results for PositionRank Algorithm

In [30]:
# PositionRank predictions as a plain list, in frame row order.
# NOTE(review): assumes rows are in the same order as `data` — confirm.
pr_data = pr_pred_df['Extracted_Keywords'].tolist()

In [31]:
# Per-document averaged ROUGE scores for PositionRank (reference first, prediction second).
pr_results = get_Rouge_scores(refrence_data, pr_data)

In [32]:
# Preview the first per-document PositionRank score rows.
pr_results.head()

Unnamed: 0,Doc_no,Abstract,Avg_precision_score,Avg_recall_score,Avg_f1_score
0,6,eye past future sbc communication week created...,0.09,0.25,0.14
1,7,lawsuit alleges sprint violated utah new anti ...,0.39,0.42,0.4
2,8,hutchison telecommunication singapore technolo...,0.45,0.69,0.55
3,9,implication internet service operation strateg...,0.37,0.08,0.13
4,11,paper deal question social capital determine i...,0.13,0.05,0.07


In [33]:
# Corpus-level PositionRank averages: print them and record one value per
# metric for the final comparison table.
# NOTE: re-running this cell appends the same scores a second time.
print("---------------- PositionRank RESULTS ----------------")
final_res = get_final_results(pr_results)
for score_list, score in zip(
    (algo_precision_scores, algo_recall_scores, algo_f1_scores), final_res
):
    score_list.append(score)
print(f"1. Avg. Precision score on the whole document : {final_res[0]}")
print(f"2. Avg. Recall score on the whole document : {final_res[1]}")
print(f"3. Avg. F1 score on the whole document : {final_res[2]}")

---------------- PositionRank RESULTS ----------------
1. Avg. Precision score on the whole document : 0.3404
2. Avg. Recall score on the whole document : 0.17837999999999998
3. Avg. F1 score on the whole document : 0.21072


### Comparing the overall results

In [34]:
# One row per algorithm. Names and scores are paired purely by position, and
# zip stops at the shortest list, so a skipped or re-run result cell will
# mislabel or drop rows without raising.
comparison_columns = ['Algorithm','Avg Precision Score', 'Avg Recall Score','Avg F1 Scores']
comparison_rows = zip(
    algorithm,
    algo_precision_scores,
    algo_recall_scores,
    algo_f1_scores,
)
algo_comparision = pd.DataFrame(comparison_rows, columns=comparison_columns)

In [35]:
# Print a title, then render the comparison table with the notebook's rich display.
# NOTE(review): the title below says SemEval2017, but the saved output under
# this cell reads HULTH2003 — confirm which dataset these results belong to
# and re-run so code and output agree.
print("\n----- Comparision of various algorithms for keywords extraction on SemEval2017 dataset -----")
display(algo_comparision)


----- Comparision of various algorithms for keywords extraction on HULTH2003 dataset -----


Unnamed: 0,Algorithm,Avg Precision Score,Avg Recall Score,Avg F1 Scores
0,RAKE,0.34786,0.20124,0.2417
1,YAKE,0.32258,0.1612,0.19832
2,TF-IDF,0.29418,0.1639,0.19408
3,KeyBERT,0.311,0.15896,0.19534
4,LDA,0.26926,0.19878,0.21084
5,Position Rank,0.3404,0.17838,0.21072
