In [362]:
# Import necessary libraries

import glob
import re
import time
import numpy as np
import xml.etree.ElementTree as ET
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx
from rouge_score import rouge_scorer
from statistics import mean

In [328]:
def extract_abstract(element):
    abstract = ''
    if element.tag == 'ABSTRACT':
        for child in element:
            line = child.text 
            # Text preprocessing
            line = line.lower()
            line = line.replace("quot", "")
            line = re.sub(r'https?://\S+', '', line) 
            line = re.sub(r'[^\w\s]', '', line)
            # Append to abstract
            abstract = abstract + '\n' + line
    
    return abstract

In [329]:
# Function to extract paper contents from the XML tree
def extract_data(element):
    text_data = (element.text or "").strip()
    # Separate abstract text from rest of the paper
    if element.tag != 'ABSTRACT':
        # Text preprocessing
        text_data = text_data.lower()
        text_data = text_data.replace("quot", "")
        text_data = re.sub(r'https?://\S+', '', text_data) 
        text_data = re.sub(r'[^\w\s]', '', text_data)

        for child in element:
            text_data = np.append(text_data, extract_data(child))  

    return text_data

In [356]:
def get_contents(document_path):
    summary_path = glob.glob(document_path + '/summary/*.txt')[0]
    article_path = glob.glob(document_path + '/Documents_xml/*.xml')[0]
    
    summary_file = open(summary_path)
    summary_content = summary_file.read()

    article_tree = ET.parse(article_path)
    article_root = article_tree.getroot()

    abstract_element = None

    for child in article_root:
        if child.tag == 'ABSTRACT':
            abstract_element = child

    sentences = extract_data(article_root)

    sentences = [s for s in list(sentences) if s != '']

    title = sentences[0]

    sentences = sentences[1:]

    sentence_embeddings = embedder.encode(sentences)

    if abstract_element: 
        abstract = extract_abstract(abstract_element)
        abstract = title + abstract 
    else:
        abstract = []

    contents = {'title': title, 'sentences': sentences, 'embeddings': sentence_embeddings, 'summary': summary_content, 'abstract': abstract}

    document_name = document_path.split('/')[-1]

    # data[document_name] = contents
    
    return contents

In [332]:
def generate_sim_matrix(contents):
    sentences = contents['sentences']
    sentence_embeddings = contents['embeddings']
    # similarity matrix
    sim_mat = np.zeros([len(sentences), len(sentences)])   

    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i != j:
                sim_mat[i][j] = cosine_similarity(sentence_embeddings[i].reshape(1,768), sentence_embeddings[j].reshape(1,768))[0,0] 
                # sim_mat[i][j] = len([word for word in sentences[i].split() if word in sentences[j].split()])/(np.log(len(sentences[i])) + np.log(len(sentences[i]))
    
    return sim_mat

In [333]:
def textRank(sim_mat, sentences):
    nx_graph = nx.from_numpy_array(sim_mat)
    scores = nx.pagerank(nx_graph)
    ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
    return ranked_sentences

In [334]:
def generate_summary(title, ranked_sentences, n):
    result = title
    for i in range(n):
        # print(f"{i}.{ranked_sentences[i][1]}")
        result = result + '\n ' + ranked_sentences[i][1]
    return result

In [374]:
def evaluate(reference_summaries, generated_summaries):
    # Use Rouge score technique to evaluate text summary
    rouge_scoring = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=False)

    score_dict = {  "r1-precision":[]
                   , "r1-recall":[]
                   , "r1-f1-score":[]
                   , "r2-precision":[]
                   , "r2-recall":[]
                   , "r2-f1-score":[]
                   , "rL-precision":[]
                   , "rL-recall":[]
                   , "rL-f1-score":[]
                    }

    for i, (reference_summary, summary) in enumerate(zip(reference_summaries, generated_summaries)):
        scores = rouge_scoring.score(reference_summary, summary)
        # print(f"ROUGE scores for generated summary {i+1}:")
        # print("ROUGE-1 Precision:", scores['rouge1'].precision)
        # print("ROUGE-1 Recall:", scores['rouge1'].recall)
        # print("ROUGE-1 F1-Score:", scores['rouge1'].fmeasure)
        # print("ROUGE-2 Precision:", scores['rouge2'].precision)
        # print("ROUGE-2 Recall:", scores['rouge2'].recall)
        # print("ROUGE-2 F1-Score:", scores['rouge2'].fmeasure)
        # print("ROUGE-L Precision:", scores['rougeL'].precision)
        # print("ROUGE-L Recall:", scores['rougeL'].recall)
        # print("ROUGE-L F1-Score:", scores['rougeL'].fmeasure)
        # print("---------------------")

        score_dict["r1-precision"].append(scores['rouge1'].precision)
        score_dict["r1-recall"].append(scores['rouge1'].recall)
        score_dict["r1-f1-score"].append(scores['rouge1'].fmeasure)
        score_dict["r2-precision"].append(scores['rouge2'].precision)
        score_dict["r2-recall"].append(scores['rouge2'].recall)
        score_dict["r2-f1-score"].append(scores['rouge2'].fmeasure)
        score_dict["rL-precision"].append(scores['rougeL'].precision)
        score_dict["rL-recall"].append(scores['rougeL'].recall)
        score_dict["rL-f1-score"].append(scores['rougeL'].fmeasure)
    
    return score_dict



In [351]:
data_directory='./data/scisummnet_release1.1__20190413/top1000_complete'
embedder = SentenceTransformer('distilbert-base-nli-mean-tokens')
data = {}
n = 10

In [352]:
abstracts = []
manual_summaries = []
generated_summaries = []

In [358]:
for i, doc in enumerate(glob.glob(data_directory + '/*')[0:10]):
    start_time = time.time()
    contents = get_contents(doc)
    sim_mat = generate_sim_matrix(contents)
    ranked_sentences = textRank(sim_mat, contents['sentences'])
    result = generate_summary(contents['title'], ranked_sentences, n)
    abstracts.append(contents['abstract'])
    manual_summaries.append(contents['summary'])
    generated_summaries.append(result)
    end_time = time.time()
    print(i+1, doc, len(contents['sentences']), 'Duration:', end_time - start_time)

1 ./data/scisummnet_release1.1__20190413/top1000_complete/P99-1008 123 Duration: 4.377468824386597
2 ./data/scisummnet_release1.1__20190413/top1000_complete/J93-2003 677 Duration: 97.07724070549011
3 ./data/scisummnet_release1.1__20190413/top1000_complete/W08-0336 184 Duration: 8.469384908676147
4 ./data/scisummnet_release1.1__20190413/top1000_complete/N10-1119 157 Duration: 6.893887996673584
5 ./data/scisummnet_release1.1__20190413/top1000_complete/P04-3022 77 Duration: 1.919508934020996
6 ./data/scisummnet_release1.1__20190413/top1000_complete/J06-3003 600 Duration: 70.17219305038452
7 ./data/scisummnet_release1.1__20190413/top1000_complete/W08-0309 244 Duration: 13.543020963668823
8 ./data/scisummnet_release1.1__20190413/top1000_complete/P08-1114 145 Duration: 6.138139963150024
9 ./data/scisummnet_release1.1__20190413/top1000_complete/P87-1033 139 Duration: 5.61949610710144
10 ./data/scisummnet_release1.1__20190413/top1000_complete/J98-1006 338 Duration: 24.171996116638184


In [378]:
rouge_scores_manual_summaries = evaluate(manual_summaries, generated_summaries)
rouge_scores_abstracts = evaluate(abstracts, generated_summaries)

print('Average ROUGE-1 F1-Score:', mean(rouge_scores_manual_summaries['r1-f1-score']))
print('Average ROUGE-1 F1-Score:', mean(rouge_scores_abstracts['r1-f1-score']))

Average ROUGE-1 F1-Score: 0.3410811142281667
Average ROUGE-1 F1-Score: 0.2973768448016149


Ways to improve:

1 Remove abstract from paper content
2 Remove stop words
3 Rearrange sentences in summary according to their order of appearance in the paper
4 Use different embedding techniques
5 Use other weight calculations such as isf-modified-cosine-similarity
6 Understand the relationship between ROUGE scores and length of the summary and set n accordingly
7 Find other evaluation metrics to compare
8 Stem words in the summaries before comparison