In [615]:
# Import necessary libraries

import glob
import re
import time
import numpy as np
import xml.etree.ElementTree as ET
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx
from rouge_score import rouge_scorer
from statistics import mean
import nltk
from nltk.corpus import stopwords
import pandas as pd


In [328]:
# Function to extract abstract from XML tree
def extract_abstract(element):
    abstract = ''
    if element.tag == 'ABSTRACT':
        for child in element:
            line = child.text 
            # Text preprocessing
            line = line.lower()
            line = line.replace("quot", "")
            line = re.sub(r'https?://\S+', '', line) 
            line = re.sub(r'[^\w\s]', '', line)
            # Append to abstract
            abstract = abstract + '\n' + line
    
    return abstract

In [570]:
# Function to extract paper text from XML tree
def extract_data(element):
    text_data = (element.text or "").strip()
    # Separate abstract text from rest of the paper
    if element.tag != 'ABSTRACT':
        # Text preprocessing
        text_data = text_data.lower()
        text_data = text_data.replace("quot", "")
        text_data = re.sub(r'https?://\S+', '', text_data) 
        text_data = re.sub(r'[^\w\s]', '', text_data)

        for child in element:
            text_data = np.append(text_data, extract_data(child))  

    return text_data

In [540]:
def generate_sentence_embeddings(sentences):
    clean_sentences = []
    for snt in list(sentences):
        clean_snt = [word for word in snt.split() if word not in stop_words]
        clean_sentences.append(" ".join(clean_snt))
    

    sentence_embeddings = embedder.encode(clean_sentences)
    return sentence_embeddings

In [476]:
# Function to extract all data related to a paper from dataset
def get_contents(document_path):
    summary_path = glob.glob(document_path + '/summary/*.txt')[0]
    article_path = glob.glob(document_path + '/Documents_xml/*.xml')[0]
    
    summary_file = open(summary_path)
    summary_content = summary_file.read()

    article_tree = ET.parse(article_path)
    article_root = article_tree.getroot()

    abstract_element = None

    for child in article_root:
        if child.tag == 'ABSTRACT':
            abstract_element = child

    sentences = extract_data(article_root)

    sentences = [s for s in list(sentences) if s != '']

    title = sentences[0]

    sentences = sentences[1:]

    sentence_embeddings = generate_sentence_embeddings(sentences)

    if abstract_element: 
        abstract = extract_abstract(abstract_element)
        abstract = title + abstract 
    else:
        abstract = ''

    contents = {'title': title, 'sentences': sentences, 'embeddings': sentence_embeddings, 'summary': summary_content, 'abstract': abstract}

    document_name = document_path.split('/')[-1]

    data[document_name] = contents
    
    return contents

In [332]:
# Function to generate the similarity matrix of the paper
def generate_sim_matrix(contents):
    sentences = contents['sentences']
    sentence_embeddings = contents['embeddings']
    # similarity matrix
    sim_mat = np.zeros([len(sentences), len(sentences)])   

    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i != j:
                sim_mat[i][j] = cosine_similarity(sentence_embeddings[i].reshape(1,768), sentence_embeddings[j].reshape(1,768))[0,0] 
                # sim_mat[i][j] = len([word for word in sentences[i].split() if word in sentences[j].split()])/(np.log(len(sentences[i])) + np.log(len(sentences[i]))
    
    return sim_mat

In [333]:
# Function to compute textRank of sentences in the paper
def textRank(sim_mat, sentences):
    nx_graph = nx.from_numpy_array(sim_mat)
    scores = nx.pagerank(nx_graph)
    ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
    return ranked_sentences

In [457]:
# Function to generate a summary using top ranking sentences
def generate_summary(title, sentences, ranked_sentences, n):
    sentence_positions = []
    result = title
    for i in range(n):
        # print(f"{i}.{ranked_sentences[i][1]}")
        sentence_positions.append(list(sentences).index(ranked_sentences[i][1]))
    
    sentence_positions_sorted = sorted(sentence_positions)
    
    for pos in sentence_positions_sorted:
        result = result + '\n' + sentences[pos]
    return result

In [627]:
# Function to evaluate the model performance
def evaluate(ls_documents, reference_summaries, generated_summaries):
    # Use Rouge score technique to evaluate text summary
    rouge_scoring = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=False)

    score_dict = {   "document":[]
                   , "r1-precision":[]
                   , "r1-recall":[]
                   , "r1-f1-score":[]
                   , "r2-precision":[]
                   , "r2-recall":[]
                   , "r2-f1-score":[]
                   , "rL-precision":[]
                   , "rL-recall":[]
                   , "rL-f1-score":[]
                    }

    for i, (reference_summary, summary) in enumerate(zip(reference_summaries, generated_summaries)):
        score_dict['document'].append(ls_documents[i].split('/')[-1])
        if reference_summary == '':
            score_dict["r1-precision"].append(0)
            score_dict["r1-recall"].append(0)
            score_dict["r1-f1-score"].append(0)
            score_dict["r2-precision"].append(0)
            score_dict["r2-recall"].append(0)
            score_dict["r2-f1-score"].append(0)
            score_dict["rL-precision"].append(0)
            score_dict["rL-recall"].append(0)
            score_dict["rL-f1-score"].append(0)
        else:
            scores = rouge_scoring.score(reference_summary, summary)
            # print(f"ROUGE scores for generated summary {i+1}:")
            # print("ROUGE-1 Precision:", scores['rouge1'].precision)
            # print("ROUGE-1 Recall:", scores['rouge1'].recall)
            # print("ROUGE-1 F1-Score:", scores['rouge1'].fmeasure)
            # print("ROUGE-2 Precision:", scores['rouge2'].precision)
            # print("ROUGE-2 Recall:", scores['rouge2'].recall)
            # print("ROUGE-2 F1-Score:", scores['rouge2'].fmeasure)
            # print("ROUGE-L Precision:", scores['rougeL'].precision)
            # print("ROUGE-L Recall:", scores['rougeL'].recall)
            # print("ROUGE-L F1-Score:", scores['rougeL'].fmeasure)
            # print("---------------------")

            score_dict["r1-precision"].append(scores['rouge1'].precision)
            score_dict["r1-recall"].append(scores['rouge1'].recall)
            score_dict["r1-f1-score"].append(scores['rouge1'].fmeasure)
            score_dict["r2-precision"].append(scores['rouge2'].precision)
            score_dict["r2-recall"].append(scores['rouge2'].recall)
            score_dict["r2-f1-score"].append(scores['rouge2'].fmeasure)
            score_dict["rL-precision"].append(scores['rougeL'].precision)
            score_dict["rL-recall"].append(scores['rougeL'].recall)
            score_dict["rL-f1-score"].append(scores['rougeL'].fmeasure)
    
    return score_dict



In [628]:
# Set input data path and initialize
data_directory='./data/scisummnet_release1.1__20190413/top1000_complete'

nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# embedder = SentenceTransformer('distilbert-base-nli-mean-tokens')
embedder = SentenceTransformer('allenai-specter')

data = {}
n = 10

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/sumukhiganesan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [629]:
# Create lists to receive outputs
abstracts = []
manual_summaries = []
generated_summaries = []

In [630]:
# Iterate over papers in the dataset to generate sumamries
ls_documents = glob.glob(data_directory + '/*')

for i, doc in enumerate(ls_documents):
    start_time = time.time()
    contents = get_contents(doc)
    sim_mat = generate_sim_matrix(contents)
    ranked_sentences = textRank(sim_mat, contents['sentences'])
    result = generate_summary(contents['title'], contents['sentences'], ranked_sentences, n)
    abstracts.append(contents['abstract'])
    manual_summaries.append(contents['summary'])
    generated_summaries.append(result)
    end_time = time.time()
    print(i+1, doc, len(contents['sentences']), 'Duration:', end_time - start_time)

1 ./data/scisummnet_release1.1__20190413/top1000_complete/P99-1008 123 Duration: 5.086650848388672
2 ./data/scisummnet_release1.1__20190413/top1000_complete/J93-2003 677 Duration: 96.30519914627075
3 ./data/scisummnet_release1.1__20190413/top1000_complete/W08-0336 184 Duration: 9.461012840270996
4 ./data/scisummnet_release1.1__20190413/top1000_complete/N10-1119 157 Duration: 8.185255289077759


In [633]:
# Evaluate model performance
rouge_scores_manual_summaries = evaluate(ls_documents, manual_summaries, generated_summaries)
rouge_scores_abstracts = evaluate(ls_documents, abstracts, generated_summaries)


# Display scores
print('Average Scores against Manual Summaries:')
print('ROUGE-1 Precision:', mean(rouge_scores_manual_summaries['r1-precision']))
print('ROUGE-1 Recall:', mean(rouge_scores_manual_summaries['r1-recall']))
print('ROUGE-1 F1-Score:', mean(rouge_scores_manual_summaries['r1-f1-score']))
print('ROUGE-2 Precision:', mean(rouge_scores_manual_summaries['r2-precision']))
print('ROUGE-2 Recall:', mean(rouge_scores_manual_summaries['r2-recall']))
print('ROUGE-2 F1-Score:', mean(rouge_scores_manual_summaries['r2-f1-score']))
print('ROUGE-L Precision:', mean(rouge_scores_manual_summaries['rL-precision']))
print('ROUGE-L Recall:', mean(rouge_scores_manual_summaries['rL-recall']))
print('ROUGE-L F1-Score:', mean(rouge_scores_manual_summaries['rL-f1-score']))

print('-------------------------------------')

print('Average Scores against Abstracts:')
print('ROUGE-1 Precision:', mean(rouge_scores_abstracts['r1-precision']))
print('ROUGE-1 Recall:', mean(rouge_scores_abstracts['r1-recall']))
print('ROUGE-1 F1-Score:', mean(rouge_scores_abstracts['r1-f1-score']))
print('ROUGE-2 Precision:', mean(rouge_scores_abstracts['r2-precision']))
print('ROUGE-2 Recall:', mean(rouge_scores_abstracts['r2-recall']))
print('ROUGE-2 F1-Score:', mean(rouge_scores_abstracts['r2-f1-score']))
print('ROUGE-L Precision:', mean(rouge_scores_abstracts['rL-precision']))
print('ROUGE-L Recall:', mean(rouge_scores_abstracts['rL-recall']))
print('ROUGE-L F1-Score:', mean(rouge_scores_abstracts['rL-f1-score']))



Average Scores against Manual Summaries:
ROUGE-1 Precision: 0.314546479579676
ROUGE-1 Recall: 0.48383904695638025
ROUGE-1 F1-Score: 0.3741649709834971
ROUGE-2 Precision: 0.08426490588009021
ROUGE-2 Recall: 0.1310524035713424
ROUGE-2 F1-Score: 0.10076841415076362
ROUGE-L Precision: 0.1593792301188855
ROUGE-L Recall: 0.24640929771990466
ROUGE-L F1-Score: 0.18987792745554333
-------------------------------------
Average Scores against Abstracts:
ROUGE-1 Precision: 0.2596313600433187
ROUGE-1 Recall: 0.5039695340501792
ROUGE-1 F1-Score: 0.333268400604966
ROUGE-2 Precision: 0.07250590708509119
ROUGE-2 Recall: 0.14135966098222424
ROUGE-2 F1-Score: 0.0932611894909891
ROUGE-L Precision: 0.14475528574142496
ROUGE-L Recall: 0.2855501792114695
ROUGE-L F1-Score: 0.18672112517721687


In [632]:
# Save scores

df1 = pd.DataFrame(rouge_scores_manual_summaries)
df1.to_csv('./rouge_scores_manual_summaries.csv')

df2 = pd.DataFrame(rouge_scores_abstracts)
df2.to_csv('./rouge_scores_abstracts.csv')
