In [1]:
import pickle
from rouge import Rouge

# Single shared scorer instance; the aggregation loop further down calls
# rouge.get_scores(...) on it for every section/abstract pair.
rouge = Rouge()

import nltk
import numpy as np
from make_data import get_datapoints as get_data

In [2]:
# Load the sample corpus. `store` is indexed per paper below, where each
# paper exposes paper['abstract']['abstract'] and paper['fulltext'].
store = get_data("./LaySumm_Sample/")
print("Number of Datapoints:", len(store))

Number of Datapoints: 10


In [3]:
# Sanity check for the loader (disabled): uncomment the loop below
# to display every field name found in the first paper's full text.
# for key in store[0]['fulltext'].keys():
#     print(key)

In [4]:
def make_stats_dict():
    """Return a fresh, empty statistics record.

    The record starts with a zero 'count' and two new empty lists,
    'lengths' and 'rouge_measures'. Every call builds new list objects,
    so records never share state.

    If more fields are required, add them to the dictionary below.
    """
    return {
        'count': 0,
        'lengths': [],
        'rouge_measures': [],
    }

In [5]:
def extract_rouge(rouge_dict):
    """Flatten a ROUGE score dictionary into a 9-element numpy array.

    Values are read from the "rouge-1", "rouge-2" and "rouge-l" entries,
    in that order. Within each entry the 'f', 'p' and 'r' values are taken,
    in that order, and each is multiplied by 100.

    A numpy array is returned so that results can be added or averaged
    element-wise by the caller.
    """
    variants = ("rouge-1", "rouge-2", "rouge-l")
    components = ('f', 'p', 'r')

    return np.asarray([
        100 * rouge_dict[variant][component]
        for variant in variants
        for component in components
    ])

In [6]:
def get_text(units):
    """Concatenate the tokens of every sentence in `units` into one string.

    Entries equal to 'PARAGRAPH' are skipped. Every other entry is split
    with nltk.word_tokenize; each token has surrounding whitespace removed
    and is followed by a single space. A non-empty result therefore ends
    with a trailing space, and an input with nothing to tokenize gives "".
    """
    pieces = []

    for sentence in units:
        if sentence != 'PARAGRAPH':
            # strip() already removes trailing newlines along with all
            # other surrounding whitespace.
            pieces.extend(
                word.strip() + " "
                for word in nltk.word_tokenize(sentence)
            )

    return "".join(pieces)

In [7]:
# Per-heading statistics: for each section heading seen in any paper, record
# how many times it was scored, the character length of its flattened text,
# and the ROUGE measures of that text against the paper's abstract.
section_stats = {}

# (heading, error) for every section whose ROUGE scoring failed, so skipped
# sections can be inspected instead of disappearing silently.
skipped_sections = []

for paper in store:

    abstract = get_text(paper['abstract']['abstract'])

    for key, units in paper['fulltext'].items():
        if key == 'title':
            continue

        # A heading gets an entry as soon as it is seen, even when scoring
        # below fails; such headings simply keep a count of 0.
        if key not in section_stats:
            section_stats[key] = make_stats_dict()

        section = get_text(units)

        try:
            # The section text is passed first and the abstract second.
            scores = rouge.get_scores(section, abstract)[0]
        except Exception as err:
            # Scoring stays best-effort: a section that cannot be scored is
            # skipped. Catching Exception rather than using a bare `except`
            # lets KeyboardInterrupt / SystemExit still stop the run.
            skipped_sections.append((key, repr(err)))
            continue

        section_stats[key]['count'] += 1
        # Length is measured in characters of the flattened section text.
        section_stats[key]['lengths'].append(len(section))
        section_stats[key]['rouge_measures'].append(extract_rouge(scores))

In [8]:
# Report, for each section heading, how many times it was scored.
# Headings scored fewer than 5 times are left out.
print("Section Head: Count".upper())
for key, stats in section_stats.items():
    if stats['count'] < 5:
        continue
    print(key + ": " + str(stats['count']))

SECTION HEAD: COUNT
introduction: 10
patients and methods: 5
statistical analysis: 6
results: 10
discussion: 10
financial support: 9
conflict of interest: 10
authors’ contributions: 10


In [9]:
# Write the collected per-section statistics to a pickle file
# so they can be loaded and evaluated later.
filename = 'stats_LaySumm_Sample.pickle'
with open(filename, mode='wb') as out_file:
    pickle.dump(section_stats, out_file)

In [10]:
# ^_^ Thank You