## LexRank: Graph-based Centrality as Salience in Text Summarization

Official Paper: http://tangra.si.umich.edu/~radev/lexrank/lexrank.pdf

GitHub: https://github.com/miso-belica/sumy

This notebook runs LexRank with different combinations of NLP preprocessing (stop-word removal and stemming).

In [None]:
import os
# Directory containing the input documents; every entry listed in it is
# parsed and summarized by the cells below.
lexrank_extractive_dataset = "/home/s10166858/kawaijoe/Dataset/cnn-dailymail-master/lexrank_extractive_dataset/"

# Directory the generated summaries are written into (one text file per
# input document).
system_dir = "/home/s10166858/kawaijoe/Extractive Methods/LexRank/sumy-dev/system"

#Import library essentials
from sumy.parsers.plaintext import PlaintextParser #Plain text parser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer #LexRank algorithm
from sumy.utils import get_stop_words #Stopwords
from sumy.nlp.stemmers import Stemmer #Stemming

## LexRank with No NLP (Completed)

In [None]:
# LexRank baseline: no stop-word list and no stemmer are configured.
# The tokenizer and summarizer are identical for every document, so they are
# built once here instead of once per file.
tokenizer = Tokenizer('english')
summarizer = LexRankSummarizer()

# Make sure the output directory exists before the first write.
os.makedirs(system_dir, exist_ok=True)

for file in sorted(os.listdir(lexrank_extractive_dataset)):
    parser = PlaintextParser.from_file(
        os.path.join(lexrank_extractive_dataset, file), tokenizer)

    # Summarize the document (4 sentences requested), one sentence per line.
    summary = summarizer(parser.document, 4)
    final_summary = "".join(str(sentence) + "\n" for sentence in summary)

    # file[:-5] drops the last five characters of the input name
    # (presumably its extension -- verify against the dataset's file names).
    new_filename = "article" + str(file[:-5]) + "_system1.txt"
    new_filepath = os.path.join(system_dir, new_filename)

    # Open with "w", not "a": appending would stack a second copy of the
    # summary onto the file every time this cell is re-run.
    # NOTE(review): the other LexRank variant cells in this notebook use the
    # same "_system1" suffix, so running several variants targets the same
    # files -- confirm whether each variant should get its own suffix.
    with open(new_filepath, "w") as f:
        f.write(final_summary)

print("Running Completed")

## LexRank with Stopwords Removal Only (Completed)

In [None]:
# LexRank with stop-word removal only (no stemmer).
# The tokenizer, summarizer and stop-word list are identical for every
# document, so they are built once here instead of once per file.
tokenizer = Tokenizer('english')
summarizer = LexRankSummarizer()
summarizer.stop_words = get_stop_words('english')

# Make sure the output directory exists before the first write.
os.makedirs(system_dir, exist_ok=True)

for file in sorted(os.listdir(lexrank_extractive_dataset)):
    parser = PlaintextParser.from_file(
        os.path.join(lexrank_extractive_dataset, file), tokenizer)

    # Summarize the document (4 sentences requested), one sentence per line.
    summary = summarizer(parser.document, 4)
    final_summary = "".join(str(sentence) + "\n" for sentence in summary)

    # file[:-5] drops the last five characters of the input name
    # (presumably its extension -- verify against the dataset's file names).
    new_filename = "article" + str(file[:-5]) + "_system1.txt"
    new_filepath = os.path.join(system_dir, new_filename)

    # Open with "w", not "a": appending would stack a second copy of the
    # summary onto the file every time this cell is re-run.
    # NOTE(review): the other LexRank variant cells in this notebook use the
    # same "_system1" suffix, so running several variants targets the same
    # files -- confirm whether each variant should get its own suffix.
    with open(new_filepath, "w") as f:
        f.write(final_summary)

print("Running Completed")

## LexRank with Stemming Only (Completed)

In [None]:
# LexRank with stemming only (no stop-word list).
# The tokenizer, stemmer and summarizer are identical for every document, so
# they are built once here instead of once per file.
tokenizer = Tokenizer('english')
stemmer = Stemmer('english')
summarizer = LexRankSummarizer(stemmer)

# Make sure the output directory exists before the first write.
os.makedirs(system_dir, exist_ok=True)

for file in sorted(os.listdir(lexrank_extractive_dataset)):
    parser = PlaintextParser.from_file(
        os.path.join(lexrank_extractive_dataset, file), tokenizer)

    # Summarize the document (4 sentences requested), one sentence per line.
    summary = summarizer(parser.document, 4)
    final_summary = "".join(str(sentence) + "\n" for sentence in summary)

    # file[:-5] drops the last five characters of the input name
    # (presumably its extension -- verify against the dataset's file names).
    new_filename = "article" + str(file[:-5]) + "_system1.txt"
    new_filepath = os.path.join(system_dir, new_filename)

    # Open with "w", not "a": appending would stack a second copy of the
    # summary onto the file every time this cell is re-run.
    # NOTE(review): the other LexRank variant cells in this notebook use the
    # same "_system1" suffix, so running several variants targets the same
    # files -- confirm whether each variant should get its own suffix.
    with open(new_filepath, "w") as f:
        f.write(final_summary)

print("Running Completed")

## LexRank with All NLP (Completed)

In [None]:
# LexRank with both stemming and stop-word removal.
# The tokenizer, stemmer, summarizer and stop-word list are identical for
# every document, so they are built once here instead of once per file.
tokenizer = Tokenizer('english')
stemmer = Stemmer('english')
summarizer = LexRankSummarizer(stemmer)
summarizer.stop_words = get_stop_words('english')

# Make sure the output directory exists before the first write.
os.makedirs(system_dir, exist_ok=True)

for file in sorted(os.listdir(lexrank_extractive_dataset)):
    parser = PlaintextParser.from_file(
        os.path.join(lexrank_extractive_dataset, file), tokenizer)

    # Summarize the document (4 sentences requested), one sentence per line.
    summary = summarizer(parser.document, 4)
    final_summary = "".join(str(sentence) + "\n" for sentence in summary)

    # file[:-5] drops the last five characters of the input name
    # (presumably its extension -- verify against the dataset's file names).
    new_filename = "article" + str(file[:-5]) + "_system1.txt"
    new_filepath = os.path.join(system_dir, new_filename)

    # Open with "w", not "a": appending would stack a second copy of the
    # summary onto the file every time this cell is re-run.
    # NOTE(review): the other LexRank variant cells in this notebook use the
    # same "_system1" suffix, so running several variants targets the same
    # files -- confirm whether each variant should get its own suffix.
    with open(new_filepath, "w") as f:
        f.write(final_summary)

print("Running Completed")