# Overview
* Summarize articles by scoring each sentence based on word frequency

In [1]:
import requests
from bs4 import BeautifulSoup
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from string import punctuation
from nltk.probability import FreqDist
from heapq import nlargest
from collections import defaultdict

In [19]:
# Encapsulating the parsing logic into a function
def getTextWaPo(url, timeout=30):
    '''
    Download a web page (written for Washington Post articles) and return the
    text of its <article> elements as one string.

    Parameters:
        url: address of the page to download.
        timeout: number of seconds passed to requests.get as its timeout, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The text of every <article> element on the page, joined with single
        spaces, with non-breaking spaces (U+00A0) and thin spaces (U+2009)
        replaced by ordinary spaces.
    '''
    # Download the page
    page = requests.get(url, timeout=timeout)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed
    soup = BeautifulSoup(page.content, 'html.parser')
    # Extract the text of every <article> element and join into one single string
    text = ' '.join(element.text for element in soup.find_all('article'))
    # str.replace only matches literal substrings (it is not a regex), so
    # replace the actual \xa0 and \u2009 characters one by one
    for special_space in ('\xa0', '\u2009'):
        text = text.replace(special_space, ' ')
    return text

In [20]:
url = "https://www.washingtonpost.com/local/social-issues/americans-views-flipped-on-gay-rights-how-did-minds-change-so-quickly/2019/06/07/ae256016-8720-11e9-98c1-e945ae5db8fb_story.html?utm_term=.107ac30131f0"
# Download the article once and keep it for the summary step
text = getTextWaPo(url)
# Display the stored result instead of calling getTextWaPo(url) a second time
text

"      By  Samantha Schmidt          Samantha Schmidt Reporter covering gender and family issues  Email  Bio  Follow         June 7 at 7:30 PM  Steve and Teri Augustine met, fell in love and got married in a conservative evangelical Christian community. They grew up believing homosexuality was a sin, and that the “gay agenda” was an attack on their values. Then, six years ago, their son Peter — their youngest child who loved theater and his church youth group — returned home to Ellicott City, Md., from his freshman year of college and came out to his family as gay. Teri asked her son not to tell anyone else, and drove herself to a mall parking lot to cry. Steve questioned his son’s faith, reciting Bible passages from Corinthians. The Augustines decided to put their son through a year of conversion therapy, determined to “set him straight.” But after the therapy failed, something changed. Steve and Teri Augustine started meeting Peter’s friends and inviting other gay Christians to dinne

In [8]:
# Encapsulate summary logic into function
def summarize(text, n):
    '''
    Build an extractive summary by keeping the n highest-scoring sentences.

    A sentence's score is the sum of the frequencies (counted over the whole
    text, after removing stopwords and punctuation) of the words it contains.

    Parameters:
        text: string of text to summarize.
        n: desired number of sentences in the output summary.

    Returns:
        The selected sentences joined with single spaces, in the order they
        appear in the original text.

    Raises:
        AssertionError: if n is larger than the number of sentences in text.
    '''
    # Get list of individual sentences in the text
    sents = sent_tokenize(text)

    # The full text must be at least as long as the desired summary
    assert n <= len(sents), "n must not exceed the number of sentences in the text"

    # Create a set of stopwords (English stopwords, punctuation and curly quotes)
    _stopwords = set(stopwords.words('english') + list(punctuation) + ['“','”'])

    # Get the individual lowercased words of the text, without stopwords
    words = [word for word in word_tokenize(text.lower()) if word not in _stopwords]
    # Construct a frequency distribution of the remaining words
    freq = FreqDist(words)

    # Compute the significance score of each sentence by adding up the word
    # frequencies. Every sentence gets an entry, even when its score is 0, so
    # that exactly n sentences can always be selected.
    scores = [
        sum(freq[w] for w in word_tokenize(sent.lower()) if w in freq)
        for sent in sents
    ]

    # Pick the most important sentences based on significance score; ties keep
    # the earlier sentence first
    sents_idx = nlargest(n, range(len(sents)), key=scores.__getitem__)
    # Reorder sentences based on order in original text
    return " ".join(sents[j] for j in sorted(sents_idx))

In [18]:
# Condense the article held in `text` down to 4 sentences
summarize(text, n=4)

"But while it’s clear that the gay rights movement managed to change people’s minds faster than any other civil rights movement in memory, it’s less clear why. Uneven attitudes  In a study published earlier this year, Mahzarin Banaji, a psychology professor at Harvard University, investigated patterns of long-term changes in attitudes toward six social groups — the elderly, the disabled, the overweight, black people, people with darker skin tones and gay people — over a decade. “It is not segregated.” The more connections Americans made with gay or lesbian people, the more positive their attitudes toward them became — a trend social scientists call “the contact hypothesis.” And families like the Augustines were forced to resolve an internal dilemma that social scientists call “cognitive dissonance.” “When two beliefs come into conflict in our minds, our brains are not good at just holding the conflict. 'Love was in place' If the relationships gay and lesbian people have with those clos