### Importing all the essential libraries

In [1]:
### Importing basic NLTK library ###
import nltk
### Importing Tokenization algorithms from NLTK ###
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from string import punctuation
### Importing the DefaultDict library from Python
from collections import defaultdict
### Importing Built-in-Python function for n-largest elements in a list ###
from heapq import nlargest

### Defining the Class FreqSummarizer

#### The FreqSummarizer class will:
&nbsp;&nbsp;&nbsp;&nbsp;1. Eliminate the stopwords <br/>
&nbsp;&nbsp;&nbsp;&nbsp;2. Find the frequency of the words <br/>
&nbsp;&nbsp;&nbsp;&nbsp;3. Assign a score of importance for each word <br/>
&nbsp;&nbsp;&nbsp;&nbsp;4. Rank the sentences based on the score <br/>

In [2]:
class FreqSummarizer:
    
### Initializing the characteristics of the Member function
    def __init__(self, min_cut = 0.1, max_cut = 0.9):
        self.min_cut = min_cut
        self.max_cut = max_cut
        self._stopwords = set(stopwords.words('english') + list(punctuation))
    
### Function for the Dictionary of Words:Frequencies as the key-value pair
    def _compute_freq(self, word_sent):
        freq = defaultdict(int)
        
###     Iterating through the words in sentences for incrementing the Frequency count
        for sentence in word_sent:
            for word in sentence:
                if word not in self._stopwords:
                    freq[word] += 1

###     Normalizing the Frequency and Pruning the results based on Caps
        max_freq = float(max(freq.values()))
        for word in list(freq.keys()):
            freq[word] = freq[word]/max_freq
            if freq[word] >= self.max_cut or freq[word] <= self.min_cut:
                del freq[word]
        return freq
    
### Function for assigning a score to a sentence based on the Frequency of words
    def summarizer(self, text, n):
###     Splitting text into list of sentences
        sents = sent_tokenize(text)
###     Performing a sanity check on the inputted article
        assert n <= len(sents)
###     Creating a list of words from the sentences in the article
        word_sent = [word_tokenize(s.lower()) for s in sents]
    
        self._freq = self._compute_freq(word_sent)
        
        ranking = defaultdict(int)

###     Creating a tuple of indices and sentences and incrementing the Rankings dictionary
        for i, sent in enumerate(word_sent):
            for word in sent:
                if word in self._freq:
                    ranking[i] += self._freq[word]

###     Sorting the sentences based on their frequencies
        sents_idx = nlargest(n, ranking, key = ranking.get)
        
        return [sents[j] for j in sents_idx]

### Downloading the Article and Extracting the Information

#### Importing the essential libraries

In [3]:
import urllib.request
from bs4 import BeautifulSoup

#### Defining a Function to Extract the Text from the Article URL

In [4]:
def extract_text_wp(url):
### Downloading the Page's HTML content    
    page = urllib.request.urlopen(url).read().decode('utf8')
### Initializing a Soup object    
    soup = BeautifulSoup(page)
### Extracting and combining all the <p> content from the <article> tag
    text = ' '.join(map(lambda p: p.text, soup.find_all('article')))
### Initializing a second Soup object
    soup2 = BeautifulSoup(text)
### Joining 
    text = ' '.join(map(lambda p: p.text, soup2.find_all('p')))
    
    return soup.title.text, text

### Auto-Summarization of the Article

In [5]:
justAnotherUrl = "https://www.washingtonpost.com/news/innovations/wp/2014/10/01/the-incredible-potential-and-dangers-of-data-mining-health-records/"

In [6]:
justTheText = extract_text_wp(justAnotherUrl)



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


In [7]:
fs = FreqSummarizer()

In [8]:
summary = fs.summarizer(justTheText[1], 3)

In [9]:
print('TITLE:' + (justTheText[0]) + '\033[0m' + "\n\n" + "\n\n".join(summary))

TITLE: The incredible potential and dangers of data mining health records - The Washington Post[0m

“You really have to battle with Silicon Valley and the Boston academic scene.” “Why would someone who is really really good at analyzing data come to work for a health care organization and make X dollars when they could go to Google and make 10X dollars?” Marko added.

While they universally agree that data mining — the examination and analysis of huge batches of information — could invigorate health care, they caution that any sort of accurate estimate would be impossible.

It’s the kind of potential Google chief executive Larry Page hinted at when he told the New York Times earlier this year that “we’d probably save 100,000 lives next year,” if we data mined health care data.
