In [None]:
import bs4 as bs  
import urllib.request  
import re

import nltk
from nltk.tokenize import sent_tokenize
import nltk.corpus

# heapq is used to select the highest-scoring sentences for the summary
import heapq 

# scrape the data from a site
def scrape_data_from_site(site):
    """Download a web page and return the text of all its ``<p>`` elements.

    Args:
        site: URL passed to ``urllib.request.urlopen``. A falsy value
            (``None`` or ``""``) skips the request entirely.

    Returns:
        The ``.text`` of every ``<p>`` element, concatenated in document
        order with no separator added, or ``""`` when ``site`` is falsy.

    Raises:
        Any exception raised by ``urlopen``/``read`` or by the HTML parser
        is propagated to the caller unchanged.
    """
    if not site:
        return ""

    # The context manager closes the HTTP response even if read() fails,
    # so the underlying connection is not leaked.
    with urllib.request.urlopen(site) as scraped_data:
        article = scraped_data.read()

    parsed_article = bs.BeautifulSoup(article, 'lxml')

    # join() builds the result in one pass instead of repeated string +=
    return "".join(p.text for p in parsed_article.find_all('p'))

# Removing Square Brackets and Extra Spaces
def remove_square_brackets_and_extra_spaces(article_text):
    """Drop bracketed numeric markers and collapse whitespace runs.

    Each ``[digits]`` group (an empty ``[]`` included) is first replaced by
    a single space; afterwards every run of whitespace is reduced to one
    space.

    Args:
        article_text: the text to clean.

    Returns:
        The cleaned text.
    """
    bracket_pattern = re.compile(r'\[[0-9]*\]')
    whitespace_pattern = re.compile(r'\s+')

    without_brackets = bracket_pattern.sub(' ', article_text)
    return whitespace_pattern.sub(' ', without_brackets)

# Removing special characters and digits
def remove_spl_chars_and_digits(article_text):
    """Keep only ASCII letters, separated by single spaces.

    Every character outside ``a-z``/``A-Z`` is turned into a space, then
    each run of whitespace is collapsed to one space. Leading or trailing
    spaces produced this way are not stripped.

    Args:
        article_text: the text to clean.

    Returns:
        The letters-only text.
    """
    letters_only = re.compile('[^a-zA-Z]').sub(' ', article_text)
    collapsed = re.compile(r'\s+').sub(' ', letters_only)
    return collapsed

# Find Weighted Frequency of Occurrence
def find_weighted_frequency_of_occurence(formatted_article_text):
    """Return the relative frequency of every non-stopword in the text.

    The text is tokenized with ``nltk.word_tokenize``. Tokens are
    lower-cased before stopword filtering and counting, because the
    sentence scorer in this file looks words up in lower case; counting
    case-sensitively would let capitalised stopwords such as "The" through
    and would create keys that are never matched.

    Args:
        formatted_article_text: the text whose words are counted.

    Returns:
        A dict mapping each lower-cased non-stopword to its count divided
        by the count of the most frequent such word, so the most frequent
        word maps to 1.0. An empty dict is returned when no countable
        tokens remain.
    """
    # A set gives constant-time membership tests inside the token loop.
    # (NLTK's English stopword list is lower-case.)
    stopwords = set(nltk.corpus.stopwords.words('english'))

    word_frequencies = {}
    for token in nltk.word_tokenize(formatted_article_text):
        word = token.lower()
        if word not in stopwords:
            word_frequencies[word] = word_frequencies.get(word, 0) + 1

    # Without this guard max() raises ValueError on empty input.
    if not word_frequencies:
        return word_frequencies

    # To find the weighted frequency, divide each word's number of
    # occurrences by the count of the most frequently occurring word.
    maximum_frequency = max(word_frequencies.values())

    return {
        word: count / maximum_frequency
        for word, count in word_frequencies.items()
    }

# Calculating Sentence Scores
def calculate_sentence_scores(sentence_list, word_frequencies, max_sentence_words=30):
    """Score each sentence by summing the weights of the words it contains.

    Args:
        sentence_list: iterable of sentence strings.
        word_frequencies: dict mapping a word to its weight. Lookups use
            the lower-cased tokens of each sentence.
        max_sentence_words: sentences whose space-separated word count is
            greater than or equal to this value are skipped. Defaults to
            30, the limit that was previously hard-coded.

    Returns:
        A dict mapping sentence -> summed weight. A sentence appears only
        if it is short enough and at least one of its tokens is a key of
        ``word_frequencies``. A sentence occurring more than once in
        ``sentence_list`` accumulates the weight of every occurrence.
    """
    sentence_scores = {}
    for sent in sentence_list:
        # The length test does not depend on the word, so do it once per
        # sentence and skip tokenizing sentences that would be rejected.
        if len(sent.split(' ')) >= max_sentence_words:
            continue

        for word in nltk.word_tokenize(sent.lower()):
            weight = word_frequencies.get(word)
            if weight is None:
                continue
            sentence_scores[sent] = sentence_scores.get(sent, 0) + weight
    return sentence_scores

def retrieve_top_sentences(sentence_scores, num):
    """Join the ``num`` highest-scoring sentences into a single string.

    Args:
        sentence_scores: dict mapping sentence -> score.
        num: how many sentences to keep.

    Returns:
        The selected sentences, highest score first, separated by a space
        followed by a blank line. An empty string when nothing is selected.
    """
    separator = ' \n\n'
    best_first = heapq.nlargest(
        num,
        sentence_scores,
        key=lambda sentence: sentence_scores[sentence],
    )
    return separator.join(best_first)

def get_top_summary_from_web_page(url, toplines):
    """Build an extractive summary of a web page.

    The page's paragraph text is scraped and cleaned, split into
    sentences, and each sentence is scored by the weighted frequencies of
    its words; the best-scoring sentences form the summary.

    Args:
        url: address of the page to summarize.
        toplines: number of sentences to include in the summary.

    Returns:
        The selected sentences joined into one string, or ``''`` when the
        page yields no alphabetic text to rank.
    """
    raw_text = scrape_data_from_site(url)

    # Sentence splitting uses this version (punctuation and digits kept).
    article_text = remove_square_brackets_and_extra_spaces(raw_text)

    # Word counting uses the letters-only version.
    formatted_article_text = remove_spl_chars_and_digits(article_text)

    # Nothing to rank (empty page, falsy URL, or text without letters):
    # return an empty summary instead of running the scoring steps on
    # empty input.
    if not formatted_article_text.strip():
        return ''

    # Converting Text To Sentences
    sentence_list = sent_tokenize(article_text)

    word_frequencies = find_weighted_frequency_of_occurence(formatted_article_text)
    sentence_scores = calculate_sentence_scores(sentence_list, word_frequencies)

    return retrieve_top_sentences(sentence_scores, toplines)

In [None]:
# Summarize the Wikipedia article on cloud computing with its 5 best-scoring sentences.
summary = get_top_summary_from_web_page(
    'https://en.wikipedia.org/wiki/Cloud_computing',
    5,
)
print(summary)

In [None]:
try:
    from googlesearch import search
except ImportError:
    print("No module named 'google' found")
    # Stop the cell here: without `search` the loop below would only fail
    # later with an unrelated NameError.
    raise

# to search
query = "Quantum Computing"

# NOTE(review): `sites` is never filled in this cell — confirm whether a
# later cell relies on it before removing.
sites = []

# Summarize each page returned by the search and print the result.
# NOTE(review): stop=1 presumably limits the search to the first result —
# confirm against the googlesearch documentation.
for j in search(query, tld="com", num=10, start=0, stop=1, pause=2.0):
    summary = get_top_summary_from_web_page(j, 5)
    print(summary)