In [22]:
import re
import bs4 as bs
from urllib.request import urlopen
import nltk
import heapq

In [23]:
# get source data
source =urlopen("https://en.wikipedia.org/wiki/Global_warming").read()

In [24]:
# get data from html ( parse html )
soup =bs.BeautifulSoup(source)

In [25]:
# parse text
text =""

for para in soup.findAll('p'):
    text += para.text
    
text =re.sub(r"\[[0-9]*\]"," ", text)
text =re.sub(r"\s+"," ", text)
clean_text =text.lower()
clean_text =re.sub(r"\W", " ", clean_text)
clean_text =re.sub(r"\d", " ", clean_text)
clean_text =re.sub(r"\s+", " ", clean_text)

In [26]:
# tokenize text into sentences
sentences =nltk.sent_tokenize(text)

In [27]:
# get english stopwords
stopwords =nltk.corpus.stopwords.words('english')

In [28]:
# calculate words score
word2score ={}

for word in nltk.word_tokenize(clean_text):
    if word not in stopwords:
        if word not in word2score.keys():
            word2score[word] = 1
        else:
            word2score[word] += 1
            
for key in word2score.keys():
    word2score[key] =word2score[key] / max(word2score.values())

In [29]:
word2score

{'global': 0.675,
 'warming': 0.7666666666666667,
 'long': 0.06666666666666667,
 'term': 0.09166666666666666,
 'rise': 0.11666666666666667,
 'average': 0.058333333333333334,
 'temperature': 0.25,
 'earth': 0.15833333333333333,
 'climate': 1.0,
 'system': 0.23943661971830985,
 'major': 0.11267605633802817,
 'aspect': 0.014084507042253521,
 'current': 0.14084507042253522,
 'change': 1.0,
 'demonstrated': 0.020833333333333332,
 'direct': 0.08333333333333333,
 'measurements': 0.10416666666666667,
 'various': 0.125,
 'effects': 0.5208333333333334,
 'terms': 0.041666666666666664,
 'often': 0.041666666666666664,
 'used': 0.14583333333333334,
 'interchangeably': 0.020833333333333332,
 'however': 0.08333333333333333,
 'speaking': 0.020833333333333332,
 'properly': 0.020833333333333332,
 'denotes': 0.020833333333333332,
 'mainly': 0.0625,
 'human': 0.3541666666666667,
 'caused': 0.22916666666666666,
 'increase': 0.4166666666666667,
 'surface': 0.5,
 'temperatures': 0.4375,
 'projected': 0.041666

In [30]:
# calculate sentences score
sent2score ={}

for sentence in sentences:
    for word in nltk.word_tokenize(sentence.lower()):
        if word in word2score.keys():
            if len(sentence.split(' ')) < 30:
                if sentence not in sent2score.keys():
                    sent2score[sentence] = word2score[word]
                else:
                    sent2score[sentence] += word2score[word]

In [31]:
sent2score

{" Global warming is the long-term rise in the average temperature of the Earth's climate system.": 3.2644366197183103,
 'It is a major aspect of current climate change, and has been demonstrated by direct temperature measurements and by measurements of various effects of the warming.': 4.2426056338028175,
 'The terms global warming and climate change are often used interchangeably.': 3.691666666666667,
 'There have been prehistoric periods of global warming, but many observed changes since the mid-20th century have been unprecedented over decades to millennia.': 3.462500000000001,
 'The largest human influence has been the emission of greenhouse gases such as carbon dioxide, methane, and nitrous oxide.': 3.4375,
 'These findings have been recognized by the national science academies of the major industrialized nations and are not disputed by any scientific body of national or international standing.': 1.8460093896713614,
 'The effects of global warming include rising sea levels, regio

In [32]:
# get best sentences
best_sentences =heapq.nlargest(5,sent2score, key=sent2score.get)

In [33]:
best_sentences

['People who regard climate change as catastrophic, irreversible, or rapid might label climate change as a climate crisis or a climate emergency.',
 'Regions dependent on glacier water, regions that are already dry, and small islands are also at increased risk of water stress due to climate change.',
 'Climate change can be mitigated through the reduction of greenhouse gas emissions or the enhancement of the capacity of carbon sinks to absorb greenhouse gases from the atmosphere.',
 'One potential source of abrupt climate change would be the rapid release of methane and carbon dioxide from permafrost, which would amplify global warming.',
 'Abrupt climate change, tipping points in the climate system: Climate change could result in global, large-scale changes.']