# Import NLTK, the Natural Language Toolkit.

# In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize

# Here we use the introduction of the Wikipedia "Black hole" article as the text to summarize.

# In [2]:
# Sample input for the summarizer. The bracketed numbers (e.g. [6]) are
# citation markers that are part of the pasted text. The literal starts and
# ends with a newline because of the triple-quote layout.
text_str = '''
A black hole is a region of spacetime where gravity is so strong that nothing—no particles or even electromagnetic radiation such as light—can escape from it.[6] The theory of general relativity predicts that a sufficiently compact mass can deform spacetime to form a black hole.[7][8] The boundary of the region from which no escape is possible is called the event horizon. Although the event horizon has an enormous effect on the fate and circumstances of an object crossing it, according to general relativity it has no locally detectable features.[9] In many ways, a black hole acts like an ideal black body, as it reflects no light.[10][11] Moreover, quantum field theory in curved spacetime predicts that event horizons emit Hawking radiation, with the same spectrum as a black body of a temperature inversely proportional to its mass. This temperature is on the order of billionths of a kelvin for black holes of stellar mass, making it essentially impossible to observe.

Objects whose gravitational fields are too strong for light to escape were first considered in the 18th century by John Michell and Pierre-Simon Laplace.[12] The first modern solution of general relativity that would characterize a black hole was found by Karl Schwarzschild in 1916, although its interpretation as a region of space from which nothing can escape was first published by David Finkelstein in 1958. Black holes were long considered a mathematical curiosity; it was not until the 1960s that theoretical work showed they were a generic prediction of general relativity. The discovery of neutron stars by Jocelyn Bell Burnell in 1967 sparked interest in gravitationally collapsed compact objects as a possible astrophysical reality.

Black holes of stellar mass are expected to form when very massive stars collapse at the end of their life cycle. After a black hole has formed, it can continue to grow by absorbing mass from its surroundings. By absorbing other stars and merging with other black holes, supermassive black holes of millions of solar masses (M☉) may form. There is consensus that supermassive black holes exist in the centers of most galaxies.

The presence of a black hole can be inferred through its interaction with other matter and with electromagnetic radiation such as visible light. Matter that falls onto a black hole can form an external accretion disk heated by friction, forming quasars, some of the brightest objects in the universe. Stars passing too close to a supermassive black hole can be shred into streamers that shine very brightly before being "swallowed."[13] If there are other stars orbiting a black hole, their orbits can be used to determine the black hole's mass and location. Such observations can be used to exclude possible alternatives such as neutron stars. In this way, astronomers have identified numerous stellar black hole candidates in binary systems, and established that the radio source known as Sagittarius A*, at the core of the Milky Way galaxy, contains a supermassive black hole of about 4.3 million solar masses.

On 11 February 2016, the LIGO Scientific Collaboration and the Virgo collaboration announced the first direct detection of gravitational waves, which also represented the first observation of a black hole merger.[14] As of December 2018, eleven gravitational wave events have been observed that originated from ten merging black holes (along with one binary neutron star merger).[15][16] On 10 April 2019, the first direct image of a black hole and its vicinity was published, following observations made by the Event Horizon Telescope in 2017 of the supermassive black hole in Messier 87's galactic centre.[3][17][18]
'''

# The summarization uses four steps:
# 1. Remove stop words.
# 2. Create a frequency table of words - how many times each word appears in the text.
# 3. Assign a score to each sentence depending on the words it contains and the frequency table.
# 4. Build the summary by adding every sentence above a certain score threshold.

# In [3]:
# Remove the stop words and
# build a dictionary that serves as the word frequency table.
def frequency_table(text_str):
    """Count how often each non-stop-word token occurs in ``text_str``.

    The text is split with NLTK's ``word_tokenize``. Every token is
    lower-cased, and tokens found in NLTK's English stop-word list are
    skipped. No stemming is applied.

    Args:
        text_str: The text to analyse.

    Returns:
        A dict mapping each remaining lower-cased token to the number of
        times it occurs, with keys in order of first appearance.
    """
    # NOTE(review): tokens are not filtered beyond the stop-word list, so any
    # punctuation tokens the tokenizer emits are presumably counted as well -
    # confirm whether that is intended.
    stop_words = set(stopwords.words("english"))

    freq_table = {}
    for token in word_tokenize(text_str):
        token = token.lower()
        if token in stop_words:
            continue
        freq_table[token] = freq_table.get(token, 0) + 1
    return freq_table

# Score a sentence by its words.
# Basic algorithm: add up the frequency of every frequency-table (non-stop) word found in the sentence.
# A potential issue with this score is that long sentences have an advantage over short sentences.
# To compensate, every sentence score is divided by the number of frequency-table words found in that sentence.


# In [4]:
def score_sentences(sentences, freqTable):
    """Score each sentence by the frequency-table words it contains.

    A sentence's score is the sum of the frequencies of every ``freqTable``
    key that occurs in the lower-cased sentence, divided by the number of
    keys that matched, i.e. the mean frequency of the matched words.

    Args:
        sentences: Iterable of sentence strings.
        freqTable: Mapping of lower-cased word -> frequency.

    Returns:
        A dict mapping the first 10 characters of a sentence to its score.
        Sentences that contain no ``freqTable`` key get no entry.

    Note:
        Keys are only the first 10 characters of a sentence, because that is
        how the summary builder looks scores up. Sentences sharing a prefix
        therefore share one entry. Each sentence is scored on its own and the
        highest score among the colliding sentences is kept, so one sentence's
        sum is never mixed into another's average.
    """
    sentenceValue = dict()
    for sentence in sentences:
        lowered = sentence.lower()
        # NOTE: this is a substring test, not a token test, so the key
        # "star" also matches inside "stars".
        matched = [count for word, count in freqTable.items() if word in lowered]
        if not matched:
            continue

        # Compute the score locally before touching the shared dict so a
        # prefix collision cannot corrupt it.
        score = sum(matched) / len(matched)
        key = sentence[:10]
        if key in sentenceValue:
            score = max(score, sentenceValue[key])
        sentenceValue[key] = score
    return sentenceValue

# Find the average score over all sentence scores.

# In [5]:
def average_score(sentenceValue) -> float:
    """Return the arithmetic mean of the scores in ``sentenceValue``.

    Args:
        sentenceValue: Mapping of sentence key -> numeric score.

    Returns:
        The mean of all values, or ``0.0`` when the mapping is empty (for
        example when the input text produced no scored sentences), instead
        of raising ``ZeroDivisionError``.
    """
    if not sentenceValue:
        return 0.0
    return sum(sentenceValue.values()) / len(sentenceValue)


# In [6]:
# Define the function that builds the summary text.
def _summary(sentences, sentenceValue, threshold):
    sentence_count = 0
    summary = ''
    for sentence in sentences:
        if sentence[:10] in sentenceValue and sentenceValue[sentence[:10]] >= (threshold):
            summary += " " + sentence
            sentence_count += 1
    return summary

# NLTK already provides a sentence tokenizer, so we only need
# to call sent_tokenize() to get the list of sentences.



# In [7]:
# Tie all the functions together.
def summarization(text_str, threshold_factor=1.3):
    """Build an extractive summary of ``text_str``.

    Pipeline: build the word-frequency table, split the text into sentences
    with NLTK's ``sent_tokenize``, score every sentence, and keep the
    sentences whose score is at least ``threshold_factor`` times the average
    sentence score.

    Args:
        text_str: The text to summarize.
        threshold_factor: Multiplier applied to the average sentence score to
            obtain the selection cut-off. Higher values keep fewer sentences.
            Defaults to 1.3, the value previously hard-coded here.

    Returns:
        The summary string as produced by ``_summary``.
    """
    # Create the word frequency table.
    freq_table = frequency_table(text_str)

    # Tokenize the text into sentences.
    sentences = sent_tokenize(text_str)

    # Score the sentences against the frequency table.
    sentence_scores = score_sentences(sentences, freq_table)

    # The average sentence score is the baseline for the cut-off.
    threshold = average_score(sentence_scores)

    # Keep only the sentences that score clearly above the baseline.
    return _summary(sentences, sentence_scores, threshold_factor * threshold)


# Print the generated summary.

# In [8]:
# Script entry point: summarize the sample text and print the result.
if __name__ == '__main__':
    # `result` is kept as a module-level name so it stays inspectable after the run.
    result = summarization(text_str)
    print(result)

# Output:
#  [9] In many ways, a black hole acts like an ideal black body, as it reflects no light. After a black hole has formed, it can continue to grow by absorbing mass from its surroundings. There is consensus that supermassive black holes exist in the centers of most galaxies. "[13] If there are other stars orbiting a black hole, their orbits can be used to determine the black hole's mass and location.
