In [26]:
#Import required libraries

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize

In [27]:
text_str = '''
If Cristiano Ronaldo didn’t exist, would Lionel Messi have to invent him?

The question of how much these two other-worldly players inspire each other is an interesting one, 
and it’s tempting to imagine Messi sitting at home on Tuesday night, watching Ronaldo destroying Atletico,
angrily glaring at the TV screen and growling: “Right, I’ll show him!”

As appealing as that picture might be, however, it is probably a false one — from Messi’s perspective, at least.

He might show it in a different way, but Messi is just as competitive as Ronaldo. Rather than goals and personal glory,
however, the Argentine’s personal drug is trophies.

Ronaldo, it can be said, never looks happy on the field of play unless he’s just scored a goal — 
and even then he’s not happy for long, because he just wants to score another one. 
And that relentless obsession with finding the back of the net has undoubtedly played a major 
role in his stunning career achievements.

Messi, though, is a different animal, shown by the generosity with which he sets up team-mates 
even if he has a chance to shoot, regularly hands over penalty-taking duties to others and 
invariably celebrates a goal by turning straight to the player who passed him the ball with an appreciative smile.

Rather than being a better player than Ronaldo, Messi’s main motivations — according to 
the people who are close to him — are being the best possible version of Lionel Messi, 
and winning as many trophies as possible.

That theory was supported by Leicester boss Brendan Rodgers when I interviewed him for 
a book I recently wrote about Messi.

Do Messi and Ronaldo inspire each other? “Maybe subconsciously in some way they’ve 
driven each other on,” said Rodgers. “But I think both those players inherently have 
that hunger to be the best players they can be. With the very elite performers, that drive comes from within.”

Messi and Ronaldo ferociously competing with each other for everyone else’s acclaim 
is a nice story for fans to debate and the media to spread, but it’s probably not particularly true.
'''

In [29]:
#1. Create the word frequency table

def _create_frequency_table(text_string) -> dict:
    """
    we create a dictionary for the word frequency table.
    For this, we should only use the words that are not part of the stopWords array.
    Removing stop words and making frequency table
    Stemmer - an algorithm to bring words to its root word.
    :rtype: dict
    """
    stopWords = set(stopwords.words("english"))
    words = word_tokenize(text_string)
    ps = PorterStemmer()

    freqTable = dict()
    for word in words:
        word = ps.stem(word)
        if word in stopWords:
            continue
        if word in freqTable:
            freqTable[word] += 1
        else:
            freqTable[word] = 1

    return freqTable

In [30]:
#2. Score the sentences: Term frequency

def _score_sentences(sentences, freqTable) -> dict:
    """
    score a sentence by its words
    Basic algorithm: adding the frequency of every non-stop word in a sentence divided by total no of words in a sentence.
    :rtype: dict
    """
    sentenceValue = dict()

    for sentence in sentences:
        word_count_in_sentence = (len(word_tokenize(sentence)))
        word_count_in_sentence_except_stop_words = 0
        for wordValue in freqTable:
            if wordValue in sentence.lower():
                word_count_in_sentence_except_stop_words += 1
                if sentence[:10] in sentenceValue:
                    sentenceValue[sentence[:10]] += freqTable[wordValue]
                else:
                    sentenceValue[sentence[:10]] = freqTable[wordValue]

        if sentence[:10] in sentenceValue:
            sentenceValue[sentence[:10]] = sentenceValue[sentence[:10]] / word_count_in_sentence_except_stop_words
    return sentenceValue

In [31]:
#3. Find the threshold

def _find_average_score(sentenceValue) -> int:
    """
    Find the average score from the sentence value dictionary
    :rtype: int
    """
    sumValues = 0
    for entry in sentenceValue:
        sumValues += sentenceValue[entry]

    # Average value of a sentence from original text
    average = (sumValues / len(sentenceValue))

    return average

In [32]:
#4. Generate the summary

def _generate_summary(sentences, sentenceValue, threshold):
    sentence_count = 0
    summary = ''

    for sentence in sentences:
        if sentence[:10] in sentenceValue and sentenceValue[sentence[:10]] >= (threshold):
            summary += " " + sentence
            sentence_count += 1

    return summary

In [33]:
def run_summarization(text):
    # 1 Create the word frequency table
    freq_table = _create_frequency_table(text)
    '''
    We already have a sentence tokenizer, so we just need 
    to run the sent_tokenize() method to create the array of sentences.
    '''
    # 2 Tokenize the sentences
    sentences = sent_tokenize(text)
    # 3 Important Algorithm: score the sentences
    sentence_scores = _score_sentences(sentences, freq_table)
    # 4 Find the threshold
    threshold = _find_average_score(sentence_scores)
    # 5 Important Algorithm: Generate the summary
    summary = _generate_summary(sentences, sentence_scores, 1.3 * threshold)

    return summary

In [34]:
result = run_summarization(text_str)
print("Summarized text: \n\n",result)

Summarized text: 

  
If Cristiano Ronaldo didn’t exist, would Lionel Messi have to invent him? He might show it in a different way, but Messi is just as competitive as Ronaldo. Do Messi and Ronaldo inspire each other?
