In [1]:
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize


In [2]:
def _create_frequency_table(text_string) -> dict:

    stopWords = set(stopwords.words("english"))
    words = word_tokenize(text_string)
    ps = PorterStemmer()

    freqTable = dict()
    for word in words:
        word = ps.stem(word)
        if word in stopWords:
            continue
        if word in freqTable:
            freqTable[word] += 1
        else:
            freqTable[word] = 1

    return freqTable

In [3]:
with open('passage.txt', 'r') as file:
    text = file.read()
freq_table = _create_frequency_table(text)

In [4]:
freq_table

{'mike': 6,
 'morri': 6,
 'live': 2,
 'villag': 6,
 '.': 34,
 'largest': 1,
 'jewelri': 1,
 'shop': 1,
 ',': 20,
 'wa': 10,
 'poor': 1,
 'farmer': 1,
 'larg': 3,
 'famili': 10,
 'mani': 1,
 'son': 2,
 'daughters-in-law': 2,
 'grandchildren': 1,
 'one': 4,
 'fine': 1,
 'day': 1,
 'tire': 1,
 'abl': 1,
 'feed': 2,
 'hi': 12,
 'decid': 2,
 'leav': 1,
 'move': 1,
 'citi': 2,
 'certain': 1,
 'earn': 2,
 'enough': 1,
 'everyon': 4,
 'along': 1,
 'left': 2,
 'At': 1,
 'night': 1,
 'stop': 2,
 'tree': 7,
 'stream': 1,
 'run': 2,
 'nearbi': 1,
 'could': 1,
 'freshen': 1,
 'themselv': 1,
 'He': 8,
 'told': 3,
 'clear': 1,
 'area': 1,
 'wife': 3,
 'fetch': 2,
 'water': 2,
 'instruct': 1,
 'make': 2,
 'fire': 2,
 'start': 3,
 'cut': 1,
 'wood': 2,
 'didnâ€™t': 1,
 'know': 2,
 'branch': 2,
 'thief': 4,
 'hide': 2,
 'watch': 3,
 'mikeâ€™': 3,
 'work': 2,
 'togeth': 2,
 'also': 3,
 'notic': 1,
 'noth': 1,
 'cook': 1,
 'thought': 2,
 'ask': 1,
 'husband': 1,
 'everyth': 3,
 'readi': 2,
 'shall': 2,
 '

In [5]:
sentences = sent_tokenize(text)

In [6]:
sentences

['Mike and Morris lived in the same village.',
 'While Morris owned the largest jewelry shop in the village, Mike was a poor farmer.',
 'Both had large families with many sons, daughters-in-law and grandchildren.',
 'One fine day, Mike, tired of not being able to feed his family, decided to leave the village and move to the city where he was certain to earn enough to feed everyone.',
 'Along with his family, he left the village for the city.',
 'At night, they stopped under a large tree.',
 'There was a stream running nearby where they could freshen up themselves.',
 'He told his sons to clear the area below the tree, he told his wife to fetch water and he instructed his daughters-in-law to make up the fire and started cutting wood from the tree himself.',
 'They didnâ€™t know that in the branches of the tree, there was a thief hiding.',
 'He watched as Mikeâ€™s family worked together and also noticed that they had nothing to cook.',
 'Mikeâ€™s wife also thought the same and asked her 

In [7]:
def _score_sentences(sentences, freqTable):
    sentenceValue = dict()

    for sentence in sentences:
        no_of_words = (len(word_tokenize(sentence)))
        for wordValue in freqTable:
            if wordValue in sentence.lower():
                #print(wordValue)
                if sentence[:10] in sentenceValue:
                    sentenceValue[sentence[:10]] += freqTable[wordValue]
                else:
                    sentenceValue[sentence[:10]] = freqTable[wordValue]

        sentenceValue[sentence[:10]] = sentenceValue[sentence[:10]] // no_of_words

    return sentenceValue
sentence_scores = _score_sentences(sentences, freq_table)

In [8]:
sentence_scores.items()

dict_items([('Mike and M', 6), ('While Morr', 6), ('Both had l', 7), ('One fine d', 2), ('Along with', 5), ('At night, ', 6), ('There was ', 5), ('He told hi', 2), ('They didnâ', 5), ('He watched', 4), ('Mikeâ€™s w', 4), ('Mike raise', 7), ('He is watc', 7), ('He will he', 9), ('The thief ', 3), ('Taking adv', 3), ('He climbed', 3), ('But, he le', 5), ('Mike opene', 3), ('The family', 3), ('Morris tho', 4), ('He ordered', 2), ('They also ', 4), ('But no one', 4), ('Being a ri', 4), ('So, the on', 4), ('The one wh', 3), ('Morrisâ€™s', 4), ('Morris rai', 8), ('As soon as', 4), ('Seeing him', 7)])

In [9]:
def _find_average_score(sentenceValue) -> int:
    sumValues = 0
    for entry in sentenceValue:
        sumValues += sentenceValue[entry]

    # Average value of a sentence from original text
    average = int(sumValues / len(sentenceValue))

    return average
threshold = _find_average_score(sentence_scores)

In [10]:
threshold

4

In [11]:
def _generate_summary(sentences, sentenceValue, threshold):
    sentence_count = 0
    summary = ''

    for sentence in sentences:
        if sentence[:10] in sentenceValue and sentenceValue[sentence[:10]] >= (threshold):
            summary += " " + sentence
            sentence_count += 1

    return summary
summary = _generate_summary(sentences, sentence_scores, 1.2 * threshold)

In [12]:
print(summary)

Mike and Morris lived in the same village. While Morris owned the largest jewelry shop in the village, Mike was a poor farmer. Both had large families with many sons, daughters-in-law and grandchildren. Along with his family, he left the village for the city. At night, they stopped under a large tree. There was a stream running nearby where they could freshen up themselves. They didnâ€™t know that in the branches of the tree, there was a thief hiding. Mike raised his hands to heaven and said, Donâ€™t worry. He is watching all of this from above. He will help us. But, he left behind the bundle of stolen jewels and money which dropped into Mikeâ€™s lap. There was great excitement when they told everyone how they got rich. Morris raised his hands and said, Donâ€™t worry. He is watching all of this from above. He will help us. Seeing him, everyone started running around to save their lives.
