In [1]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer

In [2]:
text = """I’ve been asked by a few friends to develop a feature for a
WhatsApp chatbot of mine, that summarizes articles based on
URL inputs. So when a friend sends an article to a WhatsApp
group, the bot will reply with a summary of the given URL
article. I like this feature because from my personal
research, 65% of group users don’t even click the shared URLs,
but 97% of them will read a few lines of the articles summary.
As part of being a Fullstack developer, it is important to
know how to choose the right stack for each product you
develop, depending on the requirements and limitations.
For web crawling, I love using Python. The Python community
is filled with efficient, easy to implement open source
libraries both for web crawling and text summarization.
Once you’re done with this tutorial, you won’t believe how
simple it is to implement the task."""

In [3]:
words = word_tokenize(text)
# words = text.split(" ")
sentences = sent_tokenize(text)
sw = stopwords.words('english')
w_net = WordNetLemmatizer()
ps = PorterStemmer()

In [4]:
print(words)

['I', '’', 've', 'been', 'asked', 'by', 'a', 'few', 'friends', 'to', 'develop', 'a', 'feature', 'for', 'a', 'WhatsApp', 'chatbot', 'of', 'mine', ',', 'that', 'summarizes', 'articles', 'based', 'on', 'URL', 'inputs', '.', 'So', 'when', 'a', 'friend', 'sends', 'an', 'article', 'to', 'a', 'WhatsApp', 'group', ',', 'the', 'bot', 'will', 'reply', 'with', 'a', 'summary', 'of', 'the', 'given', 'URL', 'article', '.', 'I', 'like', 'this', 'feature', 'because', 'from', 'my', 'personal', 'research', ',', '65', '%', 'of', 'group', 'users', 'don', '’', 't', 'even', 'click', 'the', 'shared', 'URLs', ',', 'but', '97', '%', 'of', 'them', 'will', 'read', 'a', 'few', 'lines', 'of', 'the', 'articles', 'summary', '.', 'As', 'part', 'of', 'being', 'a', 'Fullstack', 'developer', ',', 'it', 'is', 'important', 'to', 'know', 'how', 'to', 'choose', 'the', 'right', 'stack', 'for', 'each', 'product', 'you', 'develop', ',', 'depending', 'on', 'the', 'requirements', 'and', 'limitations', '.', 'For', 'web', 'crawlin

In [5]:
print(sentences)

['I’ve been asked by a few friends to develop a feature for a\nWhatsApp chatbot of mine, that summarizes articles based on\nURL inputs.', 'So when a friend sends an article to a WhatsApp\ngroup, the bot will reply with a summary of the given URL\narticle.', 'I like this feature because from my personal\nresearch, 65% of group users don’t even click the shared URLs,\nbut 97% of them will read a few lines of the articles summary.', 'As part of being a Fullstack developer, it is important to\nknow how to choose the right stack for each product you\ndevelop, depending on the requirements and limitations.', 'For web crawling, I love using Python.', 'The Python community\nis filled with efficient, easy to implement open source\nlibraries both for web crawling and text summarization.', 'Once you’re done with this tutorial, you won’t believe how\nsimple it is to implement the task.']


In [6]:
print(sw)

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [7]:
freq_table = dict()
for word in words:
    word = word.lower()
    
    if word in sw:
        continue
    
#     word = w_net.lemmatize(word, pos='v')
    word = ps.stem(word)
    if word in freq_table:
        freq_table[word] += 1
    else:
        freq_table[word] = 1
        

In [8]:
print(freq_table)

{'’': 4, 'ask': 1, 'friend': 2, 'develop': 3, 'featur': 2, 'whatsapp': 2, 'chatbot': 1, 'mine': 1, ',': 9, 'summar': 2, 'articl': 4, 'base': 1, 'url': 3, 'input': 1, '.': 7, 'send': 1, 'group': 2, 'bot': 1, 'repli': 1, 'summari': 2, 'given': 1, 'like': 1, 'person': 1, 'research': 1, '65': 1, '%': 2, 'user': 1, 'even': 1, 'click': 1, 'share': 1, '97': 1, 'read': 1, 'line': 1, 'part': 1, 'fullstack': 1, 'import': 1, 'know': 1, 'choos': 1, 'right': 1, 'stack': 1, 'product': 1, 'depend': 1, 'requir': 1, 'limit': 1, 'web': 2, 'crawl': 2, 'love': 1, 'use': 1, 'python': 2, 'commun': 1, 'fill': 1, 'effici': 1, 'easi': 1, 'implement': 2, 'open': 1, 'sourc': 1, 'librari': 1, 'text': 1, 'done': 1, 'tutori': 1, 'believ': 1, 'simpl': 1, 'task': 1}


In [9]:
len(sentences)

7

In [10]:
sent_table = dict()

for sentence in sentences:
    for word, freq in freq_table.items():
        if word in sentence:
            if sentence in sent_table:
                sent_table[sentence] += freq
                print("Word =>",word)
                print("Sentence =>",sentence)
                print(sent_table)
            else:
                sent_table[sentence] = freq
                print("Word =>",word)
                print("Sentence =>",sentence)
                print(sent_table)                

Word => ’
Sentence => I’ve been asked by a few friends to develop a feature for a
WhatsApp chatbot of mine, that summarizes articles based on
URL inputs.
{'I’ve been asked by a few friends to develop a feature for a\nWhatsApp chatbot of mine, that summarizes articles based on\nURL inputs.': 4}
Word => ask
Sentence => I’ve been asked by a few friends to develop a feature for a
WhatsApp chatbot of mine, that summarizes articles based on
URL inputs.
{'I’ve been asked by a few friends to develop a feature for a\nWhatsApp chatbot of mine, that summarizes articles based on\nURL inputs.': 5}
Word => friend
Sentence => I’ve been asked by a few friends to develop a feature for a
WhatsApp chatbot of mine, that summarizes articles based on
URL inputs.
{'I’ve been asked by a few friends to develop a feature for a\nWhatsApp chatbot of mine, that summarizes articles based on\nURL inputs.': 7}
Word => develop
Sentence => I’ve been asked by a few friends to develop a feature for a
WhatsApp chatbot of 

In [11]:
print(sent_table)

{'I’ve been asked by a few friends to develop a feature for a\nWhatsApp chatbot of mine, that summarizes articles based on\nURL inputs.': 41, 'So when a friend sends an article to a WhatsApp\ngroup, the bot will reply with a summary of the given URL\narticle.': 29, 'I like this feature because from my personal\nresearch, 65% of group users don’t even click the shared URLs,\nbut 97% of them will read a few lines of the articles summary.': 44, 'As part of being a Fullstack developer, it is important to\nknow how to choose the right stack for each product you\ndevelop, depending on the requirements and limitations.': 29, 'For web crawling, I love using Python.': 21, 'The Python community\nis filled with efficient, easy to implement open source\nlibraries both for web crawling and text summarization.': 34, 'Once you’re done with this tutorial, you won’t believe how\nsimple it is to implement the task.': 28}


In [12]:
len(sent_table)

7

In [14]:
sum_val = 0

for s in sent_table.values():
    sum_val += s

In [15]:
sum_val

226

In [16]:
avg = int(sum_val/len(sent_table))

In [20]:
avg

32

In [29]:
summary = ""

for sentence in sentences:
    if sent_table[sentence] > avg * 1.2 and sentence in sent_table:
        summary += sentence

In [30]:
print(summary)

I’ve been asked by a few friends to develop a feature for a
WhatsApp chatbot of mine, that summarizes articles based on
URL inputs.I like this feature because from my personal
research, 65% of group users don’t even click the shared URLs,
but 97% of them will read a few lines of the articles summary.
