In [2]:
!pip install -U spacy
!python -m spacy download en_core_web_sm
!pip install -U gensim==3.8.3

Collecting en-core-web-sm==3.2.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl (13.9 MB)
[+] Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')




In [3]:
import spacy 
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [4]:
from gensim.summarization.summarizer import summarize
from gensim.summarization import keywords

In [5]:
stopwords = list(STOP_WORDS)
print(stopwords)

['part', 'between', 'keep', 'since', 'former', 'beyond', 'n‘t', 'had', 'top', 'an', 'nor', 'alone', 'three', 'such', 'yourself', 'anything', 'whole', "'ve", 'on', 'how', 'else', 'mostly', '‘re', 'here', 'wherever', 'under', 'any', 'nobody', 'call', 'while', 'fifty', 'get', 'although', 'around', '’s', 'therein', 'across', 'up', 'one', 'does', 'name', 'whither', 'their', 'many', 'front', 'hundred', 'upon', 'once', 'whose', 'perhaps', 'quite', 'thereby', 'meanwhile', 'her', 'full', 'made', 'eight', 'among', '’m', 'which', 'too', 'anywhere', 'ca', 'bottom', 'whereas', 'hereby', 'am', 'formerly', 'elsewhere', 'afterwards', 'together', 'behind', 'have', 'now', 'no', 'n’t', 'besides', 'another', 'thru', 'namely', 'should', 'of', 'both', 'take', 'will', 'something', 'move', 'back', 'give', 'against', 'again', 'always', 'still', 'whereupon', 'did', 'over', 'thence', 'various', 'with', 'it', 'towards', 'everyone', 'often', 'more', 'about', '’re', 'except', 'she', 'sixty', 'been', 'toward', 'at',

In [6]:
nlp = spacy.load('en_core_web_sm')

In [7]:
text = input()

Naruto is a Japanese manga series written and illustrated by Masashi Kishimoto. It tells the story of Naruto Uzumaki, a young ninja who seeks recognition from his peers and dreams of becoming the Hokage, the leader of his village.


In [8]:
print(summarize(text, ratio=1))




In [9]:
topics = keywords(text, words=10)

In [10]:
print(topics)

manga series
seeks recognition
masashi
kishimoto
uzumaki
young ninja
naruto


In [11]:
doc = nlp(text)

In [12]:
token = [token.text for token in doc]
print(token)

['Naruto', 'is', 'a', 'Japanese', 'manga', 'series', 'written', 'and', 'illustrated', 'by', 'Masashi', 'Kishimoto', '.', 'It', 'tells', 'the', 'story', 'of', 'Naruto', 'Uzumaki', ',', 'a', 'young', 'ninja', 'who', 'seeks', 'recognition', 'from', 'his', 'peers', 'and', 'dreams', 'of', 'becoming', 'the', 'Hokage', ',', 'the', 'leader', 'of', 'his', 'village', '.']


In [13]:
punctuation = punctuation + '\n'

In [14]:
word_frequency = {}
for word in doc:
    if word.text.lower() not in stopwords:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequency.keys():
                word_frequency[word.text] = 1
            else:
                word_frequency[word.text] += 1

In [15]:
print(word_frequency)

{'Naruto': 2, 'Japanese': 1, 'manga': 1, 'series': 1, 'written': 1, 'illustrated': 1, 'Masashi': 1, 'Kishimoto': 1, 'tells': 1, 'story': 1, 'Uzumaki': 1, 'young': 1, 'ninja': 1, 'seeks': 1, 'recognition': 1, 'peers': 1, 'dreams': 1, 'Hokage': 1, 'leader': 1, 'village': 1}


In [16]:
max_frequency = max(word_frequency.values())

In [17]:
max_frequency

2

In [18]:
for word in word_frequency.keys():
    word_frequency[word] = word_frequency[word]/max_frequency

In [19]:
print(word_frequency)

{'Naruto': 1.0, 'Japanese': 0.5, 'manga': 0.5, 'series': 0.5, 'written': 0.5, 'illustrated': 0.5, 'Masashi': 0.5, 'Kishimoto': 0.5, 'tells': 0.5, 'story': 0.5, 'Uzumaki': 0.5, 'young': 0.5, 'ninja': 0.5, 'seeks': 0.5, 'recognition': 0.5, 'peers': 0.5, 'dreams': 0.5, 'Hokage': 0.5, 'leader': 0.5, 'village': 0.5}


In [20]:
for key,value in word_frequency.items():
    if key in topics:
        value += 1 
        word_frequency.update({key:value}) 
print(word_frequency.items())

dict_items([('Naruto', 1.0), ('Japanese', 0.5), ('manga', 1.5), ('series', 1.5), ('written', 0.5), ('illustrated', 0.5), ('Masashi', 0.5), ('Kishimoto', 0.5), ('tells', 0.5), ('story', 0.5), ('Uzumaki', 0.5), ('young', 1.5), ('ninja', 1.5), ('seeks', 1.5), ('recognition', 1.5), ('peers', 0.5), ('dreams', 0.5), ('Hokage', 0.5), ('leader', 0.5), ('village', 0.5)])


In [21]:
sentence_tokens = [sent for sent  in doc.sents]
print(sentence_tokens)

[Naruto is a Japanese manga series written and illustrated by Masashi Kishimoto., It tells the story of Naruto Uzumaki, a young ninja who seeks recognition from his peers and dreams of becoming the Hokage, the leader of his village.]


In [22]:
sentence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequency.keys():
            if sent not in sentence_scores.keys():
                sentence_scores[sent] = word_frequency[word.text.lower()] 
            else:
                sentence_scores[sent] += word_frequency[word.text.lower()]

In [23]:
sentence_scores

{Naruto is a Japanese manga series written and illustrated by Masashi Kishimoto.: 4.0,
 It tells the story of Naruto Uzumaki, a young ninja who seeks recognition from his peers and dreams of becoming the Hokage, the leader of his village.: 9.0}

In [24]:
from heapq import nlargest

In [25]:
select_length = int(len(sentence_scores)*0.5)
select_length

1

In [26]:
summary = nlargest(select_length,sentence_scores,key = sentence_scores.get )

In [27]:
summary

[It tells the story of Naruto Uzumaki, a young ninja who seeks recognition from his peers and dreams of becoming the Hokage, the leader of his village.]

In [28]:
final_summary = [word.text for word in summary ]

In [29]:
summary = ''.join(final_summary)

In [30]:
print(summary)

It tells the story of Naruto Uzumaki, a young ninja who seeks recognition from his peers and dreams of becoming the Hokage, the leader of his village.
