In [25]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

stop_words = set(stopwords.words('english'))

def remove_stopwords(text):
    words = word_tokenize(text)
    filtered_text = [word for word in words if word.lower() not in stop_words]
    return filtered_text

In [2]:
from nltk.stem import WordNetLemmatizer
wnl = WordNetLemmatizer()

def get_root(words):
    lemma_words = [wnl.lemmatize(word, pos = 'v') for word in words]
    lemma_words

    return lemma_words

In [26]:
import re
import nltk

def preprocess(text):
    text = ' '.join(text.split())
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = remove_stopwords(text)
    root_words = get_root(text)

    return root_words

In [27]:
text = '''
Once the brutal captain of the Spartan Army, Kratos led his men throughout numerous conquests all across the lands of Greece, eventually coming across a savage Barbarian horde. Confident of his own victory, Kratos led his army into battle, but soon found himself hopelessly outmatched and outclassed. The Barbarians' brutality exceeding his own, and on the verge of death, Kratos struck a deal with the "God of War" - Ares to further his exploits. He would then commit atrocity after atrocity under Ares' name, spreading death throughout the world with his armies and justifying it all by proclaiming his intent to make "the glory of Sparta known throughout the world!". For a time, it seemed, his only tether to humanity was his beloved family, yet even they grew horrified by him, to the point where his wife Lysandra would state outright that he cared nothing for Sparta's glory, but for his own. He would not listen to her, and continued his rampage, blindly following the will of Ares in his pursuit of more bloodshed and infamy — yet this took a tragic turn when the God tricked him into killing his wife and child, all to destroy what little humanity he had left. Branded the "Ghost of Sparta" for this terrible deed, the ashes of his wife and child would remain fused to his skin forever.

Completely undone by the killing of his wife and child, Kratos became a constantly-suicidal and greatly-bereaved wreck of a man beloved by none yet known to all. Devoting himself to the other Gods of Olympus in a desperate attempt to rid himself of his memories, Kratos would hang on to the small glimmer of hope that perhaps he would one day be able to redeem himself. Yet no matter how many enemies he'd slaughter or how many lives he would save, the Gods would continue to put labour upon labour onto Kratos' shoulders, forcing him to endure the pain of his memories for ten long years of servitude. Maddened by his memories and unable to find a moment of peace, Kratos would develop a deep-seated hatred of the Gods, and especially Ares in particular, for toying with his life. Though Kratos would eventually defeat Ares and claim the throne of the "God of War" for his own, his resentment of the other Gods would bring him in conflict against all on Mount Olympus, culminating in a cataclysmic series of battles against them that would decide the fate of Greece itself.

Eventually leaving Greece as well as his bloody past behind, Kratos ends up in Ancient Egypt and makes his way into Midgard. Having come to view his troubled past with great shame, Kratos has taken the initiative to mature and grow past his self-destructive tendencies, choosing to live as a man under the thumb of the Norse Pantheon. He even finds love again with a woman named Faye, eventually fathering a son with her named Atreus. When Faye dies of unknown circumstances, Kratos and Atreus set out on a journey to spread her ashes from the highest peak in all the Nine Realms as it was her final wish. However, he and Atreus come into conflict with various Nordic creatures along their way, and are constantly pursued along their path by a mysterious Stranger — seemingly under orders from the King of the Norse Pantheon himself, Odin.

As Ragnarök unfolds, Kratos finds himself on a difficult journey that places him against the forces of Odin and the friction between him and his son. Later, under Kratos' leadership, all the united forces of the other realms gather throughout Týr's Temple; Kratos blows the Gjallarhorn to begin the siege of Asgard. Initially, the battle does not go well; the other realms are quickly cut off, and Kratos' forces were struggling with Asgard's defenses. After a fight with Thor that ends in his death at the hands of Odin himself, Kratos, along with Atreus have a final battle with Odin and defeats him, resulting in Atreus trapping his soul. Odin is then denied an afterlife by a vengeful Sindri. After defeating Odin and bidding a heartfelt farewell to his son, Kratos discovers a mural depicting him as the new All-Father of Asgard. Finally hopeful about his future, Kratos recruits Freya and Mímir to help him rebuild and restore the Nine Realms.
'''

In [28]:
pre_text = preprocess(text)
pre_text

['brutal',
 'captain',
 'spartan',
 'army',
 'kratos',
 'lead',
 'men',
 'throughout',
 'numerous',
 'conquests',
 'across',
 'land',
 'greece',
 'eventually',
 'come',
 'across',
 'savage',
 'barbarian',
 'horde',
 'confident',
 'victory',
 'kratos',
 'lead',
 'army',
 'battle',
 'soon',
 'find',
 'hopelessly',
 'outmatch',
 'outclass',
 'barbarians',
 'brutality',
 'exceed',
 'verge',
 'death',
 'kratos',
 'strike',
 'deal',
 'god',
 'war',
 'ares',
 'exploit',
 'would',
 'commit',
 'atrocity',
 'atrocity',
 'ares',
 'name',
 'spread',
 'death',
 'throughout',
 'world',
 'armies',
 'justify',
 'proclaim',
 'intent',
 'make',
 'glory',
 'sparta',
 'know',
 'throughout',
 'world',
 'time',
 'seem',
 'tether',
 'humanity',
 'beloved',
 'family',
 'yet',
 'even',
 'grow',
 'horrify',
 'point',
 'wife',
 'lysandra',
 'would',
 'state',
 'outright',
 'care',
 'nothing',
 'spartas',
 'glory',
 'would',
 'listen',
 'continue',
 'rampage',
 'blindly',
 'follow',
 'ares',
 'pursuit',
 'bloodsh

In [19]:
from nltk import pos_tag
from nltk import RegexpParser

tokens_tag = pos_tag(pre_text)
tokens_tag

[('brutal', 'JJ'),
 ('captain', 'NN'),
 ('spartan', 'JJ'),
 ('army', 'NN'),
 ('kratos', 'NNS'),
 ('lead', 'VBP'),
 ('men', 'NNS'),
 ('throughout', 'IN'),
 ('numerous', 'JJ'),
 ('conquests', 'NNS'),
 ('across', 'IN'),
 ('land', 'NN'),
 ('greece', 'NN'),
 ('eventually', 'RB'),
 ('come', 'VBN'),
 ('across', 'IN'),
 ('savage', 'NN'),
 ('barbarian', 'JJ'),
 ('horde', 'NN'),
 ('confident', 'JJ'),
 ('victory', 'NN'),
 ('kratos', 'NNS'),
 ('lead', 'VBP'),
 ('army', 'JJ'),
 ('battle', 'NN'),
 ('soon', 'RB'),
 ('find', 'VB'),
 ('hopelessly', 'RB'),
 ('outmatch', 'JJ'),
 ('outclass', 'NN'),
 ('barbarians', 'NNS'),
 ('brutality', 'VBP'),
 ('exceed', 'VBP'),
 ('verge', 'JJ'),
 ('death', 'NN'),
 ('kratos', 'NN'),
 ('strike', 'NN'),
 ('deal', 'NN'),
 ('god', 'JJ'),
 ('war', 'NN'),
 ('ares', 'NNS'),
 ('exploit', 'VBP'),
 ('would', 'MD'),
 ('commit', 'VB'),
 ('atrocity', 'NN'),
 ('atrocity', 'NN'),
 ('ares', 'VBZ'),
 ('name', 'JJ'),
 ('spread', 'NN'),
 ('death', 'NN'),
 ('throughout', 'IN'),
 ('world',

In [22]:
entities = [(word, pos) for word, pos in tokens_tag if pos.startswith('N')]

In [33]:
frequencies = {}
for word, pos in entities:
    if word not in frequencies.keys():
        frequencies[word] = pre_text.count(word)

frequencies

{'captain': 1,
 'army': 2,
 'kratos': 18,
 'men': 1,
 'conquests': 1,
 'land': 1,
 'greece': 3,
 'savage': 1,
 'horde': 1,
 'victory': 1,
 'battle': 4,
 'outclass': 1,
 'barbarians': 1,
 'death': 3,
 'strike': 1,
 'deal': 1,
 'war': 2,
 'ares': 5,
 'atrocity': 2,
 'spread': 2,
 'world': 2,
 'armies': 1,
 'intent': 1,
 'glory': 2,
 'sparta': 2,
 'time': 1,
 'humanity': 2,
 'family': 1,
 'wife': 4,
 'lysandra': 1,
 'state': 1,
 'nothing': 1,
 'rampage': 1,
 'pursuit': 1,
 'trick': 1,
 'child': 3,
 'brand': 1,
 'ghost': 1,
 'deed': 1,
 'skin': 1,
 'wreck': 1,
 'man': 2,
 'none': 1,
 'gods': 4,
 'attempt': 1,
 'memories': 3,
 'glimmer': 1,
 'day': 1,
 'redeem': 1,
 'enemies': 1,
 'labour': 2,
 'force': 4,
 'endure': 1,
 'pain': 1,
 'years': 1,
 'moment': 1,
 'peace': 1,
 'life': 1,
 'resentment': 1,
 'conflict': 2,
 'mount': 1,
 'olympus': 2,
 'series': 1,
 'fate': 1,
 'past': 3,
 'end': 2,
 'ancient': 1,
 'egypt': 1,
 'way': 2,
 'view': 1,
 'trouble': 1,
 'shame': 1,
 'mature': 1,
 'grow'

In [36]:
freqs = []
for key, val in frequencies.items():
    freqs.append((key, val))

freqs.sort(key= lambda x: x[1])

In [39]:
freqs = freqs[::-1]
freqs[:5]

[('kratos', 18), ('odin', 6), ('ares', 5), ('realms', 4), ('force', 4)]

In [24]:
set(entities)

{('ancient', 'NN'),
 ('ares', 'NNS'),
 ('armies', 'NNS'),
 ('army', 'NN'),
 ('asgard', 'NN'),
 ('asgards', 'NNS'),
 ('ash', 'NN'),
 ('atrocity', 'NN'),
 ('attempt', 'NN'),
 ('barbarians', 'NNS'),
 ('battle', 'NN'),
 ('begin', 'NN'),
 ('bid', 'NN'),
 ('brand', 'NN'),
 ('captain', 'NN'),
 ('child', 'NN'),
 ('circumstances', 'NNS'),
 ('conflict', 'NN'),
 ('conquests', 'NNS'),
 ('creatures', 'NNS'),
 ('day', 'NN'),
 ('deal', 'NN'),
 ('death', 'NN'),
 ('deed', 'NN'),
 ('defeat', 'NN'),
 ('defenses', 'NNS'),
 ('egypt', 'NNS'),
 ('end', 'NN'),
 ('endure', 'NN'),
 ('enemies', 'NNS'),
 ('family', 'NN'),
 ('fate', 'NN'),
 ('faye', 'NN'),
 ('force', 'NN'),
 ('freya', 'NN'),
 ('friction', 'NN'),
 ('future', 'NN'),
 ('gather', 'NN'),
 ('ghost', 'NN'),
 ('glimmer', 'NNS'),
 ('glory', 'NN'),
 ('gods', 'NNS'),
 ('greece', 'NN'),
 ('grow', 'NN'),
 ('hand', 'NN'),
 ('heartfelt', 'NN'),
 ('help', 'NN'),
 ('horde', 'NN'),
 ('humanity', 'NN'),
 ('intent', 'NN'),
 ('journey', 'NN'),
 ('kratos', 'NN'),
 ('kr

In [20]:
patterns= "NP: {<DT>?<JJ>*<NN>}"
chunker = RegexpParser(patterns)
chunks = chunker.parse(tokens_tag)

In [21]:
noun_phrases = [subtree.leaves() for subtree in chunks.subtrees() if subtree.label() == 'NP']
noun_phrases

[[('brutal', 'JJ'), ('captain', 'NN')],
 [('spartan', 'JJ'), ('army', 'NN')],
 [('land', 'NN')],
 [('greece', 'NN')],
 [('savage', 'NN')],
 [('barbarian', 'JJ'), ('horde', 'NN')],
 [('confident', 'JJ'), ('victory', 'NN')],
 [('army', 'JJ'), ('battle', 'NN')],
 [('outmatch', 'JJ'), ('outclass', 'NN')],
 [('verge', 'JJ'), ('death', 'NN')],
 [('kratos', 'NN')],
 [('strike', 'NN')],
 [('deal', 'NN')],
 [('god', 'JJ'), ('war', 'NN')],
 [('atrocity', 'NN')],
 [('atrocity', 'NN')],
 [('name', 'JJ'), ('spread', 'NN')],
 [('death', 'NN')],
 [('world', 'NN')],
 [('proclaim', 'JJ'), ('intent', 'NN')],
 [('glory', 'NN')],
 [('sparta', 'NN')],
 [('world', 'NN')],
 [('time', 'NN')],
 [('humanity', 'NN')],
 [('family', 'NN')],
 [('point', 'JJ'), ('wife', 'NN')],
 [('lysandra', 'NN')],
 [('state', 'NN')],
 [('nothing', 'NN')],
 [('spartas', 'JJ'), ('glory', 'NN')],
 [('continue', 'JJ'), ('rampage', 'NN')],
 [('pursuit', 'NN')],
 [('god', 'JJ'), ('trick', 'NN')],
 [('wife', 'NN')],
 [('child', 'NN')],
