<a href="https://colab.research.google.com/github/ranesh88/ranesh_data-science/blob/master/Topic_Modelling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from string import punctuation
from wordcloud import WordCloud

import matplotlib.pyplot as plt
import operator

In [None]:
# Tokens that clean_text() discards: NLTK's English stop words plus every
# character in string.punctuation.
stop = stopwords.words('english')
punkt = [*punctuation]
bad_tokens = [*stop, *punkt]

In [None]:
def clean_text(text):
    """Reduce raw text to a space-separated string of content words.

    The text is lower-cased and split with NLTK's ``word_tokenize``. A token is
    kept only if it is purely alphabetic and does not appear in the
    module-level ``bad_tokens`` list.

    Args:
        text: The raw input string.

    Returns:
        The surviving tokens, in their original order, joined by single spaces.
    """
    kept = []
    for token in word_tokenize(text.lower()):
        # Same two filters as before, applied in one pass: alphabetic first,
        # then the bad_tokens lookup.
        if token.isalpha() and token not in bad_tokens:
            kept.append(token)
    return " ".join(kept)

In [None]:
# Sample input used by the rest of the notebook: an identity-theft dispute
# letter asking for items to be blocked from a credit report.
text = '''
I would like to request the suppression of the following items 
from my credit report, which are the result of my falling victim to 
identity theft. This information does not relate to 
[ transactions that I have made/accounts that I have opened ], 
as the attached supporting documentation can attest. 
As such, it should be blocked from appearing on my credit 
report pursuant to section 605B of the Fair Credit Reporting Act.

'''

In [None]:
# This cell rebinds the name `clean_text` from the function to its string
# result; later cells read `clean_text` as that string, so the name is kept.
# A private handle to the function is saved first so that re-running this cell
# does not fail with "'str' object is not callable".
if callable(clean_text):
    _clean_text_fn = clean_text
clean_text = _clean_text_fn(text)

In [None]:
clean_text

'would like request suppression following items credit report result falling victim identity theft information relate transactions opened attached supporting documentation attest blocked appearing credit report pursuant section fair credit reporting act'

In [None]:
from collections import Counter

In [None]:
# Tally how many times each token occurs in the cleaned string, then expose
# the tallies as a plain dict.
token_counts = Counter(clean_text.split())
word_count = dict(token_counts)

In [None]:
word_count

{'would': 1,
 'like': 1,
 'request': 1,
 'suppression': 1,
 'following': 1,
 'items': 1,
 'credit': 3,
 'report': 2,
 'result': 1,
 'falling': 1,
 'victim': 1,
 'identity': 1,
 'theft': 1,
 'information': 1,
 'relate': 1,
 'transactions': 1,
 'opened': 1,
 'attached': 1,
 'supporting': 1,
 'documentation': 1,
 'attest': 1,
 'blocked': 1,
 'appearing': 1,
 'pursuant': 1,
 'section': 1,
 'fair': 1,
 'reporting': 1,
 'act': 1}

In [None]:
# Order the (word, count) pairs from highest to lowest count. sorted() is
# stable, so words with equal counts keep their existing relative order.
word_count_sorted = sorted(
    word_count.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

In [None]:
# Keep at most the first ten (word, count) pairs, i.e. the highest counts.
top_words = word_count_sorted[:10]

In [None]:
top_words

[('credit', 3),
 ('report', 2),
 ('would', 1),
 ('like', 1),
 ('request', 1),
 ('suppression', 1),
 ('following', 1),
 ('items', 1),
 ('result', 1),
 ('falling', 1)]

In [None]:
# credit, report, would, like, request, suppression

## POS tagging

In [None]:
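# One-time setup: POS tagging below relies on an NLTK tagger resource.
# Uncomment and run these two lines once if `blob.tags` reports a missing resource.
#import nltk
#nltk.download("averaged_perceptron_tagger")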

In [None]:
from textblob import TextBlob

In [None]:
# Wrap the cleaned string in a TextBlob so its POS tags can be read.
# NOTE(review): the tagger receives the lower-cased, stop-word-stripped string
# rather than the original sentences; the recorded output tags 'request' as
# JJS, so tagging the raw `text` may give better results. Confirm before
# relying on the tags.
blob = TextBlob(clean_text)

In [None]:
# (token, POS tag) pairs for the cleaned text, as shown in the output below.
text_pos = blob.tags

In [None]:
text_pos

[('would', 'MD'),
 ('like', 'VB'),
 ('request', 'JJS'),
 ('suppression', 'NN'),
 ('following', 'VBG'),
 ('items', 'NNS'),
 ('credit', 'NN'),
 ('report', 'NN'),
 ('result', 'NN'),
 ('falling', 'VBG'),
 ('victim', 'NN'),
 ('identity', 'NN'),
 ('theft', 'NN'),
 ('information', 'NN'),
 ('relate', 'NN'),
 ('transactions', 'NNS'),
 ('opened', 'VBD'),
 ('attached', 'RP'),
 ('supporting', 'VBG'),
 ('documentation', 'NN'),
 ('attest', 'NN'),
 ('blocked', 'VBD'),
 ('appearing', 'VBG'),
 ('credit', 'NN'),
 ('report', 'NN'),
 ('pursuant', 'JJ'),
 ('section', 'NN'),
 ('fair', 'NN'),
 ('credit', 'NN'),
 ('reporting', 'NN'),
 ('act', 'NN')]

In [None]:
# Collect, in order, every token the tagger labelled as a noun. All four noun
# tags from the reference table at the end of the notebook are accepted
# (NN, NNS, NNP, NNPS); the previous filter omitted NNPS and so dropped plural
# proper nouns.
nouns = [word for word, tag in text_pos if tag in ("NN", "NNS", "NNP", "NNPS")]

In [None]:
nouns

['suppression',
 'items',
 'credit',
 'report',
 'result',
 'victim',
 'identity',
 'theft',
 'information',
 'relate',
 'transactions',
 'documentation',
 'attest',
 'credit',
 'report',
 'section',
 'fair',
 'credit',
 'reporting',
 'act']




<p>CC coordinating conjunction</p>
<p>CD cardinal digit</p>
<p>DT determiner</p>
<p>EX existential there (like: “there is” … think of it like “there exists”)</p>
<p>FW foreign word</p>
<p>IN preposition/subordinating conjunction</p>
<p>JJ adjective ‘big’</p>
<p>JJR adjective, comparative ‘bigger’</p>
<p>JJS adjective, superlative ‘biggest’</p>
<p>LS list marker 1)</p>
<p>MD modal could, will</p>
<p>NN noun, singular ‘desk’</p>
<p>NNS noun plural ‘desks’</p>
<p>NNP proper noun, singular ‘Harrison’</p>
<p>NNPS proper noun, plural ‘Americans’</p>
<p>PDT predeterminer ‘all the kids’</p>
<p>POS possessive ending parent‘s</p>
<p>PRP personal pronoun I, he, she</p>
<p>PRP$ possessive pronoun my, his, hers</p>
<p>RB adverb very, silently,</p>
<p>RBR adverb, comparative better</p>
<p>RBS adverb, superlative best</p>
<p>RP particle give up</p>
<p>TO to go ‘to‘ the store.</p>
<p>UH interjection errrrrrrrm</p>
<p>VB verb, base form take</p>
<p>VBD verb, past tense took</p>
<p>VBG verb, gerund/present participle taking</p>
<p>VBN verb, past participle taken</p>
<p>VBP verb, sing. present, non-3rd person take</p>
<p>VBZ verb, 3rd person sing. present takes</p>
<p>WDT wh-determiner which</p>
<p>WP wh-pronoun who, what</p>
<p>WP$ possessive wh-pronoun whose</p>
<p>WRB wh-adverb where, when</p>