# [Topic Modeling](https://towardsdatascience.com/topic-modeling-and-latent-dirichlet-allocation-in-python-9bf156893c24)

In [1]:

from pprint import pprint
import numpy as np
np.random.seed(2018)
import nltk

# reload only imported modules before run
# https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

'''
pantree bank tags: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
standford source: https://nlp.stanford.edu/software/CRF-NER.shtml
standford online text tree generater: http://nlp.stanford.edu:8080/parser/index.jsp
'''

'\npantree bank tags: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html\nstandford source: https://nlp.stanford.edu/software/CRF-NER.shtml\nstandford online text tree generater: http://nlp.stanford.edu:8080/parser/index.jsp\n'

### constants

In [2]:
start_date = '01-11-2018' #  09-Sep-2018 
end_date = '30-12-2018' # 01-Oct-2018

### We will perform the following steps:

#### 1. Tokenization: 
Split the text into sentences and the sentences into words. Lowercase the words and remove punctuation.

In [3]:
# def tokenize(text):
#     return gensim.utils.simple_preprocess(text)

#### 2. Remove small words:
Words that have fewer than 3 characters are removed.

In [4]:
# def isShortWord(token):
#     return len(token) < 3

#### 3. Remove stopwords:
All stopwords are removed.

In [5]:
# def isStopWord(token):
#     return token in gensim.parsing.preprocessing.STOPWORDS

#### 4. lemmatized +  Stemming:
Words are lemmatized — words in third person are changed to first person and verbs in past and future tenses are changed into present.

Words are stemmed — words are reduced to their root form.

In [6]:
# from nltk.corpus import wordnet as wn
# from nltk import pos_tag, word_tokenize
# from nltk.stem.porter import *
# from textblob import TextBlob


# def lemmatize_stemming(token):
#     stemmer = PorterStemmer() #gensim.parsing.stem_text(tokenize) #
#     for word, tag in pos_tag(word_tokenize(token)):
#         wntag = tag[0].lower()
#         wntag = wntag if wntag in ['a', 'r', 'n', 'v'] else None
#         lemma = WordNetLemmatizer().lemmatize(word, wntag) if wntag else word
#         return TextBlob(lemma).words[0].singularize()
#     return ''

#### 5. Replace Emojis:

In [7]:
# ## get emoji characters file path
# def getEmojis():
#     from dataSource import getEmojis
#     comments_file_path = getDataSourcePathFor(emoji_path)
#     return getEmojis(comments_file_path)#.head()

In [8]:
# def hasEmojicon(token):
    
# def replaceEmojicons(token, emojies):
#     pass
    

## Data Source

In [9]:
# def getNounList(sentence='', tokens = []):
#     from nltk import word_tokenize, pos_tag
#     if len(tokens) > 0:    
#         nouns = [token for token, pos in pos_tag(tokens) if pos.startswith('NN')]
#         return nouns
#     else:
#         nouns = [token for token, pos in pos_tag(word_tokenize(sentence)) if pos.startswith('NN')]
#         return nouns

In [10]:
# text = 'I Have done reviewing, Will be seeing by them'
# print(preprocess(text))

In [11]:
# key is file storage path
# def getDataSourcePathFor(keyForFilePath):
#     import json
#     import os
    
#     config_file_path = os.environ[virtual_env] + '/config.json'

#     with open(config_file_path) as f:
#         config = json.load(f)
#         if keyForFilePath in config:# ['comments_path', 'output_path']
#             return config[keyForFilePath] 
#     return None

In [12]:
# ## get list of comments from stored input csv file
# import dataSource 

# def getListOfComments():
#     ### This is to get csv rows between given dates
#     comments_file_path = getDataSourcePathFor(comments_path)
#     commentsList = getComments(comments_file_path, start_date, end_date) #['comments'] 
#     commentsList = commentsList.sort_values(by='ratings', ascending=True)['comments'] 
#     print('Total number of comments: %s between %s and %s' % (len(commentsList), start_date, end_date))
#     return commentsList

### Text processing

In [13]:
# import re

# def filterWord(token):
#     if not (isStopWord(token) or isShortWord(token)):
#         lemmaWord = lemmatize_stemming(token)
#         if not isShortWord(lemmaWord):
#             return ("".join(re.findall("[a-zA-Z]+", lemmaWord)).lower())
#     return None 
    
# def filters(sentence):
# #     print('..given comments:', sentence)
#     result = []
#     #nouns = getNounList(sentence) # fetch only Nouns
#     for token in tokenize(sentence):#nouns: ###tokenize(text):
#         result.append(filterWord(token))
#     return result

# def filterWords(tokens):
#     return list(filter(lambda token: filterWord(token), tokens))

In [14]:
# def preprocessCommentDocument(document):
#     return list(map(lambda sentence: filters(sentence), document))

### Word Cloud

In [15]:
from wordCloud import showWordCloud
def showCloud(topicCollection):
    showWordCloud(topicCollection) 

### Bag of Words on the Data set

In [16]:
def bow(processed_docs):
    dictionary = gensim.corpora.Dictionary(processed_docs)
    bow_corpus = [dictionary.doc2bow(doc) for doc in processed_docs]
    return bow_corpus

### Sentiment

In [17]:
from getSentiment import getSentiment

def getSentFromCommentList(commentList):
    sentimentList = []
    for sentence in commentList:
        sentiment = getSentiment(sentence)
        sentimentList.append(sentiment)
    return sentimentList


def isNegative(sentiment):
    return sentiment > 0.5


def tokenDictWithPosNegSentiment(sentimentList, document):
    sentDict = {}
    size = range(len(document))
    
    for i in size:
        for token in document[i]:
            v = (0, 0, 0) # (neg, pos, freq)
            if token in sentDict:
                v = sentDict[token]
                
            if isNegative(sentimentList[i]):
                v = (v[0]+1, v[1], v[2]+1)
            else:
                v = (v[0], v[1]+1, v[2]+1)
            sentDict[token] = v
    sentDict.pop('', None)
    return sentDict 

### Main()

In [244]:
import SDataSource
import SConstants
from SWordList import SWordList
from sUtility import SUtility
from sPreprocessor import SPreprocessor


In [245]:
sutility = SUtility()
spreprocessor = SPreprocessor()

# SPreprocessor.resolveDependancy(trieCommon)
dateBetween = [start_date, end_date]
commentsDocument = SDataSource.getListOfComments(dateBetween).head(1000)

# document preprocessing, cleaning, filtering, replacement, spliting into multiple senetnces from one
processed_doc = []
for sentence in commentsDocument:
    for each in spreprocessor.docCleaning(sentence):
        processed_doc.append(each)


Total number of comments: 972 between 01-11-2018 and 30-12-2018


In [246]:
def makeDict(sentence):
    t = spreprocessor.parseToTokens(trieTopic, sentence)
    r = spreprocessor.parseToTokens(trieNReason, sentence)
    print(sentence)
    print('topic is:', t)
    print('reasons are:', r)
    print()
    sutility.dump(t, r)

In [259]:
wordList_file = SDataSource.getDataSourcePathFor(SConstants.wordFile_path)
trieCommon = SWordList(wordList_file)

topic_file = SDataSource.getDataSourcePathFor(SConstants.topic_path)
trieTopic = SWordList(topic_file)

n_reason_file = SDataSource.getDataSourcePathFor(SConstants.n_reason_path)
trieNReason = SWordList(n_reason_file)

..SWordList constructor called 
..SWordList constructor called 
..SWordList constructor called 


In [260]:
l = ['crashing', 'crashes', 'upi']
print(spreprocessor.filterWords(l))
    

['crash', 'crash', 'upi']


In [261]:
# import nltk
# sno = nltk.stem.SnowballStemmer('english')
# print(sno.stem('upi'))
# print(sno.stem('debitance'))
# print(sno.stem('fairly'))

In [262]:
for each in processed_doc:
    makeDict(each)

i am cant add amount from another upi acoount or also cant transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id
topic is: ['upi', 'account']
reasons are: ['add amount', 'transfer', 'inact', 'problem']

now i am facing another problem
topic is: []
reasons are: ['face', 'problem']

i cant login dont know why solve the problem as soon as possible
topic is: ['login']
reasons are: ['problem']

the lastest update broke the app
topic is: ['updat', 'app']
reasons are: ['broke']

kya yr debit card se funds load nai kar sakte isme
topic is: ['debit card']
reasons are: []

koi kaam ka nai hai ye bank fir
topic is: []
reasons are: []

sir app is not open and ifsc of most of the bank dont reflect
topic is: ['app', 'ifsc']
reasons are: ['open', 'reflect']

 it was my favorite app but not now
topic is: ['app']
reasons are: []

hating your services
topic is: []
reasons are: ['servic']

 your technical issues were dont ending  highly absence

 uninstalled and reinstalled
topic is: []
reasons are: []

 but no luck
topic is: []
reasons are: []

 please anyone help
topic is: []
reasons are: []

 need to do urgent transaction
topic is: []
reasons are: []

 money got stuck in it
topic is: []
reasons are: ['stuck']

 
topic is: []
reasons are: []

making fraud and and wrongly deducted my rs  in my account
topic is: ['account']
reasons are: ['wrong', 'deduct']

my first transaction is successful but second transaction not success its bad app
topic is: ['app']
reasons are: ['bad']

everytime i open the app
topic is: ['app']
reasons are: ['open']

 it say register ur new device 
topic is: []
reasons are: []

 plz fix it
topic is: []
reasons are: ['fix it']

really sad to hear that
topic is: []
reasons are: []

 i have to visit the branch in other city i
topic is: []
reasons are: []

e  hours journey from hyderabad
topic is: []
reasons are: []

for just activating account for which i already submitted biometric at the time of account

reasons are: []

i dont know y made like this
topic is: []
reasons are: []

 that app can operate from same sim in and same phone 
topic is: ['app']
reasons are: []

 taken all the permission 
topic is: []
reasons are: []

 even if we disagree they wont allow to register
topic is: []
reasons are: []

 
topic is: []
reasons are: []

the app is too slow and freezes frequently the day it came on play store
topic is: ['app']
reasons are: []

 not a single time you people bothered to update the app to improve its smoothness
topic is: ['updat', 'app']
reasons are: []

 the app is developed in its worst way and instead you are taking around mb for this app
topic is: ['app']
reasons are: []

one of the worst app in play store
topic is: ['app']
reasons are: []

pls play dont allow to upload such kind of fake app thanks
topic is: ['app']
reasons are: []

you call yourself asias safest bank and all that nonsence but your app is probably not just asia but worlds slowest and most sluggish app
topic

 also my frnds with iphones unable to use this app from last  year
topic is: ['app']
reasons are: []

 suppprt is also worst do something
topic is: []
reasons are: []

hello dbs i have savings account in sbi bank
topic is: ['account']
reasons are: []

 my aadhar number and pan linked to sbi account
topic is: ['account']
reasons are: []

 i want new savings account in this paperless bank
topic is: ['account']
reasons are: []

 is it possible
topic is: []
reasons are: []

why is your app slow and buggy
topic is: ['app']
reasons are: ['buggi']

 need faster performance like phonepe
topic is: []
reasons are: []

now a days its working too slow
topic is: []
reasons are: []

what update version is tis
topic is: ['updat', 'version']
reasons are: []

 not working tis app kindly do neatly
topic is: ['app']
reasons are: ['not working']

after creating the account
topic is: ['account']
reasons are: []

 i get a popup saying
topic is: []
reasons are: ['popup']

 identity verrified
topic is: []
rea

 and i dont know where to find the customer care
topic is: ['customer care']
reasons are: []

they cant connect to upi and always says cant be connected try again later
topic is: ['upi']
reasons are: []

this app is irritating me as i have to register everyday for my device it automatically logs me off everyday please fix repeatedly registration of device everytime
topic is: ['app']
reasons are: []

i wish to invest in mutual funds through dbs
topic is: []
reasons are: []

  now when i go to mutual funds section it takes me to verify kyc and then takes request online
topic is: ['kyc']
reasons are: []

 it says it will be completed in  days
topic is: []
reasons are: []

  fact is
topic is: []
reasons are: []

 its more than  days and still its just taking request from me and nothing has been done
topic is: []
reasons are: []

  also its so so difficult to connect to customer care
topic is: ['customer care']
reasons are: []

 is it that we can dont speak to a natural person in dbs findin

reasons are: []

 the app keeps running in the background and crashes
topic is: ['app']
reasons are: ['crash']

 happens  times on a daily basis
topic is: []
reasons are: []

even when i am using latest version
topic is: ['version']
reasons are: []

i am getting message like we have detected you are using old version 
topic is: ['version']
reasons are: []

i think there is some issue with your detection system
topic is: []
reasons are: []

app is really slow to use
topic is: ['app']
reasons are: []

 interface must be smooth and fast
topic is: []
reasons are: []

 please resolve this issue as early as possible
topic is: []
reasons are: []

 need to register after every update
topic is: ['updat']
reasons are: []

 this is not a good thing
topic is: []
reasons are: []

there was an option to add money using debit card which is not available now
topic is: ['debit card']
reasons are: []

 will it be available in the next update i can see the other options to add money
topic is: ['updat']
r

 kindly add transaction history in simple interface with full transaction detail
topic is: []
reasons are: []

 so that easy to take screenshot and add share option
topic is: []
reasons are: []

it keeps registering the same device every few days as a new one
topic is: []
reasons are: []

 send sms again and again
topic is: []
reasons are: []

 customer care had assured that its a technical issue and will be sorted out soon
topic is: ['customer care']
reasons are: []

 but it continues
topic is: []
reasons are: []

 have to register the device again each time i try to login
topic is: ['login']
reasons are: []

 very frustrating
topic is: []
reasons are: ['frustrat']

 now it is not working at all
topic is: []
reasons are: ['not working']

 not login in
topic is: ['login']
reasons are: []

easier way to transfer money
topic is: []
reasons are: ['transfer']

the banking facilities are good but the app is worst
topic is: ['app']
reasons are: []

the web version is still better and fast
to


hasslefree banking system
topic is: []
reasons are: []

useful and easy
topic is: []
reasons are: []

good
topic is: []
reasons are: []

i liked but to activate debit card it is taking long time
topic is: ['debit card']
reasons are: []

banking made damn easy love you dbs keep rocking 
topic is: []
reasons are: []

please add option to see average monthlyquarterly balance
topic is: ['balanc']
reasons are: []

good
topic is: []
reasons are: []

good bank
topic is: []
reasons are: []

very good
topic is: []
reasons are: []

nice services bt where to do fingerprint verification
topic is: []
reasons are: ['servic']

very good app
topic is: ['app']
reasons are: []

good
topic is: []
reasons are: []

good
topic is: []
reasons are: []

very useful to transfer money to others within minute
topic is: []
reasons are: ['transfer']

and intrest level is good
topic is: []
reasons are: []

i am new here and st time
topic is: []
reasons are: []

 please consider user feedback very valuable
topic is:

reasons are: []

ok
topic is: []
reasons are: []

excellent app
topic is: ['app']
reasons are: []

 
topic is: []
reasons are: []

its nice bank for online payment
topic is: []
reasons are: []

excellent
topic is: []
reasons are: []

best online bank
topic is: []
reasons are: []

all type payments is good
topic is: []
reasons are: []

great ui and navigation
topic is: []
reasons are: []

good
topic is: []
reasons are: []

good
topic is: []
reasons are: []

awesome
topic is: []
reasons are: []

 very easy and very convenient
topic is: []
reasons are: []

good experience
topic is: []
reasons are: []

very good
topic is: []
reasons are: []

good
topic is: []
reasons are: []

nice app and nice bank
topic is: ['app']
reasons are: []

smooth
topic is: []
reasons are: []

very good
topic is: []
reasons are: []

helpful
topic is: []
reasons are: []

very supportive digital banking app
topic is: ['app']
reasons are: []

you say this is our last update of  every time you update the app
topic is:

topic is: ['app']
reasons are: []

wow awesome banking app
topic is: ['app']
reasons are: []

 overall is good
topic is: []
reasons are: []

i didnt find word for regarding be this golden appdbs
topic is: []
reasons are: []

i love this app
topic is: ['app']
reasons are: []

ui and easy to use 
topic is: []
reasons are: []

good usability
topic is: []
reasons are: []

 i like the dragon picture
topic is: []
reasons are: []

simply great
topic is: []
reasons are: []

thanks digibank
topic is: []
reasons are: []

best bank
topic is: []
reasons are: []

but i get everday a msg of to update the the
topic is: ['updat']
reasons are: []

 but i link to the link it shows nothing
topic is: []
reasons are: []

what happened is that update coming or not
topic is: ['updat']
reasons are: []

very good
topic is: []
reasons are: []


topic is: []
reasons are: []

its good approach to improve technically n with our finger were ever we are we can create our account non of other bank provide this facili

reasons are: ['servic']

 nice app to access my account
topic is: ['app', 'account']
reasons are: []

its really nice
topic is: []
reasons are: []

very good and efficient app
topic is: ['app']
reasons are: []

          
topic is: []
reasons are: []

call me
topic is: []
reasons are: []

my best companion for banking solutions
topic is: []
reasons are: []

good one
topic is: []
reasons are: []

bast app
topic is: ['app']
reasons are: []

osam
topic is: []
reasons are: []

required more improvement
topic is: []
reasons are: []

it is the best to transfer and get money
topic is: []
reasons are: ['transfer']

good
topic is: []
reasons are: []

very good banking app
topic is: ['app']
reasons are: []

 i have been using it since  months and i have to complete kyc in  days but the problem i am facing here is there is no kyc point in hyderabad sooo plz make it available in all city
topic is: ['kyc']
reasons are: ['problem', 'face']

its hangs dont use it
topic is: []
reasons are: []

dbs ban

good service and easy to use
topic is: []
reasons are: ['servic']

good app
topic is: ['app']
reasons are: []

so essay banking
topic is: []
reasons are: []

this app is very helpful to us
topic is: ['app']
reasons are: []

i have forgot password and i reseted the password
topic is: []
reasons are: []

 every time i login it is showing reset password to maintain security youll have to reset password
topic is: ['login', 'secur']
reasons are: ['every time', 'reset']

 so again i reseted the password but it is telling again to reset
topic is: []
reasons are: ['reset']

 the loop is continuing but not letting me to login
topic is: ['login']
reasons are: []

 i gave five star for my banking experience
topic is: []
reasons are: []

 thank you
topic is: []
reasons are: []

awesome bank yaar sup 
topic is: []
reasons are: []

er
topic is: []
reasons are: []

very good
topic is: []
reasons are: []

very excellent and easy to use this asias safest bank
topic is: []
reasons are: []

 a great expe

In [182]:
sutility.showTopicCounts()
print('---------------------------------')
sutility.showReasonDict()

[('app', 806), ('account', 240), ('login', 186), ('debit card', 142), ('upus', 84), ('version', 40), ('atm', 34), ('customer care', 32), ('kyc', 30), ('balance', 26), ('customer service', 22), ('debit cards', 16), ('look', 14), (None, 12), ('offer', 12), ('speed', 12), ('security', 10), ('internet', 8), ('biometric', 8), ('charge', 6), ('signup', 6), ('international transactions', 2), ('deals and offers', 2), ('another bank', 2), ('back option', 2), ('postal code', 2)]
---------------------------------
topic:  app
[('open', 48), ('not working', 32), ('crash', 18), ('every time', 16), ('change', 10), ('bad', 10), ('service', 10), ('stop', 8), ('popup', 8), ('reset', 8), ('frustrate', 6), ('something went wrong', 6), ('unstable', 6), ('screen', 4), ('suck', 4), ('stuck', 4), ('transfer', 4), ('buggy', 4), ('deposite', 2), ('broke', 2), ('incorrect', 2), ('force', 2), ('fix it', 2), ('worst experience', 2)]

topic:  account
[('open', 52), ('service', 12), ('transfer', 12), ('bad', 6), ('c

In [48]:
sutility.showTopicCounts()
# sentList = commentsDocument.map(getSentiment)

In [105]:
l = list(commentsDocument)
p = list(processed_doc)

length = len(l)

for i in range(length):
    print(l[i])
    print(p[i])
    print()

i am not able to add amount from another upi acoount or also not able to transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id
i am cant add amount from another upi acoount or also cant transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id

Now i am facing another problem..i can't login don't know why solve the problem as soon as possible
now i am facing another problem

The lastest update broke the app.
i cant login dont know why solve the problem as soon as possible

Kya yr debit card se funds load nai kar sakte isme...koi kaam ka nai hai ye bank fir
the lastest update broke the app

Sir app is not opening and ifsc of most of the bank don't reflect. it was my favorite app but not now
kya yr debit card se funds load nai kar sakte isme

Hating your Services. Your Technical issues were never ending & Highly absence of Technological advances. Now, required to Stop you

too slow

Great
something went wrong error

Good app. One app for all kind of transaction. Only problem is app is not too fast. Lags a lot.
repeatedly asking for upi registration even after registering for upi

Banking experience is good ,but apps is too much slow
 doesnt allow to skip

Unable to log in. It's showing Not secure network.
 need to register for new upi

Please provide cheque book
 everytime i need to use this app

How to apply dbs personal loan.Show in apps
 ended up having  upi ids

That's good
thank you for stopping collection of data

Everything is good but the only problem is very poorly built UI
what a poor service from dbs

It can't take proper otp
i trans money from dbs to my other bank 

Paper less banking good progress
money got debited from dbs account but deposited to my other bank account 

Why not add any card..
its fraud

When compare with other banking apps dbs is quiet easy to use...
the verification by otp does not work

Not bad
 so whats the point



In [106]:
dump = list(zip(commentsDocument, processed_doc))
length = len(dump)
for i in range(length):
    print(dump[i][0])
    print(dump[i][1])
    print(sentList.iloc[i])
    print()

i am not able to add amount from another upi acoount or also not able to transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id
i am cant add amount from another upi acoount or also cant transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id


NameError: name 'sentList' is not defined

In [107]:
dictionary = gensim.corpora.Dictionary(processed_doc)
# dictionary.filter_extremes(no_below=5, no_above=0.5, keep_n=100000)
bow_corpus = [dictionary.doc2bow(doc) for doc in processed_doc]

NameError: name 'gensim' is not defined

In [None]:
from gensim import corpora, models
corpus_tfidf = models.TfidfModel(bow_corpus)[bow_corpus]

In [None]:
lda_model_tfidf = gensim.models.LdaMulticore(corpus_tfidf, num_topics=5, id2word=dictionary, passes=2, workers=2)

In [None]:
# indexFor = 5
# print('..COMMENT :', commentsDocument[indexFor])
# print('\n..CORPOS :', processed_doc[indexFor])
# # print('\n..BOW:', bow_corpus[indexFor])
# for index, score in sorted(lda_model_tfidf[bow_corpus[indexFor]], key=lambda tup: -1*tup[1]):
#     print("\nScore: {}\t \nTopic: {}".format(score, lda_model_tfidf.print_topic(index, 5)))

In [None]:
topics = lda_model_tfidf.print_topics(num_words=4)
for topic in topics:
    print(topic)

In [None]:
# print(processed_doc.values)

In [None]:
l = tokenDictWithPosNegSentiment(list(sentList), list(processed_doc))


In [None]:
# print(sortedMostPos(l))

In [None]:
from showBarGraph import *
# show positive bar graph
showBarCharForSentiment(sortedMostPos(l), pos=True)

In [None]:
# print(sortedMostNeg(l))
# show positive bar graph
showBarCharForSentiment(sortedMostNeg(l), pos=False)

In [None]:
showPiChart(sortedMostFreq(l))

In [None]:
# showTempBarChart()