# [Topic Modeling](https://towardsdatascience.com/topic-modeling-and-latent-dirichlet-allocation-in-python-9bf156893c24)

In [1]:

from pprint import pprint
import numpy as np
np.random.seed(2018)
import nltk

# reload only imported modules before run
# https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

'''
pantree bank tags: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
standford source: https://nlp.stanford.edu/software/CRF-NER.shtml
standford online text tree generater: http://nlp.stanford.edu:8080/parser/index.jsp
'''

'\npantree bank tags: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html\nstandford source: https://nlp.stanford.edu/software/CRF-NER.shtml\nstandford online text tree generater: http://nlp.stanford.edu:8080/parser/index.jsp\n'

### constants

In [2]:
start_date = '01-11-2018' #  09-Sep-2018 
end_date = '30-12-2018' # 01-Oct-2018

### We will perform the following steps:

#### 1. Tokenization: 
Split the text into sentences and the sentences into words. Lowercase the words and remove punctuation.

In [3]:
# def tokenize(text):
#     return gensim.utils.simple_preprocess(text)

#### 2. Remove small words:
Words that have fewer than 3 characters are removed.

In [4]:
# def isShortWord(token):
#     return len(token) < 3

#### 3. Remove stopwords:
All stopwords are removed.

In [5]:
# def isStopWord(token):
#     return token in gensim.parsing.preprocessing.STOPWORDS

#### 4. lemmatized +  Stemming:
Words are lemmatized — words in third person are changed to first person and verbs in past and future tenses are changed into present.

Words are stemmed — words are reduced to their root form.

In [6]:
# from nltk.corpus import wordnet as wn
# from nltk import pos_tag, word_tokenize
# from nltk.stem.porter import *
# from textblob import TextBlob


# def lemmatize_stemming(token):
#     stemmer = PorterStemmer() #gensim.parsing.stem_text(tokenize) #
#     for word, tag in pos_tag(word_tokenize(token)):
#         wntag = tag[0].lower()
#         wntag = wntag if wntag in ['a', 'r', 'n', 'v'] else None
#         lemma = WordNetLemmatizer().lemmatize(word, wntag) if wntag else word
#         return TextBlob(lemma).words[0].singularize()
#     return ''

#### 5. Replace Emojis:

In [7]:
# ## get emoji characters file path
# def getEmojis():
#     from dataSource import getEmojis
#     comments_file_path = getDataSourcePathFor(emoji_path)
#     return getEmojis(comments_file_path)#.head()

In [8]:
# def hasEmojicon(token):
    
# def replaceEmojicons(token, emojies):
#     pass
    

## Data Source

In [9]:
# def getNounList(sentence='', tokens = []):
#     from nltk import word_tokenize, pos_tag
#     if len(tokens) > 0:    
#         nouns = [token for token, pos in pos_tag(tokens) if pos.startswith('NN')]
#         return nouns
#     else:
#         nouns = [token for token, pos in pos_tag(word_tokenize(sentence)) if pos.startswith('NN')]
#         return nouns

In [10]:
# text = 'I Have done reviewing, Will be seeing by them'
# print(preprocess(text))

In [11]:
# key is file storage path
# def getDataSourcePathFor(keyForFilePath):
#     import json
#     import os
    
#     config_file_path = os.environ[virtual_env] + '/config.json'

#     with open(config_file_path) as f:
#         config = json.load(f)
#         if keyForFilePath in config:# ['comments_path', 'output_path']
#             return config[keyForFilePath] 
#     return None

In [12]:
# ## get list of comments from stored input csv file
# import dataSource 

# def getListOfComments():
#     ### This is to get csv rows between given dates
#     comments_file_path = getDataSourcePathFor(comments_path)
#     commentsList = getComments(comments_file_path, start_date, end_date) #['comments'] 
#     commentsList = commentsList.sort_values(by='ratings', ascending=True)['comments'] 
#     print('Total number of comments: %s between %s and %s' % (len(commentsList), start_date, end_date))
#     return commentsList

### Text processing

In [13]:
# import re

# def filterWord(token):
#     if not (isStopWord(token) or isShortWord(token)):
#         lemmaWord = lemmatize_stemming(token)
#         if not isShortWord(lemmaWord):
#             return ("".join(re.findall("[a-zA-Z]+", lemmaWord)).lower())
#     return None 
    
# def filters(sentence):
# #     print('..given comments:', sentence)
#     result = []
#     #nouns = getNounList(sentence) # fetch only Nouns
#     for token in tokenize(sentence):#nouns: ###tokenize(text):
#         result.append(filterWord(token))
#     return result

# def filterWords(tokens):
#     return list(filter(lambda token: filterWord(token), tokens))

In [14]:
# def preprocessCommentDocument(document):
#     return list(map(lambda sentence: filters(sentence), document))

### Word Cloud

In [15]:
from wordCloud import showWordCloud
def showCloud(topicCollection):
    showWordCloud(topicCollection) 

### Bag of Words on the Data set

In [16]:
def bow(processed_docs):
    dictionary = gensim.corpora.Dictionary(processed_docs)
    bow_corpus = [dictionary.doc2bow(doc) for doc in processed_docs]
    return bow_corpus

### Sentiment

In [17]:
from getSentiment import getSentiment

def getSentFromCommentList(commentList):
    sentimentList = []
    for sentence in commentList:
        sentiment = getSentiment(sentence)
        sentimentList.append(sentiment)
    return sentimentList


def isNegative(sentiment):
    return sentiment > 0.5


def tokenDictWithPosNegSentiment(sentimentList, document):
    sentDict = {}
    size = range(len(document))
    
    for i in size:
        for token in document[i]:
            v = (0, 0, 0) # (neg, pos, freq)
            if token in sentDict:
                v = sentDict[token]
                
            if isNegative(sentimentList[i]):
                v = (v[0]+1, v[1], v[2]+1)
            else:
                v = (v[0], v[1]+1, v[2]+1)
            sentDict[token] = v
    sentDict.pop('', None)
    return sentDict 

### Main()

In [165]:
import SDataSource
import SConstants
from SWordList import SWordList
from sUtility import SUtility
from sPreprocessor import SPreprocessor


In [183]:
sutility = SUtility()
spreprocessor = SPreprocessor()

# SPreprocessor.resolveDependancy(trieCommon)
dateBetween = [start_date, end_date]
commentsDocument = SDataSource.getListOfComments(dateBetween).head(1000)

# document preprocessing, cleaning, filtering, replacement, spliting into multiple senetnces from one
processed_doc = []
for sentence in commentsDocument:
    for each in spreprocessor.docCleaning(sentence):
        processed_doc.append(each)


Total number of comments: 972 between 01-11-2018 and 30-12-2018


In [188]:
def makeDict(sentence):
    t = spreprocessor.parseToTokens(trieTopic, sentence)
    r = spreprocessor.parseToTokens(trieNReason, sentence)
    print(sentence)
    print('topic is:', t)
    print('reasons are:', r)
    print()
    sutility.dump(t, r)

In [189]:
wordList_file = SDataSource.getDataSourcePathFor(SConstants.wordFile_path)
trieCommon = SWordList(wordList_file)

topic_file = SDataSource.getDataSourcePathFor(SConstants.topic_path)
trieTopic = SWordList(topic_file)

n_reason_file = SDataSource.getDataSourcePathFor(SConstants.n_reason_path)
trieNReason = SWordList(n_reason_file)

..SWordList constructor called 
..SWordList constructor called 
..SWordList constructor called 


In [190]:
l = ['crashing', 'crashes']
print(spreprocessor.filterWords(l))
    

['crash', 'crash']


In [191]:
for each in processed_doc:
    makeDict(each)

i am cant add amount from another upi acoount or also cant transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id
topic is: ['upus', 'account']
reasons are: ['add amount', 'transfer', 'inactive']
now i am facing another problem
topic is: []
reasons are: []
i cant login dont know why solve the problem as soon as possible
topic is: ['login']
reasons are: []
the lastest update broke the app
topic is: ['app']
reasons are: ['broke']
kya yr debit card se funds load nai kar sakte isme
topic is: ['debit card']
reasons are: []
koi kaam ka nai hai ye bank fir
topic is: []
reasons are: []
sir app is not open and ifsc of most of the bank dont reflect
topic is: ['app']
reasons are: ['open']
 it was my favorite app but not now
topic is: ['app']
reasons are: []
hating your services
topic is: []
reasons are: ['service']
 your technical issues were dont ending  highly absence of technological advances
topic is: []
reasons are: ['technical issu

reasons are: []
 digi must return the feature
topic is: []
reasons are: []
wouldnt let me login
topic is: ['login']
reasons are: []
 keeps telling oops something went wrong
topic is: []
reasons are: ['something went wrong']
try again from a safer network
topic is: []
reasons are: []
app was working good until today 
topic is: ['app']
reasons are: []
app is crashing after the update 
topic is: ['app']
reasons are: ['crash']
please resolve it
topic is: []
reasons are: []
no star
topic is: []
reasons are: []
 new version we dont have option to credit the wallet with debit card and credit card
topic is: ['version', 'debit card']
reasons are: []
 if so what is use of this
topic is: []
reasons are: []
all will not have net banking and upi option
topic is: ['upus']
reasons are: []
can u explain what is use of this
topic is: []
reasons are: []
 why we need to use your app
topic is: ['app']
reasons are: []
opened my account and i forgot my password now i am trying to change password i am receiv

reasons are: []
i was asked to run an update by the app and once i did so i am unable to login as it says the pwd is wrong and when i try to reset the pwd it says both my regd mobile num and email id are incorrect
topic is: ['app', 'login']
reasons are: ['reset', 'incorrect']
 ridiculous app update
topic is: ['app']
reasons are: []
 it was working just fine before i was forced to update
topic is: []
reasons are: []
finger sensor doesnt work on vivo v pro
topic is: []
reasons are: []
im really frustrated with this app i v reset the password for once with confirmation email and message but still i cant login in or sign in feeling like to close the account i want to be fix
topic is: ['app', 'login', 'account']
reasons are: ['frustrate', 'reset']
worst experience
topic is: []
reasons are: ['worst experience']
 this app is not something that such a huge brand should have in their pocket
topic is: ['app']
reasons are: []
 please change your engineering teamitt services
topic is: []
reasons a

please dont open account in this bluddy bank guys they steal your money  i have dont seen such a poor service and customer care is waste and they wont solve the issues from this bank i lost my money
topic is: ['account', 'customer care']
reasons are: ['open', 'service']
in this app i dont see the option of activate your physical debit card and i got physical debit card so it not use full to me
topic is: ['app', 'debit card']
reasons are: []
the app isnt allowing to open the menu and sticks to the lets go page and even after cancelling the lets go page the app isnt responding 
topic is: ['app']
reasons are: ['open']
truly a messy app
topic is: ['app']
reasons are: []
not even liked a little bit
topic is: []
reasons are: []
i cant reset my password and username 
topic is: []
reasons are: ['reset']
 the app stuck
topic is: ['app']
reasons are: ['stuck']
 please help me
topic is: []
reasons are: []
 devoloper please help me
topic is: []
reasons are: []
every time transfer problems
topic is

guys its service is very worst i have been using this account suddenly they freeze my account without reasons telling it is under investigation at least they are unable to tell the reason and tat period when it is going to solve
topic is: ['account']
reasons are: ['service']
 thank god i was planning to do fd of  lakhs not yet done or else i should be wondering around its branches
topic is: []
reasons are: []
 at least solve my problem account is freezed
topic is: ['account']
reasons are: []
when i login to my account
topic is: ['login', 'account']
reasons are: []
 popup tells identification done now get your visa paywave debit card when i proceed and fill in the details
topic is: ['debit card']
reasons are: ['popup']
 it says digibank only select cities
topic is: []
reasons are: []
 and there is no way to cancel all this
topic is: []
reasons are: []
 i am cant use my account
topic is: ['account']
reasons are: []
 x button to close the popup doesnt work
topic is: []
reasons are: ['popu

topic is: []
reasons are: []
 this is not a good thing
topic is: []
reasons are: []
there was an option to add money using debit card which is not available now
topic is: ['debit card']
reasons are: []
 will it be available in the next update i can see the other options to add money
topic is: []
reasons are: []
 but i was looking to add money via debit card
topic is: ['debit card']
reasons are: []
 also there comes a popup frequently stating the application has crashed
topic is: []
reasons are: ['popup', 'crashed']
always giving error while open of the stable app
topic is: ['app']
reasons are: ['open']
i am cant login plz
topic is: ['login']
reasons are: []
help
topic is: []
reasons are: []
this app needs to follow india internet banking apps like sbi axis 
topic is: ['app', 'internet']
reasons are: []
etc no support we cannot expect from them if something went wrong with payment
topic is: []
reasons are: ['something went wrong']
when i login to my account
topic is: ['login', 'account'

 and so i cant use the app at all
topic is: ['app']
reasons are: []
 no other way there
topic is: []
reasons are: []
 i have to close the app
topic is: ['app']
reasons are: []
 i have one query too
topic is: []
reasons are: []
 is there any debit card charges 
topic is: ['debit card']
reasons are: []
the app lags the smoothness
topic is: ['app']
reasons are: []
every time i login the app directly takes me to the upi vpa page
topic is: ['login', 'app', 'upus']
reasons are: ['every time']
 despite uninstall and installing the app again the issue still persists and one cant really do anything unless this bug is fixed
topic is: ['app']
reasons are: []
 kindly ensure you look into it
topic is: ['look']
reasons are: []
not up to mark now
topic is: []
reasons are: []
good app but should have adding money from card option
topic is: ['app']
reasons are: []
 major disadvantages are no branches for this bank
topic is: []
reasons are: []
im cant login
topic is: ['login']
reasons are: []
 from  day

while open account relationship manager told you will get monthly codes on ur mobile after sending sms from ur mobile but still now not recd any codes
topic is: ['account']
reasons are: ['open']
please add this feature in app only so everytime we dont have to send any msg
topic is: ['app']
reasons are: []
crashes every  minutes
topic is: []
reasons are: ['crash']
the dbs bank app wont close even after pressing the back button
topic is: ['app']
reasons are: []
it just remains open forever and am forced to close the app by other means
topic is: ['app']
reasons are: ['open']
hope u tech guys will take conscious of the situation and correct it as soon as possible
topic is: []
reasons are: []
digibank isnt that much digital
topic is: []
reasons are: []
 errors in app cant be accepted from a virtual bank
topic is: ['app']
reasons are: []
everything is ok but the app is asking pancard mandatory those ppl who crossed  now like me are not having if u cn provide any other option for that thn it 

 please start this service again
topic is: []
reasons are: ['service']
good bank
topic is: []
reasons are: []
dg bank is good bat network slo
topic is: []
reasons are: []
nice app
topic is: ['app']
reasons are: []
great
topic is: []
reasons are: []
good app
topic is: ['app']
reasons are: []
 one app for all kind of transaction
topic is: ['app']
reasons are: []
 only problem is app is not too fast
topic is: ['app']
reasons are: []
 lags a lot
topic is: []
reasons are: []
banking experience is good 
topic is: []
reasons are: []
but apps is too much slow
topic is: []
reasons are: []
unable to login
topic is: ['login']
reasons are: []
 its showing not secure network
topic is: []
reasons are: []
please provide cheque book
topic is: []
reasons are: []
how to apply dbs personal loan
topic is: []
reasons are: []
show in apps
topic is: []
reasons are: []
thats good
topic is: []
reasons are: []
everything is good but the only problem is very poorly built ui
topic is: []
reasons are: []
it cant t

 dont listen to other people what they say about your app
topic is: ['app']
reasons are: []
 your app and bank is the best
topic is: ['app']
reasons are: []
 others are not that good a big god bless
topic is: []
reasons are: []
excellent
topic is: []
reasons are: []
wrost app bad app my amount rs  cut
topic is: ['app']
reasons are: ['bad']
give us an inteface like phone pe or paytm
topic is: []
reasons are: []
good bank
topic is: []
reasons are: []
nice banking experience
topic is: []
reasons are: []
the best bank in india
topic is: []
reasons are: []
good
topic is: []
reasons are: []
loved it
topic is: []
reasons are: []
good experience overall as of now
topic is: []
reasons are: []
best bank in world
topic is: []
reasons are: []
 love u digi
topic is: []
reasons are: []
nice
topic is: []
reasons are: []
best
topic is: []
reasons are: []
lovely
topic is: []
reasons are: []
thanks dbs digibank
topic is: []
reasons are: []
excellent
topic is: []
reasons are: []

topic is: []
reasons are

atm card new pin set is very problem
topic is: ['atm']
reasons are: []
i love it very easy but cash saving account nhi khulta and but i love its virtual debit card
topic is: ['account', 'debit card']
reasons are: []
like easy
topic is: []
reasons are: []
app is improving a lot with updates
topic is: ['app']
reasons are: []
 i received a free card and i use it for atm money withdrawal for free
topic is: ['atm']
reasons are: []
 thanks dbs
topic is: []
reasons are: []
good
topic is: []
reasons are: []
good
topic is: []
reasons are: []
love you dbs
topic is: []
reasons are: []
awesome
topic is: []
reasons are: []
very good service
topic is: []
reasons are: ['service']
please add money option in debit card kijiye
topic is: ['debit card']
reasons are: []
good
topic is: []
reasons are: []
really superb bank 
topic is: []
reasons are: []
i loved it
topic is: []
reasons are: []
i hope if u remove the limit on transactions number its added value to you
topic is: []
reasons are: []
and finger pr

app is working well
topic is: ['app']
reasons are: []
unmatchable experience
topic is: []
reasons are: []
good
topic is: []
reasons are: []
awesome banking app
topic is: ['app']
reasons are: []
icant get physical debit card
topic is: ['debit card']
reasons are: []
awesome bank and awesome app
topic is: ['app']
reasons are: []
i love dbs bank
topic is: []
reasons are: []
nice way to hasselfree banking
topic is: []
reasons are: []
respected madamsir
topic is: []
reasons are: []
 we are indian citizens who know to use debit card
topic is: ['debit card']
reasons are: []
our dbs bank also provide both physicalvirtal debit card for payments at onlineoffline 
topic is: ['debit card']
reasons are: []
please restore the option in new version for our dbs app for adding money via physicalvirtual debit card for better banking so that the bank coustmer can use it more often than they use 
topic is: ['version', 'app', 'debit card']
reasons are: []
it is not possible each and every coustmer for add m

In [182]:
sutility.showTopicCounts()
print('---------------------------------')
sutility.showReasonDict()

[('app', 806), ('account', 240), ('login', 186), ('debit card', 142), ('upus', 84), ('version', 40), ('atm', 34), ('customer care', 32), ('kyc', 30), ('balance', 26), ('customer service', 22), ('debit cards', 16), ('look', 14), (None, 12), ('offer', 12), ('speed', 12), ('security', 10), ('internet', 8), ('biometric', 8), ('charge', 6), ('signup', 6), ('international transactions', 2), ('deals and offers', 2), ('another bank', 2), ('back option', 2), ('postal code', 2)]
---------------------------------
topic:  app
[('open', 48), ('not working', 32), ('crash', 18), ('every time', 16), ('change', 10), ('bad', 10), ('service', 10), ('stop', 8), ('popup', 8), ('reset', 8), ('frustrate', 6), ('something went wrong', 6), ('unstable', 6), ('screen', 4), ('suck', 4), ('stuck', 4), ('transfer', 4), ('buggy', 4), ('deposite', 2), ('broke', 2), ('incorrect', 2), ('force', 2), ('fix it', 2), ('worst experience', 2)]

topic:  account
[('open', 52), ('service', 12), ('transfer', 12), ('bad', 6), ('c

In [48]:
sutility.showTopicCounts()
# sentList = commentsDocument.map(getSentiment)

In [105]:
l = list(commentsDocument)
p = list(processed_doc)

length = len(l)

for i in range(length):
    print(l[i])
    print(p[i])
    print()

i am not able to add amount from another upi acoount or also not able to transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id
i am cant add amount from another upi acoount or also cant transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id

Now i am facing another problem..i can't login don't know why solve the problem as soon as possible
now i am facing another problem

The lastest update broke the app.
i cant login dont know why solve the problem as soon as possible

Kya yr debit card se funds load nai kar sakte isme...koi kaam ka nai hai ye bank fir
the lastest update broke the app

Sir app is not opening and ifsc of most of the bank don't reflect. it was my favorite app but not now
kya yr debit card se funds load nai kar sakte isme

Hating your Services. Your Technical issues were never ending & Highly absence of Technological advances. Now, required to Stop you

too slow

Great
something went wrong error

Good app. One app for all kind of transaction. Only problem is app is not too fast. Lags a lot.
repeatedly asking for upi registration even after registering for upi

Banking experience is good ,but apps is too much slow
 doesnt allow to skip

Unable to log in. It's showing Not secure network.
 need to register for new upi

Please provide cheque book
 everytime i need to use this app

How to apply dbs personal loan.Show in apps
 ended up having  upi ids

That's good
thank you for stopping collection of data

Everything is good but the only problem is very poorly built UI
what a poor service from dbs

It can't take proper otp
i trans money from dbs to my other bank 

Paper less banking good progress
money got debited from dbs account but deposited to my other bank account 

Why not add any card..
its fraud

When compare with other banking apps dbs is quiet easy to use...
the verification by otp does not work

Not bad
 so whats the point



In [106]:
dump = list(zip(commentsDocument, processed_doc))
length = len(dump)
for i in range(length):
    print(dump[i][0])
    print(dump[i][1])
    print(sentList.iloc[i])
    print()

i am not able to add amount from another upi acoount or also not able to transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id
i am cant add amount from another upi acoount or also cant transfer from another banks account it shows me benificary account is inactive or major problem not setup a upi id


NameError: name 'sentList' is not defined

In [107]:
dictionary = gensim.corpora.Dictionary(processed_doc)
# dictionary.filter_extremes(no_below=5, no_above=0.5, keep_n=100000)
bow_corpus = [dictionary.doc2bow(doc) for doc in processed_doc]

NameError: name 'gensim' is not defined

In [None]:
from gensim import corpora, models
corpus_tfidf = models.TfidfModel(bow_corpus)[bow_corpus]

In [None]:
lda_model_tfidf = gensim.models.LdaMulticore(corpus_tfidf, num_topics=5, id2word=dictionary, passes=2, workers=2)

In [None]:
# indexFor = 5
# print('..COMMENT :', commentsDocument[indexFor])
# print('\n..CORPOS :', processed_doc[indexFor])
# # print('\n..BOW:', bow_corpus[indexFor])
# for index, score in sorted(lda_model_tfidf[bow_corpus[indexFor]], key=lambda tup: -1*tup[1]):
#     print("\nScore: {}\t \nTopic: {}".format(score, lda_model_tfidf.print_topic(index, 5)))

In [None]:
topics = lda_model_tfidf.print_topics(num_words=4)
for topic in topics:
    print(topic)

In [None]:
# print(processed_doc.values)

In [None]:
l = tokenDictWithPosNegSentiment(list(sentList), list(processed_doc))


In [None]:
# print(sortedMostPos(l))

In [None]:
from showBarGraph import *
# show positive bar graph
showBarCharForSentiment(sortedMostPos(l), pos=True)

In [None]:
# print(sortedMostNeg(l))
# show positive bar graph
showBarCharForSentiment(sortedMostNeg(l), pos=False)

In [None]:
showPiChart(sortedMostFreq(l))

In [None]:
# showTempBarChart()