In [1]:
import nltk
import pandas as pd

In [2]:
comments = pd.read_excel('Notebook_Data.xlsx')
comments.head()

Unnamed: 0,TopicName,Comment
0,What is going well and why?,The atmosphere in Wayne is more relaxed and it...
1,What can be improved and how?,You already know who I am based on the first f...
2,Other,I've been very loyal to this company and have ...
3,What is going well and why?,It is good that we don't have separate resourc...
4,What can be improved and how?,Working on 2 releases and production support a...


# Word and Sentence Tokenizer

In [3]:
from nltk.tokenize import sent_tokenize, word_tokenize

comments['Sentence Tokens'] = comments['Comment'].apply(sent_tokenize)
comments['Word Tokens'] = comments['Comment'].apply(word_tokenize)
#print(sent_tokenize(comments['Comment'][0]))
comments['Sentence Tokens'][0]

['The atmosphere in Wayne is more relaxed and it seems people are happier.',
 'Some leadership changes appear to have started this trend.']

In [4]:
comments['Word Tokens'][0]

['The',
 'atmosphere',
 'in',
 'Wayne',
 'is',
 'more',
 'relaxed',
 'and',
 'it',
 'seems',
 'people',
 'are',
 'happier',
 '.',
 'Some',
 'leadership',
 'changes',
 'appear',
 'to',
 'have',
 'started',
 'this',
 'trend',
 '.']

In [5]:
comments.head()

Unnamed: 0,TopicName,Comment,Sentence Tokens,Word Tokens
0,What is going well and why?,The atmosphere in Wayne is more relaxed and it...,[The atmosphere in Wayne is more relaxed and i...,"[The, atmosphere, in, Wayne, is, more, relaxed..."
1,What can be improved and how?,You already know who I am based on the first f...,[You already know who I am based on the first ...,"[You, already, know, who, I, am, based, on, th..."
2,Other,I've been very loyal to this company and have ...,[I've been very loyal to this company and have...,"[I, 've, been, very, loyal, to, this, company,..."
3,What is going well and why?,It is good that we don't have separate resourc...,[It is good that we don't have separate resour...,"[It, is, good, that, we, do, n't, have, separa..."
4,What can be improved and how?,Working on 2 releases and production support a...,[Working on 2 releases and production support ...,"[Working, on, 2, releases, and, production, su..."


# Stop Words

In [6]:
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))
def filter_stop(word_tokens):
    filtered_words = []
    for w in word_tokens:
        if w not in stop_words:
            filtered_words.append(w)
    return filtered_words
#filtered_sentence = [w for w in word_tokens if not w in stop_words]
comments['Filtered Words'] = comments['Word Tokens'].apply(filter_stop)
comments['Filtered Words'][0]

['The',
 'atmosphere',
 'Wayne',
 'relaxed',
 'seems',
 'people',
 'happier',
 '.',
 'Some',
 'leadership',
 'changes',
 'appear',
 'started',
 'trend',
 '.']

In [7]:
comments.head()

Unnamed: 0,TopicName,Comment,Sentence Tokens,Word Tokens,Filtered Words
0,What is going well and why?,The atmosphere in Wayne is more relaxed and it...,[The atmosphere in Wayne is more relaxed and i...,"[The, atmosphere, in, Wayne, is, more, relaxed...","[The, atmosphere, Wayne, relaxed, seems, peopl..."
1,What can be improved and how?,You already know who I am based on the first f...,[You already know who I am based on the first ...,"[You, already, know, who, I, am, based, on, th...","[You, already, know, I, based, first, question..."
2,Other,I've been very loyal to this company and have ...,[I've been very loyal to this company and have...,"[I, 've, been, very, loyal, to, this, company,...","[I, 've, loyal, company, worked, hard, ., For,..."
3,What is going well and why?,It is good that we don't have separate resourc...,[It is good that we don't have separate resour...,"[It, is, good, that, we, do, n't, have, separa...","[It, good, n't, separate, resources, releases,..."
4,What can be improved and how?,Working on 2 releases and production support a...,[Working on 2 releases and production support ...,"[Working, on, 2, releases, and, production, su...","[Working, 2, releases, production, support, ti..."


# Parts of Speech

In [8]:
pos = []
for comment in comments['Filtered Words']:
     pos.append(nltk.pos_tag(comment))

comments['POS'] = pos
comments['POS'][0]

[('The', 'DT'),
 ('atmosphere', 'JJ'),
 ('Wayne', 'NNP'),
 ('relaxed', 'VBD'),
 ('seems', 'VBZ'),
 ('people', 'NNS'),
 ('happier', 'RBR'),
 ('.', '.'),
 ('Some', 'DT'),
 ('leadership', 'NN'),
 ('changes', 'NNS'),
 ('appear', 'VBP'),
 ('started', 'VBN'),
 ('trend', 'NN'),
 ('.', '.')]

In [9]:
comments.head()

Unnamed: 0,TopicName,Comment,Sentence Tokens,Word Tokens,Filtered Words,POS
0,What is going well and why?,The atmosphere in Wayne is more relaxed and it...,[The atmosphere in Wayne is more relaxed and i...,"[The, atmosphere, in, Wayne, is, more, relaxed...","[The, atmosphere, Wayne, relaxed, seems, peopl...","[(The, DT), (atmosphere, JJ), (Wayne, NNP), (r..."
1,What can be improved and how?,You already know who I am based on the first f...,[You already know who I am based on the first ...,"[You, already, know, who, I, am, based, on, th...","[You, already, know, I, based, first, question...","[(You, PRP), (already, RB), (know, VBP), (I, P..."
2,Other,I've been very loyal to this company and have ...,[I've been very loyal to this company and have...,"[I, 've, been, very, loyal, to, this, company,...","[I, 've, loyal, company, worked, hard, ., For,...","[(I, PRP), ('ve, VBP), (loyal, JJ), (company, ..."
3,What is going well and why?,It is good that we don't have separate resourc...,[It is good that we don't have separate resour...,"[It, is, good, that, we, do, n't, have, separa...","[It, good, n't, separate, resources, releases,...","[(It, PRP), (good, JJ), (n't, RB), (separate, ..."
4,What can be improved and how?,Working on 2 releases and production support a...,[Working on 2 releases and production support ...,"[Working, on, 2, releases, and, production, su...","[Working, 2, releases, production, support, ti...","[(Working, VBG), (2, CD), (releases, NNS), (pr..."


# Sentiment Analysis using Afinn Word List

In [10]:
from afinn import Afinn
afinn = Afinn()
print(afinn.score(comments['Comment'][0]))

3.0


In [11]:
print(afinn.score(comments['Comment'][5]))

2.0


In [12]:
afinn_sent = [afinn.score(comment) for comment in comments['Comment']]
comments['AFINN Score'] = afinn_sent
comments[['Comment', 'AFINN Score']]

Unnamed: 0,Comment,AFINN Score
0,The atmosphere in Wayne is more relaxed and it...,3.0
1,You already know who I am based on the first f...,3.0
2,I've been very loyal to this company and have ...,2.0
3,It is good that we don't have separate resourc...,5.0
4,Working on 2 releases and production support a...,4.0
5,3 day telework will definitely help manage wor...,2.0
6,Project is going well most people are committ...,1.0
7,Need to improve work life balance and minimize...,3.0
8,UPS is changing to accommodate the needs for u...,9.0
9,The QPR process could be improved better bei...,8.0


In [13]:
all_words = []
for comment in comments['Filtered Words']:
     for word in comment:
            all_words.append(word)

all_words = nltk.FreqDist(all_words)
all_words.most_common(25)

[('.', 53),
 ('I', 11),
 ('work', 11),
 ('We', 9),
 ('well', 9),
 ('new', 8),
 ('many', 8),
 ('UPS', 8),
 ('team', 8),
 ('people', 7),
 ("n't", 7),
 ('always', 6),
 ('good', 5),
 ('time', 5),
 ('company', 4),
 ('get', 4),
 ('opportunities', 4),
 ('way', 4),
 ('going', 4),
 ('within', 4),
 ("'s", 4),
 ('technologies', 4),
 ('The', 3),
 ('leadership', 3),
 ('leaving', 3)]

# Applying Stemming and Investigating AFINN more

In [14]:
from nltk.stem import PorterStemmer

ps = PorterStemmer()
stem_scores = []
for comment in comments['Word Tokens']:
    total_score = 0
    for word in comment:
        word = ps.stem(word)
    sentence = ' '.join(comment)
    score = afinn.score(sentence)
    total_score+=score
    stem_scores.append(total_score)
    
comments['AFINN scores by stemming'] = stem_scores
comments['AFINN scores by stemming']

0      3.0
1      3.0
2      2.0
3      5.0
4      4.0
5      2.0
6      1.0
7      3.0
8      9.0
9      8.0
10    11.0
11     2.0
12     6.0
13     7.0
14     7.0
15     4.0
16     2.0
17     0.0
18     3.0
19     7.0
20     3.0
21     3.0
22     2.0
23     0.0
24    17.0
25     9.0
26     0.0
27     1.0
Name: AFINN scores by stemming, dtype: float64

In [15]:
comments

Unnamed: 0,TopicName,Comment,Sentence Tokens,Word Tokens,Filtered Words,POS,AFINN Score,AFINN scores by stemming
0,What is going well and why?,The atmosphere in Wayne is more relaxed and it...,[The atmosphere in Wayne is more relaxed and i...,"[The, atmosphere, in, Wayne, is, more, relaxed...","[The, atmosphere, Wayne, relaxed, seems, peopl...","[(The, DT), (atmosphere, JJ), (Wayne, NNP), (r...",3.0,3.0
1,What can be improved and how?,You already know who I am based on the first f...,[You already know who I am based on the first ...,"[You, already, know, who, I, am, based, on, th...","[You, already, know, I, based, first, question...","[(You, PRP), (already, RB), (know, VBP), (I, P...",3.0,3.0
2,Other,I've been very loyal to this company and have ...,[I've been very loyal to this company and have...,"[I, 've, been, very, loyal, to, this, company,...","[I, 've, loyal, company, worked, hard, ., For,...","[(I, PRP), ('ve, VBP), (loyal, JJ), (company, ...",2.0,2.0
3,What is going well and why?,It is good that we don't have separate resourc...,[It is good that we don't have separate resour...,"[It, is, good, that, we, do, n't, have, separa...","[It, good, n't, separate, resources, releases,...","[(It, PRP), (good, JJ), (n't, RB), (separate, ...",5.0,5.0
4,What can be improved and how?,Working on 2 releases and production support a...,[Working on 2 releases and production support ...,"[Working, on, 2, releases, and, production, su...","[Working, 2, releases, production, support, ti...","[(Working, VBG), (2, CD), (releases, NNS), (pr...",4.0,4.0
5,Work/Life Balance/Environment,3 day telework will definitely help manage wor...,[3 day telework will definitely help manage wo...,"[3, day, telework, will, definitely, help, man...","[3, day, telework, definitely, help, manage, w...","[(3, CD), (day, NN), (telework, NN), (definite...",2.0,2.0
6,What is going well and why?,Project is going well most people are committ...,[Project is going well most people are commit...,"[Project, is, going, well, most, people, are, ...","[Project, going, well, people, committed, work...","[(Project, NN), (going, VBG), (well, RB), (peo...",1.0,1.0
7,What can be improved and how?,Need to improve work life balance and minimize...,[Need to improve work life balance and minimiz...,"[Need, to, improve, work, life, balance, and, ...","[Need, improve, work, life, balance, minimize,...","[(Need, NNP), (improve, VB), (work, NN), (life...",3.0,3.0
8,What is going well and why?,UPS is changing to accommodate the needs for u...,[UPS is changing to accommodate the needs for ...,"[UPS, is, changing, to, accommodate, the, need...","[UPS, changing, accommodate, needs, us, great,...","[(UPS, NNP), (changing, VBG), (accommodate, NN...",9.0,9.0
9,What can be improved and how?,The QPR process could be improved better bei...,[The QPR process could be improved better be...,"[The, QPR, process, could, be, improved, bette...","[The, QPR, process, could, improved, better, n...","[(The, DT), (QPR, NNP), (process, NN), (could,...",8.0,8.0


No difference if you stem the words or not.

## More info about AFINN

https://github.com/fnielsen/afinn

Looks like they can look for emoticons!!

def __init__(self, language="en", emoticons=False, word_boundary=True):

In [16]:
afinn.score('This company is great!')

3.0

In [17]:
afinn = Afinn(emoticons=True)
afinn.score('This company is great! :)))))))')

7.0

In [18]:
afinn = Afinn()
afinn.score('I hate this company!')

-3.0

In [19]:
afinn = Afinn(emoticons=True)
afinn.score('I hate this company! :((((((')

-6.0

In [20]:
afinn = Afinn()
afinn.score('I hate this company! :((((((')

-3.0

# LDA Models (Topic Modelling) Latent Dirichlet Allocation

resources:
1.  https://github.com/bmabey/pyLDAvis interesting tool for visualizing to explore

https://nbviewer.jupyter.org/github/bmabey/pyLDAvis/blob/master/notebooks/Movie%20Reviews%2C%20AP%20News%2C%20and%20Jeopardy.ipynb

2.  http://www.analyticsvidhya.com/blog/2016/08/beginners-guide-to-topic-modeling-in-python/
           
3.  http://chdoig.github.io/pygotham-topic-modeling/#/1/3 

4.  https://radimrehurek.com/gensim/  library

5.  http://pythonhosted.org//lda/   library

6.  https://github.com/tdhopper/notes-on-dirichlet-processes notebooks

In [21]:
import gensim

