In [1]:
import numpy as np 
import pandas as pd 
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt

In [2]:
#Import CSV and load into a dataframe.
data = pd.read_csv("DailyComments.csv")
data

Unnamed: 0,Day of Week,comments
0,Monday,"Hello, how are you?"
1,Tuesday,Today is a good day!
2,Wednesday,It's my birthday so it's a really special day!
3,Thursday,Today is neither a good day or a bad day!
4,Friday,I'm having a bad day.
5,Saturday,There' s nothing special happening today.
6,Sunday,Today is a SUPER good day!


In [3]:
#I have decided to use the vader method from NLTK as it seems to be an excellent out-of-the-box solution for
#sentiment analysis. One of the reasons it stood out to me was its ability to work with smaller datasets where an
#analyst really doesnt have much training data, like our dataset this week. The one issue that I noticed upon the completion 
#of this assignment is that statements like "Hello, how are you?" are absolutely neutral statements and the constraints 
#of the assignment are that the method needs to bin a string as either positive or negative, so I added an extra statement
#to handle a situation where the neutral coefficient is 1.00.

In [4]:
#Instantiate our sentiment analyzer
sid = SentimentIntensityAnalyzer()

In [5]:
#Define the parameters for what constitutes as a positive, negative, or neutral statement. 
def find_sentiment(pos, neg, neu):
    sentiment = ""
    if pos > neg:
        sentiment = "Positive"
    elif neg > pos:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    return sentiment

In [6]:
#Method 1: Give each the pos, neg, and neu values a field each in the dataset
data['neg'] = data.apply(lambda row: (sid.polarity_scores(row['comments'])['neg']), axis = 1)
data['pos'] = data.apply(lambda row: (sid.polarity_scores(row['comments'])['pos']), axis = 1)
data['neu'] = data.apply(lambda row: (sid.polarity_scores(row['comments'])['neu']), axis = 1)
data['sentiment'] = data.apply(lambda row: find_sentiment(row['pos'], row['neg'], row['neu']), axis = 1)

In [7]:
data

Unnamed: 0,Day of Week,comments,neg,pos,neu,sentiment
0,Monday,"Hello, how are you?",0.0,0.0,1.0,Neutral
1,Tuesday,Today is a good day!,0.0,0.516,0.484,Positive
2,Wednesday,It's my birthday so it's a really special day!,0.0,0.336,0.664,Positive
3,Thursday,Today is neither a good day or a bad day!,0.508,0.0,0.492,Negative
4,Friday,I'm having a bad day.,0.538,0.0,0.462,Negative
5,Saturday,There' s nothing special happening today.,0.361,0.0,0.639,Negative
6,Sunday,Today is a SUPER good day!,0.0,0.723,0.277,Positive


In [8]:
#Method 2: showing only the final determination if a string is pos, neg, or neutral.
data['sentiment'] = data.apply(lambda row: find_sentiment((sid.polarity_scores(row['comments'])['pos']), 
                                                          (sid.polarity_scores(row['comments'])['neg']), 
                                                          (sid.polarity_scores(row['comments'])['neu'])), 
                               axis = 1)

In [9]:
data

Unnamed: 0,Day of Week,comments,neg,pos,neu,sentiment
0,Monday,"Hello, how are you?",0.0,0.0,1.0,Neutral
1,Tuesday,Today is a good day!,0.0,0.516,0.484,Positive
2,Wednesday,It's my birthday so it's a really special day!,0.0,0.336,0.664,Positive
3,Thursday,Today is neither a good day or a bad day!,0.508,0.0,0.492,Negative
4,Friday,I'm having a bad day.,0.538,0.0,0.462,Negative
5,Saturday,There' s nothing special happening today.,0.361,0.0,0.639,Negative
6,Sunday,Today is a SUPER good day!,0.0,0.723,0.277,Positive
