### Load the data file DailyComments.csv from the Week 4 Data Files into a data frame.

In [2]:
import pandas as pd
# Importing TextBlob's NaiveBayesAnalyzer (a Naive Bayes sentiment classifier built on NLTK)
from textblob.sentiments import NaiveBayesAnalyzer

In [3]:
#read the daily comments csv file into a Pandas DataFrame
daily_comm = pd.read_csv("DailyComments.csv")
#preview the first rows to confirm the file loaded as expected
daily_comm.head()

Unnamed: 0,Day of Week,comments
0,Monday,"Hello, how are you?"
1,Tuesday,Today is a good day!
2,Wednesday,It's my birthday so it's a really special day!
3,Thursday,Today is neither a good day or a bad day!
4,Friday,I'm having a bad day.


### Identify a scheme to categorize each comment as positive or negative. You can devise your own scheme or find a commonly used scheme to perform this sentiment analysis. However you decide to do this, make sure to explain the scheme you decide to use.

TextBlob is a Python library for processing textual data. Its default sentiment analyzer uses a sentiment lexicon (a set of predefined words) to assign scores to each word; these are then combined using a weighted average to give an overall sentiment score for the sentence. Three values are associated with each word: polarity (negative vs. positive), subjectivity (objective vs. subjective) and intensity (whether the word modifies the next word). In this notebook, a comment is labelled Positive when its overall polarity is greater than 0, Negative when it is less than 0, and Neutral when it is exactly 0.

In [4]:
#convenient package for NLP tasks
#TextBlob for sentiment analysis
from textblob import TextBlob

In [5]:
#Example of getting the sentiment score for one comment (positional row 1 of the 'comments' column)
#.sentiment returns a named tuple of the form Sentiment(polarity, subjectivity)
TextBlob(daily_comm['comments'].iloc[1]).sentiment

Sentiment(polarity=0.875, subjectivity=0.6000000000000001)

### Implement your sentiment analysis with code and display the results. Note: DailyComments.csv is a purposely small file, so you will be able to clearly see why the results are what they are.

In [6]:
def get_sentimentScores(df,text_col):
    """Score every text in ``df[text_col]`` with TextBlob and store the scores on ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the texts. It is modified in place.
    text_col : str
        Name of the column containing the texts/comments to score.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with 'Polarity' and 'Subjectivity' columns
        added (or overwritten if they already exist).
    """
    #one TextBlob sentiment result per row, in row order
    #each result is indexable: position 0 is polarity, position 1 is subjectivity
    #polarity --> negative sentiment (-1) to positive sentiment (1)
    #subjectivity --> how much the text reads as opinion, emotion or judgement
    scores = [TextBlob(entry).sentiment for entry in df[text_col]]

    #split the results into the two score columns on the dataframe
    df['Polarity'] = [result[0] for result in scores]
    df['Subjectivity'] = [result[1] for result in scores]

    #hand back the (mutated) input frame so calls can be chained/assigned
    return df

In [7]:
def sentiment_label(score, neutral_threshold=0.0):
    """Map a polarity score to a sentiment label.

    Parameters
    ----------
    score : float
        Polarity score to classify.
    neutral_threshold : float, optional
        Half-width of the neutral band around zero. Scores with
        ``-neutral_threshold <= score <= neutral_threshold`` are labelled
        "Neutral". The default of 0.0 keeps the original behaviour, where
        only a score of exactly 0 is neutral; a small positive value
        (e.g. 0.1) stops near-zero scores from being forced into
        "Positive" or "Negative".

    Returns
    -------
    str or float
        "Neutral", "Positive" or "Negative". A NaN score fails every
        comparison and is returned unchanged.

    Raises
    ------
    ValueError
        If ``neutral_threshold`` is negative.
    """
    #a negative band would make even a score of 0 unclassifiable
    if neutral_threshold < 0:
        raise ValueError("neutral_threshold must be non-negative")

    #neutral: score inside the band around 0 (exactly 0 with the default threshold)
    if -neutral_threshold <= score <= neutral_threshold:
        return "Neutral"
    #score above the band, positive sentiment
    if score > 0:
        return "Positive"
    #score below the band, negative sentiment
    if score < 0:
        return "Negative"
    #only reached when no comparison holds (NaN): pass the missing score through
    return score

In [8]:
def sentiment_assign(df,score_col):
    """Label every row of ``df`` from the scores in ``df[score_col]``.

    Each score is passed through ``sentiment_label`` and the resulting
    labels are written to a 'Sentiment' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the scores. It is modified in place.
    score_col : str
        Name of the column containing the scores to label.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with a 'Sentiment' column added (or overwritten).
    """
    #element-wise labelling of the score column
    labels = df[score_col].apply(sentiment_label)
    df['Sentiment'] = labels
    return df

In [9]:
#getting the sentiment scores from TextBlob for the 'comments' in the dataset
#get_sentimentScores adds 'Polarity' and 'Subjectivity' columns to the dataframe
daily_comm = get_sentimentScores(daily_comm,'comments')
#getting the sentiment label based on 'Polarity' for the dataset using the sentiment_label function
#sentiment_assign stores the labels in a new 'Sentiment' column
daily_comm = sentiment_assign(daily_comm,'Polarity')

In [10]:
#display the comments together with their scores and sentiment labels
daily_comm.head()

Unnamed: 0,Day of Week,comments,Polarity,Subjectivity,Sentiment
0,Monday,"Hello, how are you?",0.0,0.0,Neutral
1,Tuesday,Today is a good day!,0.875,0.6,Positive
2,Wednesday,It's my birthday so it's a really special day!,0.446429,0.571429,Positive
3,Thursday,Today is neither a good day or a bad day!,-0.0875,0.633333,Negative
4,Friday,I'm having a bad day.,-0.7,0.666667,Negative


### Testing NaiveBayesAnalyzer

In [11]:
def sentiment_analysis(df,text_col):
    """Classify every text in ``df[text_col]`` with TextBlob's NaiveBayesAnalyzer.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the texts. It is modified in place.
    text_col : str
        Name of the column containing the texts/comments to classify.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the analyzer's classification label
        (pos or neg) for each row stored in a 'Sentiment' column.

    Notes
    -----
    This writes to the same 'Sentiment' column as ``sentiment_assign``, so
    running both on one frame overwrites the earlier labels.
    """
    #build the analyzer ONCE and share it across all rows: each NaiveBayesAnalyzer
    #instance trains its own classifier the first time it is used, so creating a
    #new one per row would repeat that training for every single comment
    analyzer = NaiveBayesAnalyzer()

    #the Naive Bayes sentiment result is Sentiment(classification, p_pos, p_neg);
    #only the classification label is kept
    df['Sentiment'] = [
        TextBlob(text,analyzer=analyzer).sentiment.classification
        for text in df[text_col]
    ]

    #return the updated DataFrame
    return df

### Extra Credit

In [25]:
#load the IMDB movie reviews dataset into a Pandas DataFrame
#we will use the 'review' column as the text for sentiment analysis
imdb_df = pd.read_csv("IMDB Dataset.csv")
#preview the first rows to confirm the file loaded as expected
imdb_df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [26]:
#dropping 'sentiment' column in imdb dataset since we will try to find that out with our scoring
#use the columns= keyword: passing the axis positionally (drop('sentiment',1)) was
#deprecated in pandas 1.3 and raises a TypeError in pandas 2.0+
imdb_df = imdb_df.drop(columns='sentiment')

In [27]:
#applying sentiment analysis on 'review' column in imdb dataset
#getting the sentiment scores from TextBlob for the 'review' in the dataset
#get_sentimentScores adds 'Polarity' and 'Subjectivity' columns to the dataframe
imdb_df = get_sentimentScores(imdb_df,'review')
#getting the sentiment label based on 'Polarity' for the dataset using the sentiment_label function
#sentiment_assign stores the labels in a new 'Sentiment' column
imdb_df = sentiment_assign(imdb_df,'Polarity')

In [28]:
#display the reviews together with their scores and sentiment labels
imdb_df.head()

Unnamed: 0,review,Polarity,Subjectivity,Sentiment
0,One of the other reviewers has mentioned that ...,0.023433,0.490369,Positive
1,A wonderful little production. <br /><br />The...,0.109722,0.559343,Positive
2,I thought this was a wonderful way to spend ti...,0.354008,0.65873,Positive
3,Basically there's a family where a little boy ...,-0.057813,0.454167,Negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",0.217952,0.452916,Positive
