# Sentiment Analysis of comments using VaderSentiment

## Import Libraries

### Main Libraries

In [1]:
import pandas as pd
import numpy as np
import re

### NLP Libraries

In [2]:
# NLTK
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# VaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

## Load Dataset

In [3]:
# Read the cleaned comments export (tab-separated) into a DataFrame
comments_path = 'C:/Users/cherryb/Desktop/Personal Projects/Datasets/Telus - Fintech/cleaned/commentsCleaned_VSent-TBlob.tsv'
df = pd.read_csv(comments_path, sep='\t')
# Preview the first three rows
df.head(3)

Unnamed: 0.1,Unnamed: 0,is_reply,comment_message,comment_published,comment_like_count,attachment_type
0,0,1,Why?,2019-06-18T00:08:26+0000,0,
1,1,0,The worst card ever it’s lawsuit time,2019-06-04T19:53:28+0000,6,
2,2,0,I think some needs to hear this. Over the year...,2019-05-14T03:28:34+0000,0,


## Sentiment Analysis

In [4]:
# Keep only the comment text for the sentiment analysis, stored in df_comments.
# Selecting the column first and copying afterwards gives df_comments its own
# independent one-column frame: the column assignments made in the following
# cells then do not write into a slice of another frame (the usual trigger for
# SettingWithCopyWarning), and only the needed column is copied instead of the
# whole dataset.
df_comments = df[['comment_message']].copy()

In [5]:
# Cast the comment text column to the generic object dtype
comment_text = df_comments['comment_message']
df_comments['comment_message'] = comment_text.astype('object')

In [6]:
# Count missing values per column
df_comments.isna().sum()

comment_message    0
dtype: int64

In [7]:
# Sentence segmentation: 'segment_message' holds each comment's list of sentences
df_comments['segment_message'] = df_comments['comment_message'].apply(sent_tokenize)

In [8]:
SentiAnalyzer = SentimentIntensityAnalyzer()

# Helper that scores a single sentence with the shared analyzer
def get_compound_score(sentence):
    '''
    Input: Sentence (Str)
    Return: Compound score (Float)
    Runs the sentence through SentiAnalyzer.polarity_scores and returns
    the value stored under the 'compound' key of the result.
    '''
    polarity = SentiAnalyzer.polarity_scores(sentence)
    return polarity['compound']

In [9]:
# Function to get sentiment from compound score
def get_sentiment(score, threshold=0.05):
    '''
    Input: score (Float) - compound score to classify
           threshold (Float, optional) - cut-off between neutral and
           positive/negative, default 0.05
    Return: Sentiment label (Str) - 'Positive', 'Neutral' or 'Negative'

    1. positive sentiment: score >= threshold
    2. neutral sentiment: score > -threshold and < threshold
    3. negative sentiment: score <= -threshold

    A NaN score satisfies neither bound and is therefore labelled 'Neutral'.
    '''
    if score >= threshold:
        return 'Positive'
    # Test the negative bound explicitly instead of using it as the fall-through:
    # NaN fails every comparison and must not end up labelled 'Negative'.
    if score <= -threshold:
        return 'Negative'
    return 'Neutral'

In [10]:
# Create 'sentiment_score' column: one compound score per sentence of the comment
df_comments['sentiment_score'] = df_comments['segment_message'].apply(
    lambda sentences: [get_compound_score(sentence) for sentence in sentences]
)
# Average the per-sentence scores into 'avg_sentiment_score'.
# A comment that tokenized to no sentences has an empty score list; it is scored
# 0.0 (no detectable sentiment) instead of failing with a ZeroDivisionError.
df_comments['avg_sentiment_score'] = df_comments['sentiment_score'].apply(
    lambda scores: sum(scores) / len(scores) if scores else 0.0
)
# Map the averaged score to its sentiment label
df_comments['sentiment_vader'] = df_comments['avg_sentiment_score'].apply(get_sentiment)

# Drop the intermediate 'segment_message' column
df_comments = df_comments.drop(columns=['segment_message'])

In [11]:
# Preview the first five scored comments
df_comments.head(n=5)

Unnamed: 0,comment_message,sentiment_score,avg_sentiment_score,sentiment_vader
0,Why?,[0.0],0.0,Neutral
1,The worst card ever it’s lawsuit time,[-0.7184],-0.7184,Negative
2,I think some needs to hear this. Over the year...,"[0.0, 0.0, 0.7783, 0.0, 0.8126, 0.5413]",0.355367,Positive
3,"How long does it take to get refunded money, P...","[0.0, 0.0]",0.0,Neutral
4,REALLY? Recipes From Heaven,"[0.0, 0.5106]",0.2553,Positive


## Save as commentsVaderSentiments.tsv

In [12]:
# Write the scored comments to a tab-separated file.
# No `index` argument is passed, so the pandas default applies and the row index
# is written as the first (unnamed) column.
results_path = 'C:/Users/cherryb/Desktop/Personal Projects/Datasets/Telus - Fintech/results/commentsVaderSentiments.tsv'
df_comments.to_csv(results_path, sep='\t')