In [2]:
! pip install nltk



## Import the data

Link to data - https://www.kaggle.com/datasets/mdismielhossenabir/sentiment-analysis/data

In [1]:
import pandas as pd
import numpy as np

In [4]:
# Load the sentiment dataset; the path is relative, so the CSV must be in the
# notebook's working directory.
df = pd.read_csv('sentiment_analysis.csv')

In [None]:
# Display the freshly loaded frame with all of its original columns.
df

Unnamed: 0,Year,Month,Day,Time of Tweet,text,sentiment,Platform
0,2018,8,18,morning,What a great day!!! Looks like dream.,positive,Twitter
1,2018,8,18,noon,"I feel sorry, I miss you here in the sea beach",positive,Facebook
2,2017,8,18,night,Don't angry me,negative,Facebook
3,2022,6,8,morning,We attend in the class just for listening teac...,negative,Facebook
4,2022,6,8,noon,"Those who want to go, let them go",negative,Instagram
...,...,...,...,...,...,...,...
494,2015,10,18,night,"According to , a quarter of families under six...",negative,Twitter
495,2021,2,25,morning,the plan to not spend money is not going well,negative,Instagram
496,2022,5,30,noon,uploading all my bamboozle pictures of facebook,neutral,Facebook
497,2018,8,10,night,congratulations ! you guys finish a month ear...,positive,Twitter


In [5]:
# List the distinct ground-truth labels present in the 'sentiment' column.
df['sentiment'].unique()

array(['positive', 'negative', 'neutral'], dtype=object)

In [6]:
# Count how many rows carry each ground-truth label (class balance check).
df['sentiment'].value_counts()

Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
neutral,199
positive,166
negative,134


In [7]:
# Keep only the tweet text and its ground-truth label.
# .copy() makes the subset an independent frame so that adding prediction
# columns to it later does not raise pandas' SettingWithCopyWarning.
df = df[['text', 'sentiment']].copy()

In [8]:
# Display the frame after reducing it to the 'text' and 'sentiment' columns.
df

Unnamed: 0,text,sentiment
0,What a great day!!! Looks like dream.,positive
1,"I feel sorry, I miss you here in the sea beach",positive
2,Don't angry me,negative
3,We attend in the class just for listening teac...,negative
4,"Those who want to go, let them go",negative
...,...,...
494,"According to , a quarter of families under six...",negative
495,the plan to not spend money is not going well,negative
496,uploading all my bamboozle pictures of facebook,neutral
497,congratulations ! you guys finish a month ear...,positive


## Using VADER with NLTK's SentimentIntensityAnalyzer (SIA)

In [12]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Make sure the VADER lexicon is available before building the analyzer:
# SentimentIntensityAnalyzer() loads it on construction and fails with a
# LookupError on a fresh environment where it has not been downloaded yet.
# nltk.download is a no-op when the resource is already up to date.
nltk.download('vader_lexicon', quiet=True)
sia = SentimentIntensityAnalyzer()

In [10]:
# Download the 'vader_lexicon' resource into the local nltk_data directory.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

### Understanding SIA

In [13]:
# Score one sentence that mixes praise and criticism to see what SIA returns.
sentence = "This movie was amazing, but the acting was a bit disappointing."
# polarity_scores returns a dict with 'neg', 'neu', 'pos' and 'compound' entries.
sentiment_scores = sia.polarity_scores(sentence)
print(sentiment_scores)

{'neg': 0.293, 'neu': 0.544, 'pos': 0.163, 'compound': -0.4404}


In [15]:
def analyze_sentiment_nltk(review, pos_threshold=0.4, neg_threshold=-0.3):
    """Label a piece of text using the compound score from the module-level ``sia``.

    Parameters
    ----------
    review : str
        Text to score.
    pos_threshold : float, default 0.4
        Compound scores strictly greater than this are labelled 'positive'.
    neg_threshold : float, default -0.3
        Compound scores strictly less than this are labelled 'negative'.

    Returns
    -------
    str
        'positive', 'neutral' or 'negative'. Scores in the closed interval
        [neg_threshold, pos_threshold] are 'neutral'.
    """
    compound = sia.polarity_scores(review)['compound']
    if compound > pos_threshold:
        return 'positive'
    # Anything that reaches this point is already <= pos_threshold, so only the
    # lower bound needs checking.
    if compound >= neg_threshold:
        return 'neutral'
    return 'negative'

In [16]:
# Run the NLTK classifier over every tweet and store the predicted label.
df['predicted_sentiment_nltk'] = df['text'].map(analyze_sentiment_nltk)


In [17]:
# Display the frame with the NLTK predictions next to the true labels.
df

Unnamed: 0,text,sentiment,predicted_sentiment_nltk
0,What a great day!!! Looks like dream.,positive,positive
1,"I feel sorry, I miss you here in the sea beach",positive,neutral
2,Don't angry me,negative,positive
3,We attend in the class just for listening teac...,negative,neutral
4,"Those who want to go, let them go",negative,neutral
...,...,...,...
494,"According to , a quarter of families under six...",negative,negative
495,the plan to not spend money is not going well,negative,neutral
496,uploading all my bamboozle pictures of facebook,neutral,negative
497,congratulations ! you guys finish a month ear...,positive,positive


In [18]:
# 1 where the NLTK prediction matches the ground-truth label, 0 otherwise.
df['prediction_correctness_nltk'] = (
    df['sentiment'].eq(df['predicted_sentiment_nltk']).astype(int)
)

In [19]:
# Display the frame including the 0/1 correctness flag for the NLTK predictions.
df

Unnamed: 0,text,sentiment,predicted_sentiment_nltk,prediction_correctness_nltk
0,What a great day!!! Looks like dream.,positive,positive,1
1,"I feel sorry, I miss you here in the sea beach",positive,neutral,0
2,Don't angry me,negative,positive,0
3,We attend in the class just for listening teac...,negative,neutral,0
4,"Those who want to go, let them go",negative,neutral,0
...,...,...,...,...
494,"According to , a quarter of families under six...",negative,negative,1
495,the plan to not spend money is not going well,negative,neutral,0
496,uploading all my bamboozle pictures of facebook,neutral,negative,0
497,congratulations ! you guys finish a month ear...,positive,positive,1


In [21]:
# Tally correct (1) versus incorrect (0) NLTK predictions.
df['prediction_correctness_nltk'].value_counts()

Unnamed: 0_level_0,count
prediction_correctness_nltk,Unnamed: 1_level_1
1,323
0,176


## Using TextBlob

In [22]:
! pip install textblob



In [23]:
from textblob import TextBlob


In [24]:
def analyze_sentiment_textblob(review, pos_threshold=0.4, neg_threshold=-0.3):
    """Label a piece of text using TextBlob's sentiment polarity.

    The boundaries follow the same convention as ``analyze_sentiment_nltk``:
    a score exactly equal to ``pos_threshold`` is 'neutral', not 'positive',
    so the two classifiers are compared on identical cut-offs.

    Parameters
    ----------
    review : str
        Text to score.
    pos_threshold : float, default 0.4
        Polarity strictly greater than this is labelled 'positive'.
    neg_threshold : float, default -0.3
        Polarity strictly less than this is labelled 'negative'.

    Returns
    -------
    str
        'positive', 'neutral' or 'negative'. Polarity in the closed interval
        [neg_threshold, pos_threshold] is 'neutral'.
    """
    # Read the polarity once instead of re-evaluating it in every comparison.
    polarity = TextBlob(review).sentiment.polarity
    if polarity > pos_threshold:
        return 'positive'
    # Anything that reaches this point is already <= pos_threshold, so only the
    # lower bound needs checking.
    if polarity >= neg_threshold:
        return 'neutral'
    return 'negative'

In [25]:
# Quick sanity check of the TextBlob classifier on a single sentence.
analyze_sentiment_textblob('This is a good movie')

'positive'

In [26]:
# Run the TextBlob classifier over every tweet and store the predicted label.
df['predicted_sentiment_textblob'] = df['text'].map(analyze_sentiment_textblob)

In [27]:
# 1 where the TextBlob prediction matches the ground-truth label, 0 otherwise.
df['prediction_correctness_textblob'] = (
    df['sentiment'].eq(df['predicted_sentiment_textblob']).astype(int)
)

In [28]:
# Display the frame with both classifiers' predictions and correctness flags.
df

Unnamed: 0,text,sentiment,predicted_sentiment_nltk,prediction_correctness_nltk,predicted_sentiment_textblob,prediction_correctness_textblob
0,What a great day!!! Looks like dream.,positive,positive,1,positive,1
1,"I feel sorry, I miss you here in the sea beach",positive,neutral,0,negative,0
2,Don't angry me,negative,positive,0,negative,1
3,We attend in the class just for listening teac...,negative,neutral,0,neutral,0
4,"Those who want to go, let them go",negative,neutral,0,neutral,0
...,...,...,...,...,...,...
494,"According to , a quarter of families under six...",negative,negative,1,neutral,0
495,the plan to not spend money is not going well,negative,neutral,0,neutral,0
496,uploading all my bamboozle pictures of facebook,neutral,negative,0,neutral,1
497,congratulations ! you guys finish a month ear...,positive,positive,1,neutral,0


In [29]:
# Tally correct (1) versus incorrect (0) TextBlob predictions.
df['prediction_correctness_textblob'].value_counts()

Unnamed: 0_level_0,count
prediction_correctness_textblob,Unnamed: 1_level_1
1,283
0,216
