# Prepare Sentiment Analysis with VADER

## Import Packages and Load Data

In [None]:
# Necessary libraries
import ast

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set(font_scale=1.5, style = 'whitegrid', color_codes=True)

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer


In [None]:
# Mount Google Drive in the Colab runtime so files under /content/drive
# can be read by later cells.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the tweets together with their topic assignments from the mounted Drive.
topics_csv_path = '/content/drive/MyDrive/Skripsi/Twitter_topics.csv'
df = pd.read_csv(topics_csv_path)

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,Text,Token,Dominant_Topic,Contribution%,Topic_Name
0,mathlievetradingsignaltime075924design closing point accord thecryptocurrency ai btc etherum bnb orc20 bch xrp usdt shib link dydx chatgpt,"['design', 'closing', 'point', 'accord', 'ai', 'btc', 'etherum', 'bnb', 'orc', 'bch', 'xrp', 'usdt', 'shib', 'link', 'dydx', 'chatgpt']",3,60.46,T3 Experiment AI
1,chatgpt cause website scrap stop get updated plan train model still also think far early ai company get sue oblivion ip theft,"['chatgpt', 'cause', 'website', 'scrap', 'stop', 'get', 'updated', 'plan', 'train', 'model', 'still', 'also', 'think', 'far', 'early', 'ai', 'company', 'get', 'sue', 'oblivion', 'ip', 'theft']",1,91.16,T1 Information Technology
2,one way education foundation could stop pay many people endless englishmajoring chatgpt start pay people system work systemsmatter,"['one', 'way', 'education', 'foundation', 'could', 'stop', 'pay', 'many', 'people', 'endless', 'englishmajoring', 'chatgpt', 'start', 'pay', 'people', 'system', 'work', 'systemsmatter']",4,91.76,T4 Random Ask and Conversations
3,african union commission fall victim cybercrime use deep fake impersonate head moussa faki incident highlight need strong cybersecurity measure worldwide cybersecurity deepfakes africanunioncommis...,"['african', 'union', 'commission', 'fall', 'victim', 'cybercrime', 'use', 'deep', 'fake', 'impersonate', 'head', 'moussa', 'faki', 'incident', 'highlight', 'need', 'strong', 'cybersecurity', 'meas...",1,74.41,T1 Information Technology
4,chatgpt,['chatgpt'],1,25.0,T1 Information Technology


In [None]:
# The 'Token' column is read from CSV as the string representation of a
# Python list (e.g. "['design', 'closing']"); parse it back into a real list.
# ast.literal_eval only evaluates literals, so it is safe for this purpose.
# The isinstance guard keeps the cell re-runnable: once the values are already
# lists, literal_eval would raise on them, so they are passed through as-is.
df['Token'] = df['Token'].apply(
    lambda tokens: ast.literal_eval(tokens) if isinstance(tokens, str) else tokens
)

In [None]:
# Overwrite 'Text' with the cleaned tokens joined by single spaces, so the
# text fed to the sentiment model matches the token list exactly.
df['Text'] = df['Token'].apply(' '.join)

In [None]:
# Preview the frame after parsing 'Token' and rebuilding 'Text'.
df.head(n=5)

Unnamed: 0,Text,Token,Dominant_Topic,Contribution%,Topic_Name
0,design closing point accord ai btc etherum bnb orc bch xrp usdt shib link dydx chatgpt,"[design, closing, point, accord, ai, btc, etherum, bnb, orc, bch, xrp, usdt, shib, link, dydx, chatgpt]",3,60.46,T3 Experiment AI
1,chatgpt cause website scrap stop get updated plan train model still also think far early ai company get sue oblivion ip theft,"[chatgpt, cause, website, scrap, stop, get, updated, plan, train, model, still, also, think, far, early, ai, company, get, sue, oblivion, ip, theft]",1,91.16,T1 Information Technology
2,one way education foundation could stop pay many people endless englishmajoring chatgpt start pay people system work systemsmatter,"[one, way, education, foundation, could, stop, pay, many, people, endless, englishmajoring, chatgpt, start, pay, people, system, work, systemsmatter]",4,91.76,T4 Random Ask and Conversations
3,african union commission fall victim cybercrime use deep fake impersonate head moussa faki incident highlight need strong cybersecurity measure worldwide cybersecurity deepfakes,"[african, union, commission, fall, victim, cybercrime, use, deep, fake, impersonate, head, moussa, faki, incident, highlight, need, strong, cybersecurity, measure, worldwide, cybersecurity, deepfa...",1,74.41,T1 Information Technology
4,chatgpt,[chatgpt],1,25.0,T1 Information Technology


In [None]:
# Keep only the columns needed for sentiment analysis. The .copy() yields an
# independent frame, so columns added to df_s later do not touch df.
sentiment_columns = ['Text', 'Dominant_Topic', 'Topic_Name']
df_s = df[sentiment_columns].copy()
df_s.head()

Unnamed: 0,Text,Dominant_Topic,Topic_Name
0,design closing point accord ai btc etherum bnb orc bch xrp usdt shib link dydx chatgpt,3,T3 Experiment AI
1,chatgpt cause website scrap stop get updated plan train model still also think far early ai company get sue oblivion ip theft,1,T1 Information Technology
2,one way education foundation could stop pay many people endless englishmajoring chatgpt start pay people system work systemsmatter,4,T4 Random Ask and Conversations
3,african union commission fall victim cybercrime use deep fake impersonate head moussa faki incident highlight need strong cybersecurity measure worldwide cybersecurity deepfakes,1,T1 Information Technology
4,chatgpt,1,T1 Information Technology


## VADER Model Implementation

In [None]:
# Fetch the VADER lexicon that SentimentIntensityAnalyzer relies on.
# The call's return value is displayed as the cell output.
import nltk

vader_resource = 'vader_lexicon'
nltk.download(vader_resource)

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [None]:
%%time
#Initialize the vader sentiment analyser
vader_model = SentimentIntensityAnalyzer()
#apply VADER model to the dataset
df_s['VADER_Scores'] = df_s['Text'].apply(lambda x: vader_model.polarity_scores(x))

CPU times: user 332 ms, sys: 799 µs, total: 333 ms
Wall time: 361 ms


In [None]:
# VADER returns its scores as a dictionary; take the 'compound' entry as the
# polarity score of each tweet.
df_s['VADER_Polarity'] = df_s['VADER_Scores'].apply(lambda scores: scores['compound'])

# Assign a sentiment label to each tweet from its polarity score.
# pd.cut bins are closed on the right, so the effective intervals are:
#   [-1, -0.25]    => Negative
#   (-0.25, 0.25]  => Neutral
#   (0.25, 1]      => Positive
# include_lowest=True closes the first bin on the left; without it a polarity
# of exactly -1 falls outside every bin and is labelled NaN.
df_s['VADER_Sentiment'] = pd.cut(df_s['VADER_Polarity'],
                                bins=[-1, -0.25, 0.25, 1],
                                labels=['Negative', 'Neutral', 'Positive'],
                                include_lowest=True)

In [None]:
# The raw score dictionaries are no longer needed once the polarity and label
# columns exist; remove that helper column from df_s in place.
df_s.drop(columns=['VADER_Scores'], inplace=True)

In [None]:
# Preview the final frame with the polarity score and sentiment label.
df_s.head(n=5)

Unnamed: 0,Text,Dominant_Topic,Topic_Name,VADER_Polarity,VADER_Sentiment
0,design closing point accord ai btc etherum bnb orc bch xrp usdt shib link dydx chatgpt,3,T3 Experiment AI,0.0,Neutral
1,chatgpt cause website scrap stop get updated plan train model still also think far early ai company get sue oblivion ip theft,1,T1 Information Technology,-0.296,Negative
2,one way education foundation could stop pay many people endless englishmajoring chatgpt start pay people system work systemsmatter,4,T4 Random Ask and Conversations,-0.4588,Negative
3,african union commission fall victim cybercrime use deep fake impersonate head moussa faki incident highlight need strong cybersecurity measure worldwide cybersecurity deepfakes,1,T1 Information Technology,0.128,Neutral
4,chatgpt,1,T1 Information Technology,0.0,Neutral


In [None]:
# Save the DataFrame to a CSV file, leaving out the row index.
# NOTE(review): the path is relative, so the file goes to the current working
# directory rather than the mounted Drive folder; confirm this is intended.
output_csv_path = 'Twitter_VADER.csv'
df_s.to_csv(output_csv_path, index=False)
