# Sentiment Analysis on Youtube Comments

In [49]:
import pandas as pd
import numpy as np
import nltk
nltk.download(['vader_lexicon'])
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from collections import Counter
import matplotlib.pyplot as plt

%matplotlib inline
pd.options.mode.chained_assignment = None
pd.set_option('display.max_colwidth', 100)

with open('youtube_comments_clean.csv',encoding="utf8") as file:
    df = pd.read_csv(file)
file.close()
df.head()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\seung\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Unnamed: 0.1,Unnamed: 0,link_title,channel,no_of_views,time_uploaded,comment,upvotes,no_contract,comments_str,tokenized,lower,no_punct,no_stopwords,pos_tags,wordnet_pos,lemmatized
0,0,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,SciShow,"85,566 views","Jan 30, 2021","I swear, whoever is playing Plague Inc needs to stop it😤😒",486,"['I', 'swear,', 'whoever', 'is', 'playing', 'Plague', 'Inc', 'needs', 'to', 'stop', 'it😤😒']","I swear, whoever is playing Plague Inc needs to stop it😤😒","['I', 'swear', ',', 'whoever', 'is', 'playing', 'Plague', 'Inc', 'needs', 'to', 'stop', 'it😤😒']","['i', 'swear', ',', 'whoever', 'is', 'playing', 'plague', 'inc', 'needs', 'to', 'stop', 'it😤😒']","['i', 'swear', 'whoever', 'is', 'playing', 'plague', 'inc', 'needs', 'to', 'stop', 'it😤😒']","['swear', 'whoever', 'playing', 'plague', 'inc', 'needs', 'stop', 'it😤😒']","[('swear', 'JJ'), ('whoever', 'WP'), ('playing', 'VBG'), ('plague', 'NN'), ('inc', 'NN'), ('need...","[('swear', 'a'), ('whoever', 'n'), ('playing', 'v'), ('plague', 'n'), ('inc', 'n'), ('needs', 'v...","['swear', 'whoever', 'play', 'plague', 'inc', 'need', 'stop', 'it😤😒']"
1,1,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,SciShow,"85,566 views","Jan 30, 2021",I am getting real tired of being part of major historical events.,426,"['I', 'am', 'getting', 'real', 'tired', 'of', 'being', 'part', 'of', 'major', 'historical', 'eve...",I am getting real tired of being part of major historical events.,"['I', 'am', 'getting', 'real', 'tired', 'of', 'being', 'part', 'of', 'major', 'historical', 'eve...","['i', 'am', 'getting', 'real', 'tired', 'of', 'being', 'part', 'of', 'major', 'historical', 'eve...","['i', 'am', 'getting', 'real', 'tired', 'of', 'being', 'part', 'of', 'major', 'historical', 'eve...","['getting', 'real', 'tired', 'part', 'major', 'historical', 'events']","[('getting', 'VBG'), ('real', 'JJ'), ('tired', 'JJ'), ('part', 'NN'), ('major', 'JJ'), ('histori...","[('getting', 'v'), ('real', 'a'), ('tired', 'a'), ('part', 'n'), ('major', 'a'), ('historical', ...","['get', 'real', 'tired', 'part', 'major', 'historical', 'event']"
2,2,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,SciShow,"85,566 views","Jan 30, 2021",This channel's COVID reports are a model of how science reporting should be done. Matter-of-fac...,797,"['This', ""channel's"", 'COVID', 'reports', 'are', 'a', 'model', 'of', 'how', 'science', 'reportin...",This channel's COVID reports are a model of how science reporting should be done. Matter-of-fact...,"['This', 'channel', ""'s"", 'COVID', 'reports', 'are', 'a', 'model', 'of', 'how', 'science', 'repo...","['this', 'channel', ""'s"", 'covid', 'reports', 'are', 'a', 'model', 'of', 'how', 'science', 'repo...","['this', 'channel', ""'s"", 'covid', 'reports', 'are', 'a', 'model', 'of', 'how', 'science', 'repo...","['channel', ""'s"", 'covid', 'reports', 'model', 'science', 'reporting', 'done', 'matter-of-fact',...","[('channel', 'NN'), (""'s"", 'POS'), ('covid', 'NN'), ('reports', 'NNS'), ('model', 'NN'), ('scien...","[('channel', 'n'), (""'s"", 'n'), ('covid', 'n'), ('reports', 'n'), ('model', 'n'), ('science', 'n...","['channel', ""'s"", 'covid', 'report', 'model', 'science', 'report', 'do', 'matter-of-fact', 'pres..."
3,3,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,SciShow,"85,566 views","Jan 30, 2021","To people who understand science, the development of these vaccines in so short of a time frame ...",105,"['To', 'people', 'who', 'understand', 'science,', 'the', 'development', 'of', 'these', 'vaccines...","To people who understand science, the development of these vaccines in so short of a time frame ...","['To', 'people', 'who', 'understand', 'science', ',', 'the', 'development', 'of', 'these', 'vacc...","['to', 'people', 'who', 'understand', 'science', ',', 'the', 'development', 'of', 'these', 'vacc...","['to', 'people', 'who', 'understand', 'science', 'the', 'development', 'of', 'these', 'vaccines'...","['people', 'understand', 'science', 'development', 'vaccines', 'short', 'time', 'frame', 'amazin...","[('people', 'NNS'), ('understand', 'VBP'), ('science', 'NN'), ('development', 'NN'), ('vaccines'...","[('people', 'n'), ('understand', 'v'), ('science', 'n'), ('development', 'n'), ('vaccines', 'n')...","['people', 'understand', 'science', 'development', 'vaccine', 'short', 'time', 'frame', 'amazing..."
4,4,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,SciShow,"85,566 views","Jan 30, 2021","So with these new variants this whole thing feels like it will be fought like the flu, at least ...",205,"['So', 'with', 'these', 'new', 'variants', 'this', 'whole', 'thing', 'feels', 'like', 'it', 'wil...","So with these new variants this whole thing feels like it will be fought like the flu, at least ...","['So', 'with', 'these', 'new', 'variants', 'this', 'whole', 'thing', 'feels', 'like', 'it', 'wil...","['so', 'with', 'these', 'new', 'variants', 'this', 'whole', 'thing', 'feels', 'like', 'it', 'wil...","['so', 'with', 'these', 'new', 'variants', 'this', 'whole', 'thing', 'feels', 'like', 'it', 'wil...","['new', 'variants', 'whole', 'thing', 'feels', 'like', 'fought', 'like', 'flu', 'least', 'long',...","[('new', 'JJ'), ('variants', 'NNS'), ('whole', 'JJ'), ('thing', 'NN'), ('feels', 'NNS'), ('like'...","[('new', 'a'), ('variants', 'n'), ('whole', 'a'), ('thing', 'n'), ('feels', 'n'), ('like', 'n'),...","['new', 'variant', 'whole', 'thing', 'feel', 'like', 'fought', 'like', 'flu', 'least', 'long', '..."


In [50]:
df.drop(['time_uploaded','no_of_views','channel','no_contract','tokenized','lower','no_punct','no_stopwords','pos_tags','wordnet_pos'], axis=1,inplace=True)

In [51]:
# listify the stringed list
df["lemmatized"] = df["lemmatized"].apply(lambda comment : eval(comment))
df['lemma_str'] = [' '.join(map(str,l)) for l in df['lemmatized']]
df.head()

Unnamed: 0.1,Unnamed: 0,link_title,comment,upvotes,comments_str,lemmatized,lemma_str
0,0,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,"I swear, whoever is playing Plague Inc needs to stop it😤😒",486,"I swear, whoever is playing Plague Inc needs to stop it😤😒","[swear, whoever, play, plague, inc, need, stop, it😤😒]",swear whoever play plague inc need stop it😤😒
1,1,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,I am getting real tired of being part of major historical events.,426,I am getting real tired of being part of major historical events.,"[get, real, tired, part, major, historical, event]",get real tired part major historical event
2,2,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,This channel's COVID reports are a model of how science reporting should be done. Matter-of-fac...,797,This channel's COVID reports are a model of how science reporting should be done. Matter-of-fact...,"[channel, 's, covid, report, model, science, report, do, matter-of-fact, presenting, detail, not...",channel 's covid report model science report do matter-of-fact presenting detail note limit stud...
3,3,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,"To people who understand science, the development of these vaccines in so short of a time frame ...",105,"To people who understand science, the development of these vaccines in so short of a time frame ...","[people, understand, science, development, vaccine, short, time, frame, amazing, understand, sci...",people understand science development vaccine short time frame amazing understand science sound ...
4,4,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,"So with these new variants this whole thing feels like it will be fought like the flu, at least ...",205,"So with these new variants this whole thing feels like it will be fought like the flu, at least ...","[new, variant, whole, thing, feel, like, fought, like, flu, least, long, term]",new variant whole thing feel like fought like flu least long term


In [52]:
sia = SentimentIntensityAnalyzer()

In [53]:
# make a column of VADER polarity scores, with -1 being most negative and 1 being most positive
df["sentiment_compound"] = df["lemma_str"].apply(lambda comment: sia.polarity_scores(comment)["compound"])

In [54]:
df.head()

Unnamed: 0.1,Unnamed: 0,link_title,comment,upvotes,comments_str,lemmatized,lemma_str,sentiment_compound
0,0,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,"I swear, whoever is playing Plague Inc needs to stop it😤😒",486,"I swear, whoever is playing Plague Inc needs to stop it😤😒","[swear, whoever, play, plague, inc, need, stop, it😤😒]",swear whoever play plague inc need stop it😤😒,0.0
1,1,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,I am getting real tired of being part of major historical events.,426,I am getting real tired of being part of major historical events.,"[get, real, tired, part, major, historical, event]",get real tired part major historical event,-0.4404
2,2,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,This channel's COVID reports are a model of how science reporting should be done. Matter-of-fac...,797,This channel's COVID reports are a model of how science reporting should be done. Matter-of-fact...,"[channel, 's, covid, report, model, science, report, do, matter-of-fact, presenting, detail, not...",channel 's covid report model science report do matter-of-fact presenting detail note limit stud...,0.0
3,3,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,"To people who understand science, the development of these vaccines in so short of a time frame ...",105,"To people who understand science, the development of these vaccines in so short of a time frame ...","[people, understand, science, development, vaccine, short, time, frame, amazing, understand, sci...",people understand science development vaccine short time frame amazing understand science sound ...,0.743
4,4,We Know More About Those COVID-19 Variants. It's Not Great | SciShow News,"So with these new variants this whole thing feels like it will be fought like the flu, at least ...",205,"So with these new variants this whole thing feels like it will be fought like the flu, at least ...","[new, variant, whole, thing, feel, like, fought, like, flu, least, long, term]",new variant whole thing feel like fought like flu least long term,0.0258


In [55]:
print(type(df['sentiment_compound']))

<class 'pandas.core.series.Series'>


In [1]:
# graph out sentiment distribution
plt.figure(figsize=(50,30))
plt.xlabel('Sentiment', fontsize=50)
plt.xticks(fontsize=40)
plt.ylabel('Frequency', fontsize=50)
plt.yticks(fontsize=40)
plt.hist(df['sentiment_compound'], bins=50)
plt.title('sentiment Distribution', fontsize=60)
plt.show()

NameError: name 'plt' is not defined