## My Twitter Text Report (Reworked)
By Spencer Murphy

#### Date Created: 3/31/21
#### Last Updated: 3/31/21

In [1]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import warnings
from matplotlib import pyplot as plt

In [2]:
you = pd.read_csv('YouTubeVaccination_Dec14-Dec26_2020_toshare.csv')
you.keys()



Index(['Date', 'Title', 'Url', 'Domain', 'Page Type', 'Language', 'Author',
       'Full Name', 'Full Text', 'Original Url', 'Thread Entry Type',
       'Page Type Name', 'Pub Type'],
      dtype='object')

In [3]:
you.head()

Unnamed: 0,Date,Title,Url,Domain,Page Type,Language,Author,Full Name,Full Text,Original Url,Thread Entry Type,Page Type Name,Pub Type
0,56:24.0,Comment on video https://www.youtube.com/watch...,https://www.youtube.com/watch?v=sT80A5uPREolc=...,youtube.com,youtube,en,RJ MacReady,RJ MacReady,"I got the Moderna vaccine, here’s my list of s...",https://www.youtube.com/watch?v=sT80A5uPREolc=...,reply,YouTube,VCOMMENT
1,36:27.0,Comment on video https://www.youtube.com/watch...,https://www.youtube.com/watch?v=dxVCqwAD8gQlc=...,youtube.com,youtube,en,dontaskme,dontaskme,@Charlotte Haggerty Do you really believe ever...,https://www.youtube.com/watch?v=dxVCqwAD8gQlc=...,reply,YouTube,VCOMMENT
2,34:33.0,Comment on video https://www.youtube.com/watch...,https://www.youtube.com/watch?v=2Wz7KvLFgYQlc=...,youtube.com,youtube,en,Salavin,Salavin,"I hate it when they say ""You don't want a covi...",https://www.youtube.com/watch?v=2Wz7KvLFgYQlc=...,reply,YouTube,VCOMMENT
3,30:26.0,Comment on video https://www.youtube.com/watch...,https://www.youtube.com/watch?v=dxVCqwAD8gQlc=...,youtube.com,youtube,en,dontaskme,dontaskme,"@Sweet Heart Well, Quantas, an Australian airl...",https://www.youtube.com/watch?v=dxVCqwAD8gQlc=...,reply,YouTube,VCOMMENT
4,27:13.0,Comment on video https://www.youtube.com/watch...,https://www.youtube.com/watch?v=7VG_s2PCH_clc=...,youtube.com,youtube,en,Kristi Allman,Kristi Allman,@AngryAngy Here they recommend a flu shot duri...,https://www.youtube.com/watch?v=7VG_s2PCH_clc=...,reply,YouTube,VCOMMENT


In [4]:
you['Full Text'].head()

0    I got the Moderna vaccine, here’s my list of s...
1    @Charlotte Haggerty Do you really believe ever...
2    I hate it when they say "You don't want a covi...
3    @Sweet Heart Well, Quantas, an Australian airl...
4    @AngryAngy Here they recommend a flu shot duri...
Name: Full Text, dtype: object

### Word Frequency Analysis

In [5]:
you['lowercase'] = [x.lower() for x in you['Full Text']]
you['lowercase'].head(10)

0    i got the moderna vaccine, here’s my list of s...
1    @charlotte haggerty do you really believe ever...
2    i hate it when they say "you don't want a covi...
3    @sweet heart well, quantas, an australian airl...
4    @angryangy here they recommend a flu shot duri...
5    we don't need a corona vaccine, we need a come...
6    i had no idea of who this guy was prior to thi...
7    @angryangy zero medical test is done before se...
8    @justin kirschenman i don't know about your lo...
9    remember when the french scientist said test a...
Name: lowercase, dtype: object

In [6]:
warnings.filterwarnings('ignore')

In [7]:
import nltk
from nltk import tokenize as tk
from nltk import FreqDist
from nltk.corpus import stopwords
import string
from collections import Counter

  
mystopwords = stopwords.words('english')
mystopwords.append("n't")
mystopwords.append("'s'")
len(mystopwords)

181

In [8]:
you['tokens'] = [tk.word_tokenize(x) for x in you['lowercase']]
you['tokens'].head(10)

0    [i, got, the, moderna, vaccine, ,, here, ’, s,...
1    [@, charlotte, haggerty, do, you, really, beli...
2    [i, hate, it, when, they, say, ``, you, do, n'...
3    [@, sweet, heart, well, ,, quantas, ,, an, aus...
4    [@, angryangy, here, they, recommend, a, flu, ...
5    [we, do, n't, need, a, corona, vaccine, ,, we,...
6    [i, had, no, idea, of, who, this, guy, was, pr...
7    [@, angryangy, zero, medical, test, is, done, ...
8    [@, justin, kirschenman, i, do, n't, know, abo...
9    [remember, when, the, french, scientist, said,...
Name: tokens, dtype: object

In [9]:
filtered = []
for i in you['tokens']:
    filtered.append([word for word in i if word not in mystopwords])

you['filtered'] = filtered

In [10]:
you['filtered'][1]

['@',
 'charlotte',
 'haggerty',
 'really',
 'believe',
 'everything',
 'told',
 '?',
 ',',
 'vaccines',
 'end',
 'pandemic',
 '.',
 'herd',
 'immunization',
 '.',
 'however',
 ',',
 'claim',
 'vaccine',
 '.',
 'virus',
 'gone',
 'august',
 ',',
 'btw',
 '.']

In [11]:
no_punct = []
for i in you['filtered']:
    no_punct.append([word for word in i if word not in string.punctuation])

you['no_punct'] = no_punct
you['no_punct'][1]

['charlotte',
 'haggerty',
 'really',
 'believe',
 'everything',
 'told',
 'vaccines',
 'end',
 'pandemic',
 'herd',
 'immunization',
 'however',
 'claim',
 'vaccine',
 'virus',
 'gone',
 'august',
 'btw']

In [12]:
freq1 = [FreqDist(x) for x in you['no_punct']]
you['freqs1'] = freq1
you['freqs1'].head(20)

0     {'got': 1, 'moderna': 1, 'vaccine': 1, '’': 1,...
1     {'charlotte': 1, 'haggerty': 1, 'really': 1, '...
2     {'hate': 1, 'say': 1, '``': 1, 'want': 1, 'cov...
3     {'sweet': 1, 'heart': 1, 'well': 1, 'quantas':...
4     {'angryangy': 1, 'recommend': 1, 'flu': 5, 'sh...
5     {'need': 3, 'corona': 1, 'vaccine': 1, 'comeba...
6     {'idea': 1, 'guy': 1, 'prior': 1, 'interview':...
7     {'angryangy': 1, 'zero': 1, 'medical': 1, 'tes...
8     {'justin': 1, 'kirschenman': 1, 'know': 1, 'lo...
9     {'remember': 1, 'french': 1, 'scientist': 1, '...
10    {'angryangy': 1, ''ve': 1, 'said': 1, 'limit':...
11    {'use': 1, 'brain': 1, 'please': 1, 'samiitb':...
12    {'krish': 1, 'soni': 1, 'assuming': 1, 'englis...
13    {'south': 1, 'african': 1, 'variant': 1, 'one'...
14    {'part': 1, 'satan': 1, 'gates': 1, 'vaccine':...
15    {'enough': 1, 'lies': 1, 'already': 1, 'patent...
16    {'angryangy': 1, 'america': 3, 'different': 1,...
17    {'love': 1, 'jre': 1, 'joe': 2, 'pretty': 

In [13]:
you['f1_counts'] = [Counter(x) for x in you['freqs1']]
you['f1_counts'][1]

Counter({'charlotte': 1,
         'haggerty': 1,
         'really': 1,
         'believe': 1,
         'everything': 1,
         'told': 1,
         'vaccines': 1,
         'end': 1,
         'pandemic': 1,
         'herd': 1,
         'immunization': 1,
         'however': 1,
         'claim': 1,
         'vaccine': 1,
         'virus': 1,
         'gone': 1,
         'august': 1,
         'btw': 1})

In [14]:
wordcounts = Counter()
for i in you['f1_counts']:
    wordcounts += i

In [15]:
counts = pd.DataFrame.from_dict(dict(wordcounts), orient='index')
counts.head()

Unnamed: 0,0
got,705
moderna,378
vaccine,17251
’,5448
list,96


In [16]:
counts = counts.rename(columns = {0: 'counts'})
counts

Unnamed: 0,counts
got,705
moderna,378
vaccine,17251
’,5448
list,96
...,...
adventure,1
hindi,1
urdu,1
👍💜👍,1


In [17]:
count_sort = counts.sort_values('counts', ascending = False)
count_sort

Unnamed: 0,counts
vaccine,17251
’,5448
people,4929
's,4614
https,4469
...,...
thinkers,1
ivor,1
cummins,1
nerseries,1


In [None]:
fig, ax0 = plt.subplots(figsize=(15,10))
ax0.bar(count_sort.index, count_sort['counts'])
plt.xticks(rotation=90, fontsize = 'small')
plt.show()

In [18]:
stemmer = nltk.stem.SnowballStemmer("english")
stems = []
for i in you['no_punct']:
    stems.append([stemmer.stem(word) for word in i])
    
you['stemmed'] = stems

In [19]:
freq2 = [FreqDist(x) for x in you['stemmed']]
you['freqs2'] = freq2
you['freqs2']

0        {'got': 1, 'moderna': 1, 'vaccin': 1, '’': 1, ...
1        {'charlott': 1, 'haggerti': 1, 'realli': 1, 'b...
2        {'hate': 1, 'say': 1, '``': 1, 'want': 1, 'cov...
3        {'sweet': 1, 'heart': 1, 'well': 1, 'quanta': ...
4        {'angryangi': 1, 'recommend': 1, 'flu': 5, 'sh...
                               ...                        
12486    {'pfizer': 1, 'fine': 1, 'corpor': 1, 'us': 1,...
12487    {'women': 1, 'first': 3, 'person': 2, 'get': 1...
12488    {'breast': 1, 'cancer': 1, 'survivor': 1, 'goo...
12489    {'panel': 1, 'member': 1, 'first': 1, 'get': 1...
12490    {'tom': 1, 'r': 1, 'wo': 1, 'return': 1, 'easi...
Name: freqs2, Length: 12491, dtype: object

In [23]:
you['f2_counts'] = [Counter(x) for x in you['freqs2']]
you['f2_counts'][1]

Counter({'charlott': 1,
         'haggerti': 1,
         'realli': 1,
         'believ': 1,
         'everyth': 1,
         'told': 1,
         'vaccin': 2,
         'end': 1,
         'pandem': 1,
         'herd': 1,
         'immun': 1,
         'howev': 1,
         'claim': 1,
         'virus': 1,
         'gone': 1,
         'august': 1,
         'btw': 1})

In [24]:
wordcounts = Counter()
for i in you['f2_counts']:
    wordcounts += i

In [25]:
counts2 = pd.DataFrame.from_dict(dict(wordcounts), orient='index')
counts2

Unnamed: 0,0
got,706
moderna,378
vaccin,20729
’,5448
list,150
...,...
//www.mdpi.com/1660-4601/17/22/8674,1
adventur,1
hindi,1
urdu,1


In [32]:
counts2 = counts2.rename(columns = {0: 'counts'})
counts2

Unnamed: 0,counts
got,706
moderna,378
vaccin,20729
’,5448
list,150
...,...
//www.mdpi.com/1660-4601/17/22/8674,1
adventur,1
hindi,1
urdu,1


In [34]:
count_sort2 = counts2.sort_values('counts', ascending = False)
count_sort2.head(10)

Unnamed: 0,counts
vaccin,20729
’,5448
peopl,4975
's,4614
https,4469
get,4235
take,3917
virus,3090
``,2983
'',2975


In [None]:
fig, ax1 = plt.subplots(figsize=(10,10))
ax1.bar(count_sort2.index, count_sort2['counts'])
plt.xticks(rotation=90, fontsize = 'medium')
plt.show()