In [26]:
from IPython.display import HTML, display

def set_css():
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))
get_ipython().events.register('pre_run_cell', set_css)

In [27]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"
!pip3 install contractions

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [29]:
import pandas as pd
import numpy as np
import json
from nltk.tokenize import sent_tokenize, word_tokenize
import nltk
nltk.download('punkt')
import contractions
import spacy
nltk.download('stopwords')
import matplotlib.pyplot as plt
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.stem import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from tqdm.auto import tqdm
tqdm.pandas(desc = "Processing Rows")


def data_pre(input_data):
    punc = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
    
    with open(input_data, "r", encoding="utf-8") as f:
        d = json.load(f)

    metadata = ['article_url', 'comment_count']
    df = pd.json_normalize(d['articles'], 'comments', metadata)

    data = df.loc[:,['commentID','userID','commentBody','approveDate','recommendations',
                     'replyCount','editorsSelection','article_url','comment_count']]
    
    expanded_text = data['commentBody'].apply(lambda x: " ".join([contractions.fix(word) for word in x.split()]))
    data['commentBody'] = expanded_text
    #data["commentBody"] = data["commentBody"].str.lower().str.translate(str.maketrans("", "", punc))

    splits = df['article_url'].str.split("/")
    dates = splits.str[-5] + "/" + splits.str[-4] + "/" + splits.str[-3]
    sections = splits.str[-2]
    titles = [' '.join(j) for j in [i.split('-') for i in [sub[: -5] for sub in splits.str[-1]]]]
    output = pd.DataFrame({"Date": dates, "Section": sections, "Title": titles})
    
    return pd.concat([data, output], axis=1)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Concatenating Various Data Files

In [30]:
data_1=data_pre('/content/drive/MyDrive/STAT427/2013/06/6_1.json')
data_2=data_pre('/content/drive/MyDrive/STAT427/2013/06/6_2.json')
data_3=data_pre('/content/drive/MyDrive/STAT427/2013/06/6_3.json')
data_4=data_pre('/content/drive/MyDrive/STAT427/2013/06/6_4.json')
data_5=data_pre('/content/drive/MyDrive/STAT427/2013/06/6_5.json')
data_6=data_pre('/content/drive/MyDrive/STAT427/2013/06/6_6.json')
data_7=data_pre('/content/drive/MyDrive/STAT427/2013/06/6_7.json')
data_8=data_pre('/content/drive/MyDrive/STAT427/2013/06/6_8.json')
data_9=data_pre('/content/drive/MyDrive/STAT427/2013/06/6_9.json')

data1 = pd.concat([data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9], axis=0)
print(len(data1))
data1.head()

563


Unnamed: 0,commentID,userID,commentBody,approveDate,recommendations,replyCount,editorsSelection,article_url,comment_count,Date,Section,Title
0,10348515,68302733,Did Not realize I was replying to such an pld ...,1382630347,0,0,False,https://krugman.blogs.nytimes.com/2013/06/01/w...,0,2013/06/01,we-are-not-having-a-serious-discussion-obamaca...,
1,10348466,68302733,Why is it that those who favor the ACA (cough)...,1382597945,0,0,False,https://krugman.blogs.nytimes.com/2013/06/01/w...,0,2013/06/01,we-are-not-having-a-serious-discussion-obamaca...,
2,10934882,68676589,"Coco Chanel said ""Once you have dressed, and b...",1389541582,0,0,False,https://opinionator.blogs.nytimes.com/2013/06/...,0,2013/06/01,the-myth-of-gatsbys-suffering-middle-class,
3,21868321,61659985,Martha Nussbaum is a philosopher (as is the au...,1490033226,0,0,False,https://opinionator.blogs.nytimes.com/2013/06/...,0,2013/06/01,does-great-literature-make-us-better,
4,11880086,69423821,it was -i think- a good introduction to politi...,1401186369,0,0,False,https://opinionator.blogs.nytimes.com/2013/06/...,0,2013/06/01,does-great-literature-make-us-better,


## Creating Single Frequency Word List

In [31]:
def single_freq(temp_str):   
    str_list = temp_str.split()
    frequency = Counter(str_list)
    single_freq_words =[]
    for word in frequency:
        if frequency[word] == 1:
            single_freq_words.append(word)
    print("The total number of single frequency words are: ",len(single_freq_words))
    #print(single_freq_words)
    return single_freq_words
    
total_words_before_stem = data1['commentBody'].str.cat(sep=' ')
print(type(total_words_before_stem))
print("The total words in the corpus are: ",len(total_words_before_stem.split()))    
single_freq_words = single_freq(total_words_before_stem)

<class 'str'>
The total words in the corpus are:  55527
The total number of single frequency words are:  8871


## Removing Single Frequency Words

In [32]:
from tqdm.auto import tqdm
tqdm.pandas(desc = "Processing Rows")
data1["commentBody_processed"] = data1["commentBody"]
data1.head()

Unnamed: 0,commentID,userID,commentBody,approveDate,recommendations,replyCount,editorsSelection,article_url,comment_count,Date,Section,Title,commentBody_processed
0,10348515,68302733,Did Not realize I was replying to such an pld ...,1382630347,0,0,False,https://krugman.blogs.nytimes.com/2013/06/01/w...,0,2013/06/01,we-are-not-having-a-serious-discussion-obamaca...,,Did Not realize I was replying to such an pld ...
1,10348466,68302733,Why is it that those who favor the ACA (cough)...,1382597945,0,0,False,https://krugman.blogs.nytimes.com/2013/06/01/w...,0,2013/06/01,we-are-not-having-a-serious-discussion-obamaca...,,Why is it that those who favor the ACA (cough)...
2,10934882,68676589,"Coco Chanel said ""Once you have dressed, and b...",1389541582,0,0,False,https://opinionator.blogs.nytimes.com/2013/06/...,0,2013/06/01,the-myth-of-gatsbys-suffering-middle-class,,"Coco Chanel said ""Once you have dressed, and b..."
3,21868321,61659985,Martha Nussbaum is a philosopher (as is the au...,1490033226,0,0,False,https://opinionator.blogs.nytimes.com/2013/06/...,0,2013/06/01,does-great-literature-make-us-better,,Martha Nussbaum is a philosopher (as is the au...
4,11880086,69423821,it was -i think- a good introduction to politi...,1401186369,0,0,False,https://opinionator.blogs.nytimes.com/2013/06/...,0,2013/06/01,does-great-literature-make-us-better,,it was -i think- a good introduction to politi...


In [33]:

total_words_before_stem = data1['commentBody_processed'].str.cat(sep=' ')
print(type(total_words_before_stem))
print("The total words in the corpus after single word removal are: ",len(total_words_before_stem.split()))    
single_freq_words = single_freq(total_words_before_stem)

<class 'str'>
The total words in the corpus after single word removal are:  55527
The total number of single frequency words are:  8871


## Create 3 different dataframes for C, V and VC.

In [34]:
# The 3 different wordlists
vaccine_wordlist_lower = ["booster","antibody","antibodies","antigen","antigenic","antigens","epidemic","herd immunity","injection","inject",
                          "injector","pcr","injecting","reinject","injections","injects","vaccine","polymerase chain reaction","vax",
                          "vaccination","vaxxed","vaxxer","vaxify","jab","vaccinate","vaccinates","revaccination","immunity","herd immunization",
                          "immunization","immunizations"]

covid_wordlist_lower = ["covid","corona","alpha","beta","gamma","delta","omicron","quarantine","pandemic","lockdown", "gamma variant", 
                    "herd immunity","CoV-2","n95","cov19","antibody","antigen","pcr","antiviral","comorbidity", "herd immunization",
                    "myocarditis","shutdown","epidemic","face mask","ventilator" ,"community spread","symptomatic", "comorbitities",
                    "covid-19", "coronavirus",	"alpha variant", "beta variant", "beta mutation", "beta spike protein", "post-symptomatic",
                    "delta variant", "omicron variant",	"quaratined",	"self-quarantine", "quarantines","herd immunity",	
                    "sars-cov-2",	"antibodies",	"antigenic", "antigenictiy", "antigens","polymerase chain reaction", "comorbid", 
                    "myocardial",	"shutdowns", "epidemiology", "face masks", "ventilation", "ventilators", "presymptomatic"]

common_wordlist_lower = ["antibody","epidemic","herd immunity","pcr", "antibodies", "epidemiology", "herd immunization","polymerase chain reaction"]


vaccine_wordlist_capitalize = [vaccine.capitalize() for vaccine in vaccine_wordlist_lower]
covid_wordlist_capitalize = [covid.capitalize() for covid in covid_wordlist_lower]
common_wordlist_capitalize = [common.capitalize() for common in common_wordlist_lower]

vaccine_wordlist_upper = [vaccine.upper() for vaccine in vaccine_wordlist_lower]
covid_wordlist_upper = [covid.upper() for covid in covid_wordlist_lower]
common_wordlist_upper = [common.upper() for common in common_wordlist_lower]


vaccine_wordlist = vaccine_wordlist_lower + vaccine_wordlist_capitalize + vaccine_wordlist_upper
covid_wordlist = common_wordlist_lower + common_wordlist_capitalize + common_wordlist_upper
common_wordlist = common_wordlist_lower + common_wordlist_capitalize + common_wordlist_upper

#### Vaccine_Data


In [35]:

data_vaccine = data1[data1['commentBody_processed'].str.contains('|'.join(vaccine_wordlist))].reset_index(drop=True)
print("The number of rows in vaccine data is",len(data_vaccine))
data_vaccine.head()

The number of rows in vaccine data is 5


Unnamed: 0,commentID,userID,commentBody,approveDate,recommendations,replyCount,editorsSelection,article_url,comment_count,Date,Section,Title,commentBody_processed
0,22406041,56831107,"@Nikos: Before fire, there were no humans. Dai...",1494410414,0,0,False,https://well.blogs.nytimes.com/2013/06/03/micr...,0,2013/06/03,microsampling-air-pollution,,"@Nikos: Before fire, there were no humans. Dai..."
1,15124798,72214154,I am so grateful for you getting the word out ...,1433416318,0,0,False,https://well.blogs.nytimes.com/2013/06/26/the-...,0,2013/06/26,the-problem-with-pain-pills,,I am so grateful for you getting the word out ...
2,10001697,48098006,"Mr. Meier is an unprincipled, ill-informed zea...",1379419454,8,0,False,https://well.blogs.nytimes.com/2013/06/26/the-...,0,2013/06/26,the-problem-with-pain-pills,,"Mr. Meier is an unprincipled, ill-informed zea..."
3,16515301,73339369,This is absolute propaganda. It just is not un...,1446208319,0,0,False,https://well.blogs.nytimes.com/2013/06/27/stud...,0,2013/06/27,study-finds-no-vaccine-link-to-guillain-barr,,This is absolute propaganda. It just is not un...
4,13024605,59630210,"I was a healthy, athletic woman who had the fl...",1413104775,0,0,False,https://well.blogs.nytimes.com/2013/06/27/stud...,0,2013/06/27,study-finds-no-vaccine-link-to-guillain-barr,,"I was a healthy, athletic woman who had the fl..."


In [36]:
data_vaccine['commentBody'][0]

'@Nikos: Before fire, there were no humans. Daily exposure to high air pollution and CO in particular caused the changes in brain<br/> that made us human. Air pollution also desensitizes us to sensory stimuli (like smoking dulls the senses).<br/><br/> The EPA experiment of lowering air pollution to almost nothing<br/>has resulted in epidemic increases in neuro sensitization disorders like asthma and autism. ( epidemics not seen in countries that still have high air pollution.).'

In [37]:
data_vaccine['commentBody'][1]

'I am so grateful for you getting the word out about addicting pain pills. Everything you have written, I have found out to be true. I am a caretaker suffering in silence trying to stand up to the medical community that insists that people need to take these addicting pain pills to stay ahead of the pain. What they have been supporting is addiction and a mountiain of suffering caused by these horrible opiate pills and early, needless death. These addicting pain pills should only be used SHORT TERM otherwise it is defeating their whole purpose. Before my beloved aunt died from an overdose of opiates, she said, this is what the pain pills did to me. She was mentally gone with bruises and abrasions all over her body from falling. She said do not let this happen to you, there is a place way worse than hell and you do not know about it until it happens to you. This epidemic is all around me. Thank you for your efforts to stop this nightmare of stupidity!'

In [38]:
data_vaccine['commentBody'][2]

"Mr. Meier is an unprincipled, ill-informed zealot, who believes that opioids are inherently bad &amp; should be withheld from patients with chronic pain, regardless of consequences. To support his contention, Meier scours medical &amp; popular literature for snippets of information, anecdotes, opinions, isolated clinical &amp; laboratory studies, then uses these to concoct his meretricious arguments.<br/>Consider “Opioid Induced Hyperalgesia” (OIH). Meier implies that this is an inevitable consequence of opioid treatment, yet there is absolutely no convincing evidence for this. As other readers note, most of the meager data on OIH comes from animal studies &amp; even these provide little support for Meier's claims. Researchers who study OIH admit that they have no idea of its prevalence among chronic pain patients &amp; that it is difficult or impossible to distinguish OIH from habituation.<br/>I am 67. For nearly 20 years I have relied on Oxycodone/Oxycontin to relieve my otherwise i

In [39]:
data_vaccine['commentBody'][3]

'This is absolute propaganda. It just is not untrue. Statistics released in March, 2014 by the YOU.S. Department of Health and Human Services revealed that the flu vaccine remains the top vaccine causing injuries which are being compensated through vaccine court, and that Guillain-Barré Syndrome, a crippling disease, remains the top injury being awarded compensation due to the seasonal flu vaccine. guillain-Barre syndrome is also listed as a "side effect" on most vaccine inserts. The epidemic of this particular syndrome has shot up so much in recent years from these erm, unknown "triggers", that they are considering revising the figures of incidence from 1 in 100,000 to 1 in 50,000. Just goes to show how much faith we should give to any study, they are politically motivated to say vaccines are safe when they are not.'

In [40]:
data_vaccine['commentBody'][4]

'I was a healthy, athletic woman who had the flu once. Two weeks after my second flu shot ever at age 52, I had abrupt onset of intense burning pain that went through my shoulder, down my arm and into my right hand. I had no injury, trauma or fall. My right pectoral muscle fibrillated uncontrollably during the attack. The pain lasted a few weeks, treated with oral steroids. Within a few days I developed severe weakness of my right shoulder, arm and hand, atrophy of my right triceps and right serratus anterior muscle, and a "winged scapula". One year later, I could not lift my right arm up, using my left arm to lift it up several times a day, to prevent "frozen shoulder". 18 months later my shoulder range of motion returned but I still have weakness and stiffness. Diagnosis: brachial neuritis, AKA Parsonage Turner Syndrome, neuralgic amyotrophy or brachial plexopathy - from the vaccine. <br/><br/>20 months later, persistent numbness of my right hand/fingers/thumb. My fingers are weak/st

#### Covid Data


In [41]:

data_covid = data1[data1['commentBody_processed'].str.contains('|'.join(covid_wordlist))].reset_index(drop=True)
print("The number of rows in vaccine data is",len(data_covid))
data_covid.head()

The number of rows in vaccine data is 4


Unnamed: 0,commentID,userID,commentBody,approveDate,recommendations,replyCount,editorsSelection,article_url,comment_count,Date,Section,Title,commentBody_processed
0,22406041,56831107,"@Nikos: Before fire, there were no humans. Dai...",1494410414,0,0,False,https://well.blogs.nytimes.com/2013/06/03/micr...,0,2013/06/03,microsampling-air-pollution,,"@Nikos: Before fire, there were no humans. Dai..."
1,15124798,72214154,I am so grateful for you getting the word out ...,1433416318,0,0,False,https://well.blogs.nytimes.com/2013/06/26/the-...,0,2013/06/26,the-problem-with-pain-pills,,I am so grateful for you getting the word out ...
2,10078110,68176225,Nurse practitioners are highly educated health...,1380121862,2,0,False,https://well.blogs.nytimes.com/2013/06/27/the-...,0,2013/06/27,the-gulf-between-doctors-and-nurse-practitioners,,Nurse practitioners are highly educated health...
3,16515301,73339369,This is absolute propaganda. It just is not un...,1446208319,0,0,False,https://well.blogs.nytimes.com/2013/06/27/stud...,0,2013/06/27,study-finds-no-vaccine-link-to-guillain-barr,,This is absolute propaganda. It just is not un...


In [42]:
data_covid['commentBody'][0]

'@Nikos: Before fire, there were no humans. Daily exposure to high air pollution and CO in particular caused the changes in brain<br/> that made us human. Air pollution also desensitizes us to sensory stimuli (like smoking dulls the senses).<br/><br/> The EPA experiment of lowering air pollution to almost nothing<br/>has resulted in epidemic increases in neuro sensitization disorders like asthma and autism. ( epidemics not seen in countries that still have high air pollution.).'

In [43]:
data_covid['commentBody'][1]

'I am so grateful for you getting the word out about addicting pain pills. Everything you have written, I have found out to be true. I am a caretaker suffering in silence trying to stand up to the medical community that insists that people need to take these addicting pain pills to stay ahead of the pain. What they have been supporting is addiction and a mountiain of suffering caused by these horrible opiate pills and early, needless death. These addicting pain pills should only be used SHORT TERM otherwise it is defeating their whole purpose. Before my beloved aunt died from an overdose of opiates, she said, this is what the pain pills did to me. She was mentally gone with bruises and abrasions all over her body from falling. She said do not let this happen to you, there is a place way worse than hell and you do not know about it until it happens to you. This epidemic is all around me. Thank you for your efforts to stop this nightmare of stupidity!'

In [44]:
data_covid['commentBody'][2]

'Nurse practitioners are highly educated health care providers. I have posted below the "core courses" in the ACNP curriculum:<br/><br/>1. Prerequisites- BSN Admissions: (GPA must be &gt;3.5 in prerequisites to apply)<br/>Organic Chemistry, Biochemistry, Statistics, Nutrition, Psychology, Sociology, Cultural Anthropology, Developmental Psych, Life-Span Human Growth and Development, Human Anatomy, Human Physiology &amp; Lab, Biology, Clinical Microbiology &amp; Lab.<br/><br/>2. BSN (4 years):<br/>Med/Surg Didactic &amp; Clinical, Health Assessment &amp; Lab, Med/Surg Didactic II &amp; Clinical , Clinical Lab, Human Pathophysiology, Pharmacology, OB &amp; Clinical, Adult Health/Gero I Didactic &amp; Clinical, Psych/Mental Didactic &amp; Clinical, Research, Adult Health/Gero II Didactic &amp; Clinical, Community Health &amp; Clinical, Ethics.<br/><br/>3. MSN ANCP Courses: (2-3 years)<br/>Adult Acute Care Theory I, Strategy &amp; Health, Advanced Pharmacology (P695 &amp; P660), Policy &amp

#### Covid + Vaccine Data



In [45]:
data_cv_1 = data1[data1['commentBody_processed'].str.contains('|'.join(common_wordlist))].reset_index(drop=True)
print("The number of rows in common data is",len(data_cv_1))
data_cv_1.head()

The number of rows in common data is 4


Unnamed: 0,commentID,userID,commentBody,approveDate,recommendations,replyCount,editorsSelection,article_url,comment_count,Date,Section,Title,commentBody_processed
0,22406041,56831107,"@Nikos: Before fire, there were no humans. Dai...",1494410414,0,0,False,https://well.blogs.nytimes.com/2013/06/03/micr...,0,2013/06/03,microsampling-air-pollution,,"@Nikos: Before fire, there were no humans. Dai..."
1,15124798,72214154,I am so grateful for you getting the word out ...,1433416318,0,0,False,https://well.blogs.nytimes.com/2013/06/26/the-...,0,2013/06/26,the-problem-with-pain-pills,,I am so grateful for you getting the word out ...
2,10078110,68176225,Nurse practitioners are highly educated health...,1380121862,2,0,False,https://well.blogs.nytimes.com/2013/06/27/the-...,0,2013/06/27,the-gulf-between-doctors-and-nurse-practitioners,,Nurse practitioners are highly educated health...
3,16515301,73339369,This is absolute propaganda. It just is not un...,1446208319,0,0,False,https://well.blogs.nytimes.com/2013/06/27/stud...,0,2013/06/27,study-finds-no-vaccine-link-to-guillain-barr,,This is absolute propaganda. It just is not un...


In [46]:
data_cv_2 = data_vaccine[data_vaccine['commentBody_processed'].str.contains('|'.join(covid_wordlist))].reset_index(drop=True)
print("The number of rows in v+c data is",len(data_cv_2))
data_cv_2.head()

The number of rows in v+c data is 3


Unnamed: 0,commentID,userID,commentBody,approveDate,recommendations,replyCount,editorsSelection,article_url,comment_count,Date,Section,Title,commentBody_processed
0,22406041,56831107,"@Nikos: Before fire, there were no humans. Dai...",1494410414,0,0,False,https://well.blogs.nytimes.com/2013/06/03/micr...,0,2013/06/03,microsampling-air-pollution,,"@Nikos: Before fire, there were no humans. Dai..."
1,15124798,72214154,I am so grateful for you getting the word out ...,1433416318,0,0,False,https://well.blogs.nytimes.com/2013/06/26/the-...,0,2013/06/26,the-problem-with-pain-pills,,I am so grateful for you getting the word out ...
2,16515301,73339369,This is absolute propaganda. It just is not un...,1446208319,0,0,False,https://well.blogs.nytimes.com/2013/06/27/stud...,0,2013/06/27,study-finds-no-vaccine-link-to-guillain-barr,,This is absolute propaganda. It just is not un...


In [47]:
data_cv = pd.concat([data_cv_1,data_cv_2]).drop_duplicates().reset_index(drop=True)
print("The number of rows in vaccine data is",len(data_cv))
data_cv.head()

The number of rows in vaccine data is 4


Unnamed: 0,commentID,userID,commentBody,approveDate,recommendations,replyCount,editorsSelection,article_url,comment_count,Date,Section,Title,commentBody_processed
0,22406041,56831107,"@Nikos: Before fire, there were no humans. Dai...",1494410414,0,0,False,https://well.blogs.nytimes.com/2013/06/03/micr...,0,2013/06/03,microsampling-air-pollution,,"@Nikos: Before fire, there were no humans. Dai..."
1,15124798,72214154,I am so grateful for you getting the word out ...,1433416318,0,0,False,https://well.blogs.nytimes.com/2013/06/26/the-...,0,2013/06/26,the-problem-with-pain-pills,,I am so grateful for you getting the word out ...
2,10078110,68176225,Nurse practitioners are highly educated health...,1380121862,2,0,False,https://well.blogs.nytimes.com/2013/06/27/the-...,0,2013/06/27,the-gulf-between-doctors-and-nurse-practitioners,,Nurse practitioners are highly educated health...
3,16515301,73339369,This is absolute propaganda. It just is not un...,1446208319,0,0,False,https://well.blogs.nytimes.com/2013/06/27/stud...,0,2013/06/27,study-finds-no-vaccine-link-to-guillain-barr,,This is absolute propaganda. It just is not un...


#Sentiment Analysis

#### Vaccine_Data

In [48]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer


analyzer = SentimentIntensityAnalyzer()
data_vaccine['final_score'] = [analyzer.polarity_scores(x)['compound'] for x in data_vaccine['commentBody_processed']]


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [49]:
max(data_vaccine.loc[:, 'final_score'])

0.6124

In [50]:
print("The mean of final sentiment score for current month is:",data_vaccine.loc[:, 'final_score'].mean())

The mean of final sentiment score for current month is: -0.59036


#### Covid_Data

In [51]:
analyzer = SentimentIntensityAnalyzer()
data_covid['final_score'] = [analyzer.polarity_scores(x)['compound'] for x in data_covid['commentBody_processed']]
data_covid[['commentBody_processed','final_score']].iloc[0]

commentBody_processed    @Nikos: Before fire, there were no humans. Dai...
final_score                                                        -0.6249
Name: 0, dtype: object

In [52]:
max(data_covid.loc[:, 'final_score'])

0.9853

In [53]:
print("The mean of final sentiment score for current month is:",data_covid.loc[:, 'final_score'].mean())

The mean of final sentiment score for current month is: -0.003899999999999987


#### Covid + Vaccine Data


In [54]:
analyzer = SentimentIntensityAnalyzer()
data_cv['final_score'] = [analyzer.polarity_scores(x)['compound'] for x in data_cv['commentBody_processed']]
data_cv[['commentBody_processed','final_score']].iloc[0]

commentBody_processed    @Nikos: Before fire, there were no humans. Dai...
final_score                                                        -0.6249
Name: 0, dtype: object

In [55]:
print("The mean of final sentiment score for current month is:",data_cv.loc[:, 'final_score'].mean())

The mean of final sentiment score for current month is: -0.003899999999999987
