# Covid-19 Vaccine Sentiment Analysis

In [None]:
import os
import tweepy as tw
import pandas as pd
import numpy as np
import re
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

In [None]:
# Load the Twitter API credentials table. Later cells read the first row of its
# consumer_key, consumer_secret, access_token and access_token_secret columns.
twitter_keys = pd.read_csv(filepath_or_buffer="data/twitter_keys.csv")

In [None]:
# Build the Tweepy client from the credentials stored in row 0 of the keys table.
_creds = twitter_keys.loc[0]
auth = tw.OAuthHandler(_creds["consumer_key"], _creds["consumer_secret"])
auth.set_access_token(_creds["access_token"], _creds["access_token_secret"])
# Both rate-limit options are enabled: wait when the limit is reached and
# report the wait.
api = tw.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)

# Retrieve Data from Twitter API

In [None]:
# Look up the Twitter place id of the country so the search can be restricted to it.
places = api.geo_search(query='USA', granularity='country')  # Select the country
if not places:
    # The query below needs a place id; fail with a clear message instead of
    # hitting a NameError on an unbound `place_id`.
    raise RuntimeError("Twitter geo search returned no place for query 'USA'")
place_id = places[0].id

# Define the search term and the date_since date as variables
search_words = "(#covid OR vaccine OR covid-19 OR pfizer OR moderna OR covid OR #vaccine) -filter:retweets place:%s " % place_id
date_since = "2020-01-01"  # This could be set to yesterday's date instead

# Collect tweets (at most 5000)
tweets = tw.Cursor(api.search,
              q=search_words,
              lang="en",
              since=date_since).items(5000)

# Put tweets in a DataFrame.
# Rows are gathered in a list and the frame is built once: appending to a
# DataFrame inside the loop copies the whole frame on every tweet, and
# DataFrame.append no longer exists in pandas 2.x.
tweet_columns = [
    'date',
    'country_code',
    'place_full_name',
    'place_type',
    'place_name',
    'verified',
    'retweets',
    'likes',
    'text',
]
rows = []
for tweet in tweets:
    if tweet.place is None:
        # Tweets without place information cannot be located; skip them.
        continue
    rows.append({
        'date': tweet.created_at,
        'country_code': tweet.place.country_code,
        'place_full_name': tweet.place.full_name,
        'place_type': tweet.place.place_type,
        'place_name': tweet.place.name,
        'verified': tweet.user.verified,
        'retweets': tweet.retweet_count,
        'likes': tweet.favorite_count,
        'text': tweet.text,
    })
# Passing the column list keeps the expected columns even when no tweet matched.
f_data = pd.DataFrame(rows, columns=tweet_columns)
print('Data for US retrieved')

Rate limit reached. Sleeping for: 819


Data for US retrieved


In [None]:
# Data cleaning
# Patterns are compiled once and written as raw strings so that `\s`, `\S`,
# `\B` and `\w` reach the regex engine without invalid-escape warnings.
_HANDLE_RE = re.compile(r'@[^\s]+')           # Twitter handles
_HASHTAG_RE = re.compile(r'\B#\S+')           # hashtags
_URL_RE = re.compile(r"http\S+")              # URLs
_WORD_RE = re.compile(r'\w+')                 # runs of word characters
_SINGLE_CHAR_RE = re.compile(r'\s+[a-zA-Z]\s+')  # a lone letter between whitespace
_SPACES_RE = re.compile(r'\s+')               # runs of whitespace


def _clean_tweet_text(text):
    """Return *text* lower-cased with handles, hashtags, URLs and punctuation removed.

    The steps run in the same order as the original per-step pipeline:
    lower-case, strip handles, strip hashtags, strip URLs, keep only word
    characters, drop single letters surrounded by whitespace, and collapse
    whitespace runs to one space.
    """
    text = text.lower()
    text = _HANDLE_RE.sub('', text)
    text = _HASHTAG_RE.sub('', text)
    text = _URL_RE.sub('', text)
    text = ' '.join(_WORD_RE.findall(text))  # drops every special character
    text = _SINGLE_CHAR_RE.sub(' ', text)
    return _SPACES_RE.sub(' ', text)


f_data['text'] = f_data['text'].apply(_clean_tweet_text)
#f_data['date'] = pd.to_datetime(f_data.date).dt.date #Get only date from datetime

In [None]:
# VADER sentiment analysis
# Create the analyzer once and reuse it for every tweet; constructing it inside
# the lambda built a new analyzer (and reloaded its lexicon) for each row.
_analyzer = SIA()
f_data['sentiments'] = f_data['text'].apply(
    lambda x: _analyzer.polarity_scores(' '.join(re.findall(r'\w+', x.lower())))
)
# Split the score dictionaries into one column per polarity.
f_data['Positive Sentiment'] = f_data['sentiments'].apply(lambda x: x['pos'])
f_data['Neutral Sentiment'] = f_data['sentiments'].apply(lambda x: x['neu'])
f_data['Negative Sentiment'] = f_data['sentiments'].apply(lambda x: x['neg'])

In [None]:
# Compare positive and negative sentiments
# np.select uses the first matching condition, so a tie (including tweets where
# both scores are 0) is labelled 'Positive'. The second condition is written
# as a strict comparison to make that precedence explicit; results are unchanged.
conditions = [
    (f_data['Positive Sentiment'] >= f_data['Negative Sentiment']),
    (f_data['Negative Sentiment'] > f_data['Positive Sentiment']),
    ]

# create a list of the values we want to assign for each condition
values = ['Positive', 'Negative']

# create a new column and use np.select to assign values to it using our lists as arguments.
# The default must be a string: the implicit integer default (0) cannot be
# combined with string choices on recent NumPy releases. It is only used for
# rows where neither comparison holds (e.g. a missing score).
f_data['Sentiment Label'] = np.select(conditions, values, default='Unknown')

In [None]:
# Load the US cities reference table. The preparation cell uses its city,
# state_id, state_name, county_name, lat, lng and id columns.
us_cities = pd.read_csv(filepath_or_buffer="uscities.csv")

In [None]:
# Data preparation
# Keep only city-level tweets and drop the raw score dictionaries. The drop
# returns a new frame instead of mutating the filtered slice in place, which
# avoids pandas' SettingWithCopy warning.
f_data = f_data[f_data.place_type == 'city'].drop(columns=['sentiments'])

# Work on an explicit copy of the needed columns so adding the join key below
# does not write into a slice of the original table.
us_cities = us_cities[['city', 'state_id', 'state_name', 'county_name', 'lat', 'lng', 'id']].copy()
# Build the same "City, ST" key that the tweets carry in place_full_name.
us_cities['place_full_name'] = us_cities['city'] + ', ' + us_cities['state_id']

# A left merge repeats a tweet once per matching city row, so keep a single
# row per "City, ST" key. NOTE(review): the first row in file order is kept;
# confirm that is the preferred one when a key appears more than once.
final_df = f_data.merge(
    us_cities.drop_duplicates(subset='place_full_name'),
    on='place_full_name',
    how='left',
)
final_df['date'] = pd.to_datetime(final_df['date'])
final_df['round_date'] = final_df['date'].dt.floor('h')  # Hourly timestamp used in the dashboard
final_df = final_df.drop(columns=['id', 'Positive Sentiment', 'Neutral Sentiment', 'Negative Sentiment'])

In [None]:
# Export the prepared frame. `final_df` carries the city coordinates and the
# hourly `round_date` column built for the dashboard; `f_data` has neither, so
# writing `f_data` discarded the whole preparation step.
final_df.to_csv('tweets_dashboard.csv', index=False)