## How the new Covid-19 vaccine from Pfizer and BioNTech was received by the Twitter public

In [None]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [20,10]
#%matplotlib inline

In [None]:
# Load the vaccination tweets CSV from the Kaggle input directory and preview the first rows.
vaccineTweet_DF = pd.read_csv('/kaggle/input/pfizer-vaccine-tweets/vaccination_tweets.csv')
vaccineTweet_DF.head()

In [None]:
# Show the (rows, columns) dimensions of the loaded data.
vaccineTweet_DF.shape

In [None]:
# Count the missing (null) values in each column.
vaccineTweet_DF.isnull().sum()

In [None]:
# Replace missing values with placeholder labels: 'Unknown' for user_location,
# 'None' for hashtags and 'NoDesc' for user_description.
# A single DataFrame-level fillna is used because calling
# fillna(inplace=True) on a selected column is chained assignment, which pandas
# deprecates and which does not update the parent frame under copy-on-write.
vaccineTweet_DF = vaccineTweet_DF.fillna(
    {
        'user_location': 'Unknown',
        'hashtags': 'None',
        'user_description': 'NoDesc',
    }
)

In [None]:
# Re-count nulls per column after the fillna step.
vaccineTweet_DF.isnull().sum()

In [None]:
# Preview the first rows after the null handling.
vaccineTweet_DF.head()

## Analyzing data with NLP

In [None]:
import nltk
# Fetch the NLTK stop-word corpus that remove_stopwords reads via stopwords.words('english').
nltk.download('stopwords')

In [None]:
# Plan is to analyze the text and hashtags columns.
import string
# Display the punctuation characters that remove_punctuation strips.
string.punctuation

In [None]:
# Remove punctuation FUNCTION
# Translation table that deletes every ASCII punctuation character; built once
# so each call is a single str.translate pass instead of 32 chained replaces.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def remove_punctuation(text):
    """Return ``text`` with all ``string.punctuation`` characters removed.

    Args:
        text: The string to clean.

    Returns:
        A new string without ASCII punctuation; every other character,
        including non-ASCII punctuation, is left untouched.
    """
    return text.translate(_PUNCTUATION_TABLE)

In [None]:
# Strip punctuation from every tweet's text and preview the result.
vaccineTweet_DF['text'] = vaccineTweet_DF['text'].apply(remove_punctuation)
vaccineTweet_DF.head()

In [None]:
# Check the DataFrame dimensions again after cleaning the text.
vaccineTweet_DF.shape

In [None]:
# Let's check some retweets: bar chart of how many tweets have each distinct retweet count.
vaccineTweet_DF['retweets'].value_counts().plot(kind='bar', title='No of Retweets Twitter Acounts')

In [None]:
# Same view for favorites: bar chart of how many tweets have each distinct favorites count.
vaccineTweet_DF['favorites'].value_counts().plot(kind='bar', color='red',title='No of Favorites Twitter Acounts')

In [None]:
# Let's do some tokenize FUNCTION  
from nltk.tokenize import word_tokenize
def tokenize_text(text):
    """Lower-case ``text`` and split it into tokens with NLTK's word_tokenize.

    Args:
        text: The string to tokenize.

    Returns:
        The tokens produced by ``word_tokenize`` for the lower-cased text.
    """
    lowered = text.lower()
    return word_tokenize(lowered)

In [None]:
# Apply the tokenize function to the text column; each entry becomes the
# token sequence returned by tokenize_text instead of a plain string.
vaccineTweet_DF['text'] = vaccineTweet_DF['text'].apply(tokenize_text)
vaccineTweet_DF.head()

In [None]:
pip install textblob

In [None]:
pip install langdetect 

In [None]:
from langdetect import detect
from textblob import TextBlob    
# Detect the language of a sample word and display the result.
# NOTE(review): myword is not used again in the visible notebook — presumably
# a smoke test that langdetect is installed; confirm or remove.
myword = detect("hello")
myword

In [None]:
# stop words FUNCTION
from nltk.corpus import stopwords
from functools import lru_cache


@lru_cache(maxsize=None)
def _english_stop_words():
    """Return the NLTK English stop words as a frozenset, loaded only once."""
    return frozenset(stopwords.words('english'))


def remove_stopwords(text):
    """Return the tokens of ``text`` that are not English stop words.

    Args:
        text: An iterable of word tokens.

    Returns:
        A new list with the tokens found in NLTK's English stop-word list
        removed; the order of the remaining tokens is preserved.
    """
    # The stop-word set is cached because this function is applied once per
    # row; rebuilding it from the corpus on every call is wasted work.
    stop_words = _english_stop_words()
    return [word for word in text if word not in stop_words]

In [None]:
# Drop English stop words from every entry of the text column, then display the DataFrame.
vaccineTweet_DF['text'] = vaccineTweet_DF['text'].apply(remove_stopwords)
vaccineTweet_DF

In [None]:
import nltk
# Fetch the WordNet corpus (lemmatize_words relies on nltk.WordNetLemmatizer).
nltk.download('wordnet')

In [None]:
# Lemmatization FUNCTION
def lemmatize_words(text):
    """Lemmatize every token in ``text`` with NLTK's WordNetLemmatizer.

    Args:
        text: An iterable of word tokens.

    Returns:
        A list holding the lemmatized form of each token, in input order.
    """
    lemmatizer = nltk.WordNetLemmatizer()
    return list(map(lemmatizer.lemmatize, text))


In [None]:
#vaccineTweet_DF['text'] = vaccineTweet_DF.apply(lemmatize_words)

In [None]:
from nltk.stem import PorterStemmer

In [None]:
# Stemming 
def stemming_word(text):
    """Stem every token in ``text`` with NLTK's PorterStemmer.

    Args:
        text: An iterable of word tokens.

    Returns:
        A list holding the stem of each token, in input order.
    """
    stemmer = PorterStemmer()
    return list(map(stemmer.stem, text))

In [None]:
# Stem every entry of 'text' and keep the result in its own column. The
# original call discarded the return value of apply(), so the stemming had no
# effect. 'text' itself is left unstemmed so the later sentiment step still
# scores whole words.
vaccineTweet_DF['stemmedText'] = vaccineTweet_DF['text'].apply(stemming_word)
vaccineTweet_DF.head()

In [None]:
from textblob import TextBlob

In [None]:
# Polarity and Subjectivity
def sentiment_analysis(text):
    """Return the TextBlob sentiment (polarity and subjectivity) of ``text``.

    Args:
        text: The string to score.

    Returns:
        The ``sentiment`` attribute of ``TextBlob(text)``.
    """
    return TextBlob(text).sentiment

# Preview the DataFrame (sentiment_analysis is only defined here, not yet applied).
vaccineTweet_DF.head()

In [None]:
# To String
def listToStr(myList):
    """Join a list of strings with ';', passing any other value through.

    Args:
        myList: A list (or list subclass) of strings, or any other object.

    Returns:
        The ';'-joined string when ``myList`` is a list; otherwise ``myList``
        itself, unchanged.
    """
    # isinstance (rather than an exact type comparison) also accepts list
    # subclasses, which is the idiomatic type check.
    if isinstance(myList, list):
        return ";".join(myList)
    return myList

In [None]:
# NOTE(review): the result of this apply() is not assigned, so the column is
# unchanged. Each 'text' entry is presumably a list of string tokens here, in
# which case listToStr receives single strings and returns them as-is —
# confirm whether a join over the whole list was intended.
vaccineTweet_DF['text'].apply(lambda x: [listToStr(i) for i in x])

In [None]:
# Store each 'text' entry minus its first element in a new 'stringText' column.
# NOTE(review): 'text' presumably still holds token lists at this point, so
# x[1:] drops the first token rather than producing a string — confirm intent.
vaccineTweet_DF['stringText'] = vaccineTweet_DF['text'].apply(lambda x: x[1:])
vaccineTweet_DF.head()

In [None]:
# Turn each token list back into plain text by joining on spaces, so the
# sentiment step receives words rather than the list's repr ("['a', 'b']").
# Values that are not lists are converted with str(), which keeps the cell
# safe to re-run after the column already holds strings.
vaccineTweet_DF['text'] = vaccineTweet_DF['text'].apply(
    lambda tokens: ' '.join(tokens) if isinstance(tokens, list) else str(tokens)
)

In [None]:
# Score every tweet with sentiment_analysis and keep the result in 'senti_score'.
vaccineTweet_DF['senti_score'] = vaccineTweet_DF['text'].apply(sentiment_analysis)

In [None]:
# Preview the DataFrame, now including the 'senti_score' column.
vaccineTweet_DF.head()

In [None]:
# Collect the sentiment results into a plain Python list.
# NOTE(review): the same assignment is repeated in a later cell before the
# list is used, so this cell looks redundant.
sentiment_series = vaccineTweet_DF['senti_score'].tolist()

In [None]:
#vaccineTweet_DF['senti_score'].value_counts().plot(kind='pie')
# Pie chart of the number of rows (non-null 'hashtags') per 'user_verified' value.
user_verify_plot = vaccineTweet_DF.groupby('user_verified').hashtags.count()
user_verify_plot.plot(kind='pie', title='User Verify Twitter Account')

In [None]:
# Index of the 20 most frequent user_location values
# (value_counts orders them from most to least frequent).
user_location = (
    vaccineTweet_DF['user_location']
    .value_counts()
    .head(20)
    .index
)
user_location

In [None]:
# Horizontal count plot of rows per location, restricted to and ordered by
# the locations held in user_location.
sns.countplot(y='user_location', data=vaccineTweet_DF, order=user_location, color='cornflowerblue')
plt.title('Number of USERS per LOCATION', loc='center')
plt.xlabel('Number of users', weight='bold')
plt.ylabel('Location', weight='bold')
plt.show()

In [None]:
# Analyze polarity & subjectivity together.
# NOTE(review): 'senti_score' holds the objects returned by sentiment_analysis
# (TextBlob sentiment results), not scalars, so this histogram may not render
# as a single distribution — confirm the intended plot.
fig, ax = plt.subplots(figsize=(8,6), sharex=True)
plt.ylim(0,2)
vaccineTweet_DF['senti_score'].hist(ax=ax)

In [None]:
# Analyze polarity and subjectivity separately: expand each sentiment result
# into its own pair of columns, keeping the original row index.
sentiment_series = vaccineTweet_DF['senti_score'].tolist()
cols = ['Polarity', 'Subjetivity']
sentimentDF = pd.DataFrame(sentiment_series, columns=cols, index=vaccineTweet_DF.index)
sentimentDF.head()

In [None]:
# Drop rows where every column is exactly 0 (a row with any non-zero value
# is kept), then renumber the index from 0.
sentimentDF = sentimentDF.loc[(sentimentDF != 0).any(axis=1)].reset_index(drop=True)
sentimentDF

In [None]:
# Polarity distribution: 30-bin histogram of raw counts (density=False).
plt.hist(sentimentDF['Polarity'], color='darkred', edgecolor='black', density=False, bins= int(30))
plt.title('Polarity Distribution')
plt.xlabel('Polarity')
plt.ylabel('Number of Times')

In [None]:
# Subjectivity distribution: 30-bin histogram with a KDE overlay.
# sns.distplot is deprecated; histplot(kde=True) is its replacement and keeps
# the y-axis in counts, which matches the 'Number of times' label below.
sns.histplot(
    data=sentimentDF,
    x='Subjetivity',
    bins=30,
    kde=True,
    color='darkred',
    edgecolor='black',
)
plt.xlabel('Subjetivity')
plt.ylabel('Number of times')
plt.title('Subjetivity Distribution')