In [None]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
from wordcloud import WordCloud, ImageColorGenerator

In [None]:
# Location of the vaccination tweets CSV, relative to the notebook's working directory.
tweets_csv_path = '../input/all-covid19-vaccines-tweets/vaccination_all_tweets.csv'

# Read the full file into a DataFrame and preview the first three rows.
tweet_df = pd.read_csv(tweets_csv_path)
tweet_df.head(3)

In [None]:
# Keep only the three columns this notebook assigns to or reads later on.
# The explicit .copy() makes the result an independent frame, so the column
# assignment on the next line does not trigger pandas' SettingWithCopyWarning.
tweet_df = tweet_df[['user_location', 'date', 'text']].copy()

# Parse the date strings into datetime values.
tweet_df['date'] = pd.to_datetime(tweet_df['date'])

# Remove rows whose tweet text is an exact repeat (first occurrence is kept).
tweet_df = tweet_df.drop_duplicates('text')

# Report, per remaining column, the percentage of missing values.
for column in tweet_df.columns:
    null_percent = (tweet_df[column].isna().sum() / len(tweet_df[column]) * 100).round(2)
    print(f'null percent {column}: {null_percent}%')

In [None]:
#tweet_df['user_location'] = tweet_df['user_location'].fillna(tweet_df['user_location'].mode()[0])

In [None]:
def clean_data(text):
    """Normalise a raw tweet for sentiment scoring.

    Removes @mentions, '#' characters (the hashtag word itself is kept),
    a standalone retweet marker ``RT`` followed by whitespace, and
    http/https URLs, then lower-cases the result.

    Parameters
    ----------
    text : str
        Raw tweet text. Any non-string value (e.g. NaN for a missing tweet)
        is treated as an empty tweet.

    Returns
    -------
    str
        The cleaned, lower-cased text. Whitespace left behind by removed
        tokens is not collapsed.
    """
    # Missing values arrive as float NaN from pandas; re.sub would raise on them.
    if not isinstance(text, str):
        return ''

    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'#', '', text)
    # \b...\b restricts the match to the standalone token "RT"; without it,
    # words that merely end in "RT" (e.g. "ALERT ", "SMART ") lose their tail.
    text = re.sub(r'\bRT\b[\s]+', '', text)
    text = re.sub(r'https?:\/\/\S+', '', text)

    return text.lower()

In [None]:
# Run every tweet through clean_data and store the result back in the 'text'
# column, then preview the first three rows.
cleaned_text = tweet_df['text'].apply(clean_data)
tweet_df['text'] = cleaned_text
tweet_df.head(3)

In [None]:
def get_subjectivity(text):
    """Return the TextBlob subjectivity score of ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity

def get_polarity(text):
    """Return the TextBlob polarity score of ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity

def get_sentiment(score):
    """Map a numeric polarity score to a sentiment label.

    Returns 'Positive' for a score above zero, 'Neutral' for a score equal
    to zero, and 'Negative' for anything else.
    """
    if score > 0:
        return 'Positive'
    if score == 0:
        return 'Neutral'
    return 'Negative'

In [None]:
# Score each cleaned tweet: one new column per TextBlob measure.
for score_column, scorer in (('subjectivity', get_subjectivity), ('polarity', get_polarity)):
    tweet_df[score_column] = tweet_df['text'].apply(scorer)

# Turn the numeric polarity into a Positive / Neutral / Negative label.
polarity_scores = tweet_df['polarity']
tweet_df['sentiment'] = polarity_scores.apply(get_sentiment)
tweet_df.head(3)

In [None]:
# Join every tweet in the 'text' column into one space-separated string.
# str.join accepts the Series directly; no intermediate list is needed.
words = ' '.join(tweet_df['text'])

# Build the word cloud; random_state is fixed so the layout is the same on every run.
word_cloud = WordCloud(
    background_color='white',
    width=1000,
    height=500,
    max_words=250,
    random_state=20,
).generate(words)

# Draw on the Axes created here rather than on pyplot's implicit "current"
# axes, so the figure this cell shows is exactly the one it created.
fig, ax = plt.subplots(figsize=(12,6))
ax.imshow(word_cloud, interpolation='bilinear')
ax.axis('off');  # trailing ';' keeps the axis-limits tuple out of the cell output

In [None]:
def remove_border(axes=None):
    """Hide the top, right, bottom and left spines of an Axes.

    Parameters
    ----------
    axes : object with a ``spines`` mapping, optional
        The Axes to modify. When omitted, the notebook-level variable ``ax``
        is used, so existing ``remove_border()`` calls behave as before.
        Passing the Axes explicitly avoids depending on whichever cell last
        assigned ``ax``.
    """
    target = ax if axes is None else axes
    for side in ('top', 'right', 'bottom', 'left'):
        target.spines[side].set_visible(False)

In [None]:
# Sentiment classes in display order; `colors` and `explode` are index-aligned with it.
labels = ['Positive', 'Neutral', 'Negative']

# One RGBA hex colour per sentiment class.
colors = ['#9ED9CCFF', '#B0B8B4FF', '#FAA094FF']

# Same small offset for every pie wedge.
explode = [0.01] * len(labels)

In [None]:
# Fraction of tweets in each sentiment class, ordered to match `labels`
# (and therefore `colors` / `explode`). reindex is used instead of [labels]
# so a class with no tweets becomes a 0 share rather than raising KeyError.
percent_sentiment = (
    tweet_df['sentiment']
    .value_counts(normalize=True)
    .reindex(labels, fill_value=0)
)

# Not used in this cell; kept because other cells may reference it.
font={'fontname': 'Helvetica'}

fig = plt.figure(figsize=(8,8))
# NOTE(review): 'Comic Sans' may not resolve to an installed font family, in
# which case matplotlib falls back to its default font - confirm the intended font.
plt.pie(
    percent_sentiment,
    labels=labels,
    colors=colors,
    explode=explode,
    textprops={'fontsize': 14, 'fontname': 'Comic Sans'},
);  # trailing ';' keeps the returned artist lists out of the cell output

In [None]:
# Pie chart of sentiment shares with the percentage printed inside each wedge.
fig, ax = plt.subplots(figsize=(12,6))
patches, texts, autotexts = ax.pie(percent_sentiment, colors=colors, autopct='%1.2f%%')

# Style the percentage labels. The setters return None, so their result must
# not be assigned back to the loop variable: doing so made the second call
# run on None and raise AttributeError.
for pct_label in autotexts:
    pct_label.set_color('white')
    pct_label.set_fontsize(14)

# Legend entries are matched to wedges by position - assumes percent_sentiment
# is ordered like `labels`.
ax.legend(labels, frameon=False, loc='right');