In [None]:
import numpy as np
import pandas as pd

from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import nltk
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

import time
from datetime import datetime as dt, date, time, timedelta

import plotly.express as px
import re
import plotly.graph_objects as go

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from subprocess import check_output
from wordcloud import WordCloud, STOPWORDS

import cufflinks as cf
# Enable cufflinks' offline mode so the DataFrame/Series `.iplot()` calls used
# later in the notebook can render inside the notebook.
cf.go_offline()
# NOTE(review): `offline=False` looks at odds with the go_offline() call above;
# this pair is a commonly copied snippet - confirm which mode is intended.
cf.set_config_file(offline=False, world_readable=True)

In [None]:
# Columns kept from the raw Kaggle export, and the friendlier names they are
# given for the rest of the notebook.
TWEET_COLUMNS = ['created_at', 'tweet', 'user_screen_name', 'user_location',
                 'source', 'country', 'continent', 'state', 'lat', 'long']
COLUMN_RENAMES = {
    'created_at': 'Timestamp',
    'tweet': 'Text',
    'user_screen_name': 'Username',
    'user_location': 'Location',
    'source': 'Device',
    'lat': 'Latitude',
    'long': 'Longitude',
}


def _load_hashtag_tweets(csv_path):
    """Read one hashtag CSV and return the cleaned, renamed tweet frame.

    Rows with a missing value in ANY column of the raw file are dropped
    before the column subset is taken (same order as the original cell), so
    a gap in a column that is not kept still removes the row.

    Parameters
    ----------
    csv_path : str
        Path to the raw hashtag CSV.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only TWEET_COLUMNS, renamed per COLUMN_RENAMES.
    """
    raw = pd.read_csv(csv_path, lineterminator='\n')
    # rename() returns a new frame, avoiding in-place mutation of a derived slice.
    return raw.dropna()[TWEET_COLUMNS].rename(columns=COLUMN_RENAMES)


df1 = _load_hashtag_tweets('../input/us-election-2020-tweets/hashtag_donaldtrump.csv')
df2 = _load_hashtag_tweets('../input/us-election-2020-tweets/hashtag_joebiden.csv')

# Persist the cleaned frames; the analysis sections reload them from disk.
df1.to_csv("tweet_Trump.csv", index=False)
df2.to_csv("tweet_Biden.csv", index=False)

## Trump Tweets Data-Preprocessing

In [None]:
# Reload the cleaned Trump tweets from the CSV written by the loading cell.
analysis_Trump = pd.read_csv('tweet_Trump.csv')
# Quick sanity check: (rows, columns) followed by a preview of the first rows.
print(analysis_Trump.shape)
analysis_Trump.head()

In [None]:
# Force the tweet text to `str` so every entry can be handed to TextBlob and
# joined into a single corpus string later on.
analysis_Trump['Text'] = analysis_Trump['Text'].astype(str)
# Show the resulting column dtypes.
analysis_Trump.dtypes

In [None]:
# Wrap each tweet in a TextBlob and read its sentiment once per tweet.
Tdesc_blob = list(map(TextBlob, analysis_Trump['Text']))
trump_sentiments = [blob.sentiment for blob in Tdesc_blob]

# Store both sentiment metrics as new columns of the dataframe.
analysis_Trump['polarity'] = [score.polarity for score in trump_sentiments]
analysis_Trump['Subjectivity'] = [score.subjectivity for score in trump_sentiments]

# Display the dataframe with the new columns.
analysis_Trump

In [None]:
# Bucket each tweet by the sign of its polarity score.
trump_polarity = analysis_Trump.polarity
trump_sentiment_masks = (
    (trump_polarity > 0, 'positive'),
    (trump_polarity == 0, 'neutral'),
    (trump_polarity < 0, 'negative'),
)
for sentiment_mask, sentiment_label in trump_sentiment_masks:
    analysis_Trump.loc[sentiment_mask, 'SENTIMENT'] = sentiment_label
analysis_Trump

## Biden Tweets Data-Preprocessing

In [None]:
# Reload the cleaned Biden tweets from the CSV written by the loading cell.
analysis_Biden = pd.read_csv('tweet_Biden.csv')
# Quick sanity check: (rows, columns) followed by a preview of the first rows.
print(analysis_Biden.shape)
analysis_Biden.head()

In [None]:
# Force the tweet text to `str` so every entry can be handed to TextBlob and
# joined into a single corpus string later on.
analysis_Biden['Text'] = analysis_Biden['Text'].astype(str)
# Show the resulting column dtypes.
analysis_Biden.dtypes

In [None]:
# Wrap each tweet in a TextBlob and read its sentiment once per tweet.
Bdesc_blob = list(map(TextBlob, analysis_Biden['Text']))
biden_sentiments = [blob.sentiment for blob in Bdesc_blob]

# Store both sentiment metrics as new columns of the dataframe.
analysis_Biden['polarity'] = [score.polarity for score in biden_sentiments]
analysis_Biden['Subjectivity'] = [score.subjectivity for score in biden_sentiments]

# Display the dataframe with the new columns.
analysis_Biden

In [None]:
# Bucket each tweet by the sign of its polarity score.
biden_polarity = analysis_Biden.polarity
biden_sentiment_masks = (
    (biden_polarity > 0, 'positive'),
    (biden_polarity == 0, 'neutral'),
    (biden_polarity < 0, 'negative'),
)
for sentiment_mask, sentiment_label in biden_sentiment_masks:
    analysis_Biden.loc[sentiment_mask, 'SENTIMENT'] = sentiment_label
analysis_Biden

## Visualization of Trump-Related Tweets

In [None]:
# One marker per Trump-related tweet: posting time against polarity,
# coloured by sentiment label and sized by subjectivity.
trump_scatter_options = dict(
    x="Timestamp",                       # posting time on the x axis
    y="polarity",                        # polarity score on the y axis
    color="SENTIMENT",                   # colour follows the sentiment label
    color_discrete_sequence=["lightseagreen", "indianred", "mediumpurple"],
    size="Subjectivity",                 # more subjective tweets get bigger markers
    size_max=10,                         # cap the marker size
    hover_data=["Location", "Username"],  # extra fields shown on hover
    labels={"polarity": "Tweet positivity", "Timestamp": "Date Tweet was posted"},
    title="Trump-Related Tweets Analysis",
)
fig1 = px.scatter(analysis_Trump, **trump_scatter_options)

fig1.show()

In [None]:
# Merge every Trump-related tweet into one corpus string for word statistics.
content_Trump = ' '.join(analysis_Trump["Text"])
# Strip URLs.
content_Trump = re.sub(r"http\S+", "", content_Trump)
# Decode the HTML ampersand entity; the padding keeps neighbouring words apart
# (surplus whitespace is collapsed by the final substitution below).
content_Trump = content_Trump.replace('&amp;', ' and ')
# Remove only the standalone retweet marker, in any letter case. The previous
# plain replace of 'rt ' missed upper-case "RT " and chopped words that merely
# end in "rt" ("support " -> "suppo ").
content_Trump = re.sub(r"\brt\b", " ", content_Trump, flags=re.IGNORECASE)
# Keep ASCII letters and digits only; every other run becomes a single space.
content_Trump = re.sub('[^A-Za-z0-9]+', ' ', content_Trump)
content_Trump = content_Trump.lower()

In [None]:
# Tokenise the cleaned Trump corpus and discard English stop words.
tokenized_word = word_tokenize(content_Trump)
stop_words = set(stopwords.words("english"))
filtered_sent = [w for w in tokenized_word if w not in stop_words]

# Frequency table of the remaining tokens.
fdist = FreqDist(filtered_sent)

# Take the ten most common words and drop the single most frequent one (row 0).
# reset_index(drop=True) replaces the earlier argument-less .reindex(), which
# was a no-op and left the index starting at 1.
fd = (
    pd.DataFrame(fdist.most_common(10), columns=["Word", "Frequency"])
    .drop([0])
    .reset_index(drop=True)
)

In [None]:
# Bar chart of the most frequent words in Trump-related tweets.
trump_bar_style = dict(
    marker_color='rgb(240,128,128)',
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.8,
)
fig2 = px.bar(fd, x="Word", y="Frequency")
fig2.update_traces(**trump_bar_style)
fig2.show()

In [None]:
# Word cloud of the cleaned Trump corpus.
wc_b = WordCloud(
    width=1600,
    height=1600,
    background_color="white",
    max_words=2000,
    max_font_size=256,
    random_state=42,  # fixed seed so the layout is reproducible
    stopwords=STOPWORDS,
)
wc_b.generate(content_Trump)

# Draw the cloud on an explicit square Axes with the axis decorations hidden.
trump_cloud_fig, trump_cloud_ax = plt.subplots(1, 1, figsize=(9, 9))
trump_cloud_ax.imshow(wc_b, interpolation="bilinear")
trump_cloud_ax.axis('off')
plt.show()

In [None]:
# Histogram of the polarity scores of Trump-related tweets.
trump_hist_options = {
    'kind': 'hist',
    'bins': 50,
    'linecolor': 'black',
    'xTitle': 'polarity',
    'yTitle': 'count',
    'title': 'Sentiment Polarity Distribution',
}
analysis_Trump['polarity'].iplot(**trump_hist_options)

In [None]:
def make_dictionary(dataframe,column):
    """Count how often each distinct value occurs in ``dataframe[column]``.

    Parameters
    ----------
    dataframe : indexable by ``column`` (used here with a pandas DataFrame)
    column : key selecting the values to tally

    Returns
    -------
    dict
        Maps each value to its number of occurrences; keys appear in the
        order in which the values were first encountered.
    """
    tally = {}
    for value in dataframe[column]:
        # Start unseen values at zero, then count this occurrence.
        tally[value] = tally.get(value, 0) + 1
    return tally

In [None]:
# Tally Trump-related tweets per country, state and continent.
country_Trump, state_Trump, continent_Trump = (
    make_dictionary(analysis_Trump, geo_column)
    for geo_column in ('country', 'state', 'continent')
)
# Show the per-continent counts.
continent_Trump

In [None]:
# Continents shown in the pie chart, in display order.
labelsT = ['North America', 'Europe', 'Oceania', 'Asia', 'South America', 'Africa']
# Read the counts from the continent_Trump tally instead of hand-copied numeric
# strings, so the chart always matches the loaded data. A continent missing
# from the data counts as 0.
valuesT = [continent_Trump.get(continent, 0) for continent in labelsT]

In [None]:
# Pie chart: share of Trump-related tweets per continent.
trump_continent_pie = go.Pie(labels=labelsT, values=valuesT)
figT = go.Figure(data=[trump_continent_pie])
figT.show()

## Visualization of Biden-Related Tweets

In [None]:
# One marker per Biden-related tweet: posting time against polarity,
# coloured by sentiment label and sized by subjectivity.
biden_scatter_options = dict(
    x="Timestamp",                       # posting time on the x axis
    y="polarity",                        # polarity score on the y axis
    color="SENTIMENT",                   # colour follows the sentiment label
    color_discrete_sequence=["lightseagreen", "indianred", "mediumpurple"],
    size="Subjectivity",                 # more subjective tweets get bigger markers
    size_max=10,                         # cap the marker size
    hover_data=["Location", "Username"],  # extra fields shown on hover
    labels={"polarity": "Tweet positivity", "Timestamp": "Date Tweet was posted"},
    title="Biden-Related Tweets Analysis",
)
fig1 = px.scatter(analysis_Biden, **biden_scatter_options)

fig1.show()

In [None]:
# Merge every Biden-related tweet into one corpus string for word statistics.
content_Biden = ' '.join(analysis_Biden["Text"])
# Strip URLs.
content_Biden = re.sub(r"http\S+", "", content_Biden)
# Decode the HTML ampersand entity; the padding keeps neighbouring words apart
# (surplus whitespace is collapsed by the final substitution below).
content_Biden = content_Biden.replace('&amp;', ' and ')
# Remove only the standalone retweet marker, in any letter case. The previous
# plain replace of 'rt ' missed upper-case "RT " and chopped words that merely
# end in "rt" ("support " -> "suppo ").
content_Biden = re.sub(r"\brt\b", " ", content_Biden, flags=re.IGNORECASE)
# Keep ASCII letters and digits only; every other run becomes a single space.
content_Biden = re.sub('[^A-Za-z0-9]+', ' ', content_Biden)
content_Biden = content_Biden.lower()

In [None]:
# Tokenise the cleaned Biden corpus and discard English stop words.
tokenized_word = word_tokenize(content_Biden)
stop_words = set(stopwords.words("english"))
filtered_sent = [w for w in tokenized_word if w not in stop_words]

# Frequency table of the remaining tokens.
fdist1 = FreqDist(filtered_sent)

# Take the ten most common words and drop the single most frequent one (row 0).
# reset_index(drop=True) replaces the earlier argument-less .reindex(), which
# was a no-op and left the index starting at 1.
fd1 = (
    pd.DataFrame(fdist1.most_common(10), columns=["Word", "Frequency"])
    .drop([0])
    .reset_index(drop=True)
)

In [None]:
# Bar chart of the most frequent words in Biden-related tweets.
biden_bar_style = dict(
    marker_color='rgb(240,128,128)',
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.8,
)
fig3 = px.bar(fd1, x="Word", y="Frequency")
fig3.update_traces(**biden_bar_style)
fig3.show()

In [None]:
# Word cloud of the cleaned Biden corpus.
wc_b = WordCloud(
    width=1600,
    height=1600,
    background_color="white",
    max_words=2000,
    max_font_size=256,
    random_state=42,  # fixed seed so the layout is reproducible
    stopwords=STOPWORDS,
)
wc_b.generate(content_Biden)

# Draw the cloud on an explicit square Axes with the axis decorations hidden.
biden_cloud_fig, biden_cloud_ax = plt.subplots(1, 1, figsize=(9, 9))
biden_cloud_ax.imshow(wc_b, interpolation="bilinear")
biden_cloud_ax.axis('off')
plt.show()

In [None]:
# Histogram of the polarity scores of Biden-related tweets.
biden_hist_options = {
    'kind': 'hist',
    'bins': 50,
    'linecolor': 'black',
    'xTitle': 'polarity',
    'yTitle': 'count',
    'title': 'Sentiment Polarity Distribution',
}
analysis_Biden['polarity'].iplot(**biden_hist_options)

In [None]:
# Tally Biden-related tweets per country, state and continent.
country_biden, state_biden, continent_biden = (
    make_dictionary(analysis_Biden, geo_column)
    for geo_column in ('country', 'state', 'continent')
)
# Show the per-continent counts.
continent_biden

In [None]:
# Continents shown in the pie chart, in display order.
labelsB = ['North America', 'Europe', 'Oceania', 'Asia', 'South America', 'Africa']
# Read the counts from the continent_biden tally instead of hand-copied numeric
# strings, so the chart always matches the loaded data. A continent missing
# from the data counts as 0.
valuesB = [continent_biden.get(continent, 0) for continent in labelsB]

In [None]:
# Pie chart: share of Biden-related tweets per continent.
biden_continent_pie = go.Pie(labels=labelsB, values=valuesB)
figB = go.Figure(data=[biden_continent_pie])
figB.show()