# who won in US president elections 2020? according to tweets

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import io
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from textblob import TextBlob
from nltk.util import ngrams
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/us-election-2020-tweets/hashtag_donaldtrump.csv
/kaggle/input/us-election-2020-tweets/hashtag_joebiden.csv




# Import datasets

In [2]:
Trump_dataset = pd.read_csv('/kaggle/input/us-election-2020-tweets/hashtag_donaldtrump.csv', lineterminator='\n')
Biden_dataset = pd.read_csv('../input/us-election-2020-tweets/hashtag_joebiden.csv',lineterminator='\n')

In [3]:
Trump_dataset = Trump_dataset.drop(columns = ['tweet_id','collected_at','user_description','collected_at'])
Biden_dataset = Biden_dataset.drop(columns = ['tweet_id','collected_at','user_description','collected_at'])

In [4]:
Trump_dataset.loc[:,'condidat'] = 'Trump'
Biden_dataset.loc[:,'condidat'] = 'Biden'

In [5]:
Data_Mixed = pd.concat([Trump_dataset,Biden_dataset])
Data_Mixed.sort_values(by='created_at')
Data_Mixed.head()

Unnamed: 0,created_at,tweet,likes,retweet_count,source,user_id,user_name,user_screen_name,user_join_date,user_followers_count,user_location,lat,long,city,country,continent,state,state_code,condidat
0,2020-10-15 00:00:01,#Elecciones2020 | En #Florida: #JoeBiden dice ...,0.0,0.0,TweetDeck,360666500.0,El Sol Latino News,elsollatinonews,2011-08-23 15:33:45,1860.0,"Philadelphia, PA / Miami, FL",25.77427,-80.19366,,United States of America,North America,Florida,FL,Trump
1,2020-10-15 00:00:01,"Usa 2020, Trump contro Facebook e Twitter: cop...",26.0,9.0,Social Mediaset,331617600.0,Tgcom24,MediasetTgcom24,2011-07-08 13:12:20,1067661.0,,,,,,,,,Trump
2,2020-10-15 00:00:02,"#Trump: As a student I used to hear for years,...",2.0,1.0,Twitter Web App,8436472.0,snarke,snarke,2007-08-26 05:56:11,1185.0,Portland,45.520247,-122.674195,Portland,United States of America,North America,Oregon,OR,Trump
3,2020-10-15 00:00:02,2 hours since last tweet from #Trump! Maybe he...,0.0,0.0,Trumpytweeter,8.283556e+17,Trumpytweeter,trumpytweeter,2017-02-05 21:32:17,32.0,,,,,,,,,Trump
4,2020-10-15 00:00:08,You get a tie! And you get a tie! #Trump ‘s ra...,4.0,3.0,Twitter for iPhone,47413800.0,Rana Abtar - رنا أبتر,Ranaabtar,2009-06-15 19:05:35,5393.0,Washington DC,38.894992,-77.036558,Washington,United States of America,North America,District of Columbia,DC,Trump


# Sentiment Analysis

In [6]:
def clean(text):
    '''Make text lowercase, remove text in square brackets,remove links,remove punctuation
    and remove words containing numbers.'''
    text = str(text).lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)
    text=re.sub(r'@[A-Za-z0-9]+','',text)
    text=re.sub(r'#','',text)
    text=re.sub(r'RT[\s]+','',text)
    text=re.sub(r'[^\w]', ' ', text)
    return text

# create fuction to get the subjectivity and polarity
def getSubjectivity(text):
    return TextBlob(text).sentiment.subjectivity
def getPolarity(text):
    return TextBlob(text).sentiment.polarity
def getAnalysis(score):
    if score < 0:
        return 'negative'
    elif score==0:
        return 'neutral'
    else:
        return 'positive'

# DonaldTrump tweets sentiment analysis

In [7]:
Trump_Tweets = Data_Mixed.query('(condidat == "Trump")').sort_values('user_followers_count',ascending = False).drop_duplicates(['user_name'])[['tweet','country','state']]
Trump_Tweets = Trump_Tweets.dropna().loc[Trump_Tweets.country == 'United States of America']

Trump_Tweets.reset_index(inplace = True, drop = True)

Trump_Tweets['ClearTweet'] = Trump_Tweets['tweet'].apply(clean)

Trump_Tweets['subjectivity']= Trump_Tweets['ClearTweet'].apply(getSubjectivity)
Trump_Tweets['polarity']    = Trump_Tweets['ClearTweet'].apply(getPolarity)
Trump_Tweets['analysis']    = Trump_Tweets['polarity'].apply(getAnalysis)
Trump_Tweets.head()

Unnamed: 0,tweet,country,state,ClearTweet,subjectivity,polarity,analysis
0,"Without a doubt, the #FourSeasons debacle is t...",United States of America,California,without a doubt the fourseasons debacle is th...,0.483333,0.333333,positive
1,"President Donald #Trump’s youngest daughter, T...",United States of America,Illinois,president donald trump s youngest daughter ti...,0.0,0.0,neutral
2,Woah. Have you read this article?\n\nAll the p...,United States of America,California,woah have you read this article all the presi...,0.0,0.0,neutral
3,The latest episode of #SNL tackled #DonaldTrum...,United States of America,California,the latest episode of snl tackled donaldtrump ...,0.8,-0.033333,negative
4,🇺🇸 #Decision2020: @JoeBiden firmará una serie ...,United States of America,Florida,firmará una serie de órdenes ejecutivas ...,0.0,0.0,neutral


In [8]:
Trump_Tweets.polarity = Trump_Tweets.polarity.apply(lambda x: getAnalysis(x))

# JoeBiden tweets sentiment analysis

In [9]:
Biden_Tweets = Data_Mixed.query('(condidat == "Biden")').sort_values('user_followers_count',ascending = False).drop_duplicates(['user_name'])[['tweet','country','state']]
Biden_Tweets = Biden_Tweets.dropna().loc[Biden_Tweets.country == 'United States of America']

Biden_Tweets.reset_index(inplace = True, drop = True)

Biden_Tweets['ClearTweet'] = Biden_Tweets['tweet'].apply(clean)
 
Biden_Tweets['subjectivity']= Biden_Tweets['ClearTweet'].apply(getSubjectivity)
Biden_Tweets['polarity']    = Biden_Tweets['ClearTweet'].apply(getPolarity)
Biden_Tweets['analysis']    = Biden_Tweets['polarity'].apply(getAnalysis)
Biden_Tweets.head()

Unnamed: 0,tweet,country,state,ClearTweet,subjectivity,polarity,analysis
0,#JoeBiden is enlisting A-list star power to he...,United States of America,California,joebiden is enlisting a list star power to hel...,0.0,0.0,neutral
1,While we were campaigning across battleground ...,United States of America,Illinois,while we were campaigning across battleground ...,0.75,0.8,positive
2,The big fight TODAY!!!!! @realDonaldTrump vs #...,United States of America,Nevada,the big fight today vs joebiden who you ...,0.1,0.0,neutral
3,"During his victory speech, #JoeBiden said he u...",United States of America,California,during his victory speech joebiden said he un...,0.75,-0.75,negative
4,Los nueve minutos de colorida celebración tras...,United States of America,Florida,los nueve minutos de colorida celebración tras...,0.0,0.0,neutral


In [10]:
Biden_Tweets.polarity = Biden_Tweets.polarity.apply(lambda x: getAnalysis(x))

In [11]:
res_df = pd.DataFrame(columns=['State','Winner'])

for s in Trump_Tweets.state.unique():
    if s == 'Guam' or s == 'Puerto Rico':
        continue
    trump_copy = Trump_Tweets.copy()
    trump_copy = trump_copy.dropna().loc[trump_copy.state == s]
    trump_neutral  = trump_copy[trump_copy['analysis'] == 'neutral'].ClearTweet.count()
    trump_positive = trump_copy[trump_copy['analysis'] == 'positive'].ClearTweet.count()
    trump_negative = trump_copy[trump_copy['analysis'] == 'negative'].ClearTweet.count()
    biden_copy = Biden_Tweets.copy()
    biden_copy = biden_copy.dropna().loc[biden_copy.state == s]
    biden_neutral  = biden_copy[biden_copy['analysis'] == 'neutral'].ClearTweet.count()
    biden_positive = biden_copy[biden_copy['analysis'] == 'positive'].ClearTweet.count()
    biden_negative = biden_copy[biden_copy['analysis'] == 'negative'].ClearTweet.count()

    if (trump_positive/(trump_neutral+trump_positive+trump_negative) > biden_positive/(biden_neutral+biden_positive+biden_negative)):
        winner = "Donald Trump"
    else:
        winner = "Joe Biden"
    res_df.loc[len(res_df)] = [s, winner]

In [12]:
res_df = res_df.sort_values('State')
res_df = res_df.reset_index(drop=True)
display(res_df)

Unnamed: 0,State,Winner
0,Alabama,Donald Trump
1,Alaska,Joe Biden
2,Arizona,Joe Biden
3,Arkansas,Joe Biden
4,California,Joe Biden
5,Colorado,Joe Biden
6,Connecticut,Joe Biden
7,Delaware,Joe Biden
8,District of Columbia,Joe Biden
9,Florida,Joe Biden


In [13]:
t_general  = Trump_Tweets.groupby('analysis').analysis.count()
t_neutral  = Trump_Tweets[Trump_Tweets['analysis'] == 'neutral'].ClearTweet.count()
t_positive = Trump_Tweets[Trump_Tweets['analysis'] == 'positive'].ClearTweet.count()
t_negative = Trump_Tweets[Trump_Tweets['analysis'] == 'negative'].ClearTweet.count()

fig = go.Figure(data = [go.Funnelarea(labels = ["positivity","negativity","neutrality"], values = [t_positive,t_negative,t_neutral])])
fig.update_layout(title_text ='sentimat analysis tweets Donald Trump')
fig.show()

b_general  = Biden_Tweets.groupby('analysis').analysis.count()
b_neutral  = Biden_Tweets[Biden_Tweets['analysis'] == 'neutral'].ClearTweet.count()
b_positive = Biden_Tweets[Biden_Tweets['analysis'] == 'positive'].ClearTweet.count()
b_negative = Biden_Tweets[Biden_Tweets['analysis'] == 'negative'].ClearTweet.count()

fig = go.Figure(data = [go.Funnelarea(labels = ["positivity","negativity","neutrality"], values = [b_positive,b_negative,b_neutral])])
fig.update_layout(title_text ='sentimat analysis tweets Joe Biden')
fig.show()