In [2]:
# Required libraries
import pandas as pd
from textblob import TextBlob
from wordcloud import WordCloud, STOPWORDS
from collections import Counter
import emoji
from datetime import datetime

In [3]:
# Load dataset with caching
def load_data():
    return pd.read_csv(r"C:\Users\User\iCloudDrive\Cursos\Data Circle\DataCircle_Twitter_Project\twitter_cleaned_data.csv", lineterminator='\n')

# Load data
twitter_df = load_data()

In [4]:
# Clean column names
twitter_df.columns = twitter_df.columns.str.replace('\r', '')

# Convert created_at to datetime
twitter_df['created_at'] = pd.to_datetime(twitter_df['created_at'], errors='coerce')

In [5]:
# Sentiment analysis function
def get_sentiment(text):
    analysis = TextBlob(text)
    if analysis.sentiment.polarity > 0:
        return 'positive'
    elif analysis.sentiment.polarity == 0:
        return 'neutral'
    else:
        return 'negative'

# Apply sentiment analysis to the DataFrame
twitter_df['sentiment'] = twitter_df['tweet_cleaned'].apply(get_sentiment)

In [6]:
twitter_df.head(5)

Unnamed: 0,created_at,tweet_id,tweet,likes,retweet_count,source,user_id,user_join_date,user_followers_count,user_location,city,state,candidate,tweet_cleaned,country,sentiment
0,2020-10-15 00:00:02,1316529228091846912,"#Trump: As a student I used to hear for years,...",2,1,twitter web app,8436472,2007-08-26 05:56:11,1185,portland,portland,oregon,trump,#trump student used hear years ten years heard...,united states\r,positive
1,2020-10-15 00:00:02,1316529227471237120,2 hours since last tweet from #Trump! Maybe he...,0,0,trumpytweeter,828355589206056960,2017-02-05 21:32:17,32,unknown,unknown,unknown,trump,2 hours since last tweet #trump maybe busy tre...,unknown\r,positive
2,2020-10-15 00:00:08,1316529252301451264,You get a tie! And you get a tie! #Trump ‘s ra...,4,3,twitter for iphone,47413798,2009-06-15 19:05:35,5393,washington dc,washington,district of columbia,trump,get tie get tie #trump rally #iowa,united states\r,neutral
3,2020-10-15 00:00:17,1316529291052675072,@CLady62 Her 15 minutes were over long time ag...,2,0,twitter for android,1138416104,2013-02-01 01:37:38,2363,perriscalifornia,unknown,california,trump,clady62 15 minutes long time ago omarosa never...,united states\r,negative
4,2020-10-15 00:00:17,1316529289949569024,@richardmarx Glad u got out of the house! DICK...,0,0,twitter for iphone,767401841030209536,2016-08-21 16:43:51,75,powell tn,unknown,unknown,trump,richardmarx glad u got house dick#trump 2020,unknown\r,positive


In [7]:
twitter_df['country'] = twitter_df['country'].str.replace('\r', '')

In [8]:
twitter_df.head(5)

Unnamed: 0,created_at,tweet_id,tweet,likes,retweet_count,source,user_id,user_join_date,user_followers_count,user_location,city,state,candidate,tweet_cleaned,country,sentiment
0,2020-10-15 00:00:02,1316529228091846912,"#Trump: As a student I used to hear for years,...",2,1,twitter web app,8436472,2007-08-26 05:56:11,1185,portland,portland,oregon,trump,#trump student used hear years ten years heard...,united states,positive
1,2020-10-15 00:00:02,1316529227471237120,2 hours since last tweet from #Trump! Maybe he...,0,0,trumpytweeter,828355589206056960,2017-02-05 21:32:17,32,unknown,unknown,unknown,trump,2 hours since last tweet #trump maybe busy tre...,unknown,positive
2,2020-10-15 00:00:08,1316529252301451264,You get a tie! And you get a tie! #Trump ‘s ra...,4,3,twitter for iphone,47413798,2009-06-15 19:05:35,5393,washington dc,washington,district of columbia,trump,get tie get tie #trump rally #iowa,united states,neutral
3,2020-10-15 00:00:17,1316529291052675072,@CLady62 Her 15 minutes were over long time ag...,2,0,twitter for android,1138416104,2013-02-01 01:37:38,2363,perriscalifornia,unknown,california,trump,clady62 15 minutes long time ago omarosa never...,united states,negative
4,2020-10-15 00:00:17,1316529289949569024,@richardmarx Glad u got out of the house! DICK...,0,0,twitter for iphone,767401841030209536,2016-08-21 16:43:51,75,powell tn,unknown,unknown,trump,richardmarx glad u got house dick#trump 2020,unknown,positive


In [9]:
# Create CSV for sentiment analysis
twitter_df.to_csv('twitter_sentiment.csv', index=False)

In [10]:
# Create a df for each candidate
biden_df = twitter_df.loc[twitter_df['candidate']=='biden']
trump_df = twitter_df.loc[twitter_df['candidate']=='trump']

In [11]:
# WordCloud Function
def generate_wordcloud(df, sentiment):
        # Get the text to be used
        text = ' '.join(df[df['sentiment'] == sentiment]['tweet_cleaned'])

        # Add 'amp' as a stopword
        stopwords = STOPWORDS.union({'amp'})
        
        # Generate the word cloud
        wordcloud = WordCloud(stopwords=stopwords).generate(text)
        
        # Extract word frequencies
        word_freq = wordcloud.words_
        
        # Convert to DataFrame
        df = pd.DataFrame(list(word_freq.items()), columns=['Word', 'Frequency'])
        
        return df

# Apply wordcloud function for each candidate sentiment (positive and negative)
biden_positive_wordcloud_df = generate_wordcloud(biden_df, 'positive')
biden_negative_wordcloud_df = generate_wordcloud(biden_df, 'negative')
trump_positive_wordcloud_df = generate_wordcloud(trump_df, 'positive')
trump_negative_wordcloud_df = generate_wordcloud(trump_df, 'negative')

In [13]:
# Create CSV for WordCloud analysis
biden_positive_wordcloud_df.to_csv('biden_positive_wordcloud.csv', index=False)
biden_negative_wordcloud_df.to_csv('biden_negative_wordcloud.csv', index=False)
trump_positive_wordcloud_df.to_csv('trump_positive_wordcloud.csv', index=False)
trump_negative_wordcloud_df.to_csv('trump_negative_wordcloud.csv', index=False)

In [50]:
biden_negative_wordcloud_df

Unnamed: 0,Word,Frequency
0,joebiden,1.000000
1,biden,0.985406
2,trump,0.361263
3,realdonaldtrump,0.246084
4,joe biden,0.240549
...,...,...
195,point,0.031390
196,biden corrupt,0.031201
197,2020election,0.031138
198,name,0.030949


In [38]:
# Biden Emoji creation
biden_emojis_list = []
biden_recognized_emojis = set(emoji.EMOJI_DATA.keys())

for tweet in biden_df['tweet']:
    for char in tweet:
        if char in biden_recognized_emojis:
            biden_emojis_list.append(char)


# Trump Emoji creation
trump_emojis_list = []
trump_recognized_emojis = set(emoji.EMOJI_DATA.keys())

for tweet in trump_df['tweet']:
    for char in tweet:
        if char in trump_recognized_emojis:
            trump_emojis_list.append(char)

In [43]:
# Frequency count of emojis

# Biden
biden_emoji_counts = Counter(biden_emojis_list)
biden_top_emojis = biden_emoji_counts.most_common(10)

# Trump
trump_emoji_counts = Counter(trump_emojis_list)
trump_top_emojis = trump_emoji_counts.most_common(10)

In [46]:
# Create DataFrame for the top emojis
biden_emoji_df = pd.DataFrame(biden_top_emojis, columns=['Emoji', 'Frequency'])
trump_emoji_df = pd.DataFrame(trump_top_emojis, columns=['Emoji', 'Frequency'])

In [49]:
# Create CSV for emoji analysis
biden_emoji_df.to_csv('biden_emojis.csv')
trump_emoji_df.to_csv('trump_emojis.csv')

In [51]:
biden_emoji_df

Unnamed: 0,Emoji,Frequency
0,💙,17506
1,😂,12943
2,🤣,8944
3,🌊,6947
4,👏,6741
5,❤,6107
6,🏻,4997
7,🙏,4813
8,👇,3912
9,🏼,3907
