In [87]:
#Dependencies
import pandas as pd
import tweepy
from datetime import datetime as dt
from pprint import pprint
import numpy as np
import re
from time import strftime
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

from config import consumer_key, consumer_secret, access_token, access_token_secret

In [88]:
# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [89]:
# Target User
target_user = "@realDonaldTrump"


# Variable to hold Tweet Texts, created date, number of retweets, number of likes
tweet_texts = []
created_date =[]
retweets=[]
likes=[]


# Create a loop to iteratively run API requests to bypass weekly tweeter API limit
for x in range(1,4):

    # Get all tweets from Trump home feed (for specified number of pages)
    public_tweets = api.user_timeline(target_user, page=x)

    # Loop through all tweets
    for tweet in public_tweets:
        #Converting text to lower case
        txt = tweet['text'].lower()
        tweet_texts.append(txt)
        created_date.append(tweet["created_at"])
        retweets.append(tweet["retweet_count"])
        likes.append(tweet["favorite_count"])
        
#Created new dictionary to hold above variables
trump_tweets = {
                "Tweet" : tweet_texts,
                "Created" : created_date,
                "Retweets" : retweets,
                "Likes" : likes,
                }

#Converted dictionary to dataframe 
trump_tweets = pd.DataFrame(trump_tweets)
trump_tweets.head(5)

# print(len(trump_tweets))




Unnamed: 0,Created,Likes,Retweets,Tweet
0,Wed Jun 27 23:14:58 +0000 2018,23172,4760,just landed in north dakota with @senjohnhoeve...
1,Wed Jun 27 21:18:03 +0000 2018,35224,6808,"today, it was my great honor to welcome presid..."
2,Wed Jun 27 20:28:58 +0000 2018,41650,10263,heading to north dakota to fully stand with an...
3,Wed Jun 27 19:13:34 +0000 2018,33140,8472,statement on justice anthony kennedy. #scotus ...
4,Wed Jun 27 17:33:03 +0000 2018,40474,9002,"today, i was thrilled to join student leaders ..."


In [90]:
# Created list of key words
keywords = ['sad','earnings', 'election', 'futures', 'investor', 'low', 'market', 'nasdaq', 'north korea', 'stock',
'market', 'correction', 'trading', 'unemployment','wall street','china','ban',
'bull', 'kim', 'price', 'high', 'corporate', 'global warming', 'tariff', 'tax cut', 'jobs', 'economy', 'trade', 'missile', 'nuclear', 'jobs', 'wages',
'maga']

In [91]:
#Filtered tweets based on keywords and defined variables to hold
filtered_tweet_texts = []
filtered_created_date =[]
filtered_retweets=[]
filtered_likes=[]
filtered_id=[]
keys=[]
#Variables to hold sentiment analysis
compound_list = []
pos_list = []
neu_list = []
neg_list = []
# i = 0

# Looping through tweets
for tweet in trump_tweets.iterrows():
    for key in keywords:
        #Used regular expression to find specific tweets based on keyword
        if re.search(key,tweet[1]['Tweet']):
            #Not all tweets will have keyword, catching tweets that match
            try:
                filtered_tweet_texts.append(tweet[1]["Tweet"])
                filtered_created_date.append(tweet[1]["Created"])
                filtered_retweets.append(tweet[1]["Retweets"])
                filtered_likes.append(tweet[1]["Likes"])
                keys.append(key)
                # Performed sentiment analysis using Vader
                compound = analyzer.polarity_scores(tweet[1]["Tweet"])["compound"]
                pos = analyzer.polarity_scores(tweet[1]["Tweet"])["pos"]
                neu = analyzer.polarity_scores(tweet[1]["Tweet"])["neu"]
                neg = analyzer.polarity_scores(tweet[1]["Tweet"])["neg"]
                compound_list.append(compound)
                pos_list.append(pos)
                neu_list.append(neu)
                neg_list.append(neg)
#                 i += 1   
            except:
                    print('Missing Data')                      
#Create new dictionary for filtered tweets

filtered_trump_tweets = {
                "Tweet" : filtered_tweet_texts,
                "Created" : filtered_created_date,
                "Retweets" : filtered_retweets,
                "Likes" : filtered_likes,
                "Keyword" : keys,
                "Positive" : pos_list,
                "Negative" : neg_list,
                "Neutral" : neu_list,
                "Compound" : compound_list,
                }               

#Convert dictionary to dataframe
filtered_trump_tweets = pd.DataFrame(filtered_trump_tweets)

#Convert date YYYY/MM/DD format
filtered_trump_tweets["Created"] = pd.to_datetime(filtered_trump_tweets["Created"])
filtered_trump_tweets["Created"] = filtered_trump_tweets["Created"].dt.strftime("%Y-%m-%d")

# filtered_trump_tweets.to_csv("DJ_filter.csv", index=False, header=True,encoding='utf-8-sig')


In [92]:
# Grouping by Tweets and concatenating keywords
grouped_keywords = filtered_trump_tweets.groupby(['Tweet'])['Keyword'].apply(list).reset_index()


#merged grouped data frame with filtered data frame
merged_table = pd.merge(grouped_keywords, filtered_trump_tweets, on="Tweet")
# merged_table=merged_table.drop_duplicates(['Tweet'], keep=False, inplace=False)

#Dropped unwanted column
merged_table = merged_table.drop(['Keyword_y'], axis=1)

#Dropped duplicate tweets
merged_table=merged_table.drop_duplicates(['Tweet'], keep='first', inplace=False).reset_index()

#Display
merged_table


Unnamed: 0,index,Tweet,Keyword_x,Compound,Created,Likes,Negative,Neutral,Positive,Retweets
0,0,....we are finishing our study of tariffs on c...,[tariff],0.25,2018-06-26,56674,0.0,0.917,0.083,12361
1,1,....we are getting other countries to reduce a...,"[tariff, trade]",0.0,2018-06-26,63762,0.0,1.0,0.0,14293
2,3,....when i had harley-davidson officials over ...,[tariff],0.0,2018-06-26,56106,0.0,1.0,0.0,12305
3,4,.@repclayhiggins has been a great help to me o...,[low],0.8885,2018-06-25,62000,0.121,0.438,0.441,14502
4,5,congratulations to governor henry mcmaster on ...,[election],0.9412,2018-06-27,63160,0.0,0.52,0.48,12973
5,6,"congresswoman maxine waters, an extraordinaril...",[low],-0.2732,2018-06-25,144698,0.11,0.89,0.0,42332
6,7,heading to nevada to talk trade and immigratio...,"[economy, trade]",0.6705,2018-06-23,80073,0.0,0.756,0.244,15549
7,9,it’s very sad that nancy pelosi and her sideki...,[sad],-0.624,2018-06-23,154190,0.26,0.595,0.145,38290
8,10,just landed in south carolina - will be at the...,[maga],0.0,2018-06-25,51963,0.0,1.0,0.0,7955
9,11,major wall street journal opinion piece today ...,[wall street],-0.6908,2018-06-24,84691,0.231,0.769,0.0,25524


In [102]:
# Grouped by date 
grouped_date = merged_table.groupby(['Created']).agg({"Likes": "sum","Retweets": "sum","Compound":"mean",
                                                      "Negative":"mean","Neutral":"mean","Positive":"mean"}).apply(list).reset_index()
grouped_date

Unnamed: 0,Created,Likes,Retweets,Compound,Negative,Neutral,Positive
0,2018-06-23,276846,63462,0.155467,0.086667,0.646667,0.266667
1,2018-06-24,319553,85162,-0.234433,0.131,0.826333,0.042667
2,2018-06-25,258661,78551,0.375325,0.05775,0.74125,0.201
3,2018-06-26,344789,76548,0.218475,0.05875,0.79475,0.1465
4,2018-06-27,140860,30894,0.0786,0.1725,0.5475,0.28


In [58]:
filtered_trump_tweets.index

RangeIndex(start=0, stop=10, step=1)