# 1. Loading Modules and API Keys

In [19]:
import os
from datetime import datetime
from matplotlib import pyplot as plt
import tweepy
import pandas as pd
from dotenv import load_dotenv
import geocoder
import seaborn as sns

In [20]:
load_dotenv()
consumer_key = os.environ['API_KEY']
consumer_secret = os.environ['API_KEY_SECRET']
access_token = os.environ['ACCESS_TOKEN']
access_token_secret = os.environ['ACCESS_TOKEN_SECRET']
bearer_token = os.environ['BEARER_TOKEN']

# 2. API Authorization

In [21]:
auth = tweepy.OAuth1UserHandler(
  consumer_key, 
  consumer_secret, 
  access_token, 
  access_token_secret
)

In [22]:
api = tweepy.API(auth, wait_on_rate_limit=True)

# 3. Defining Functions

## 3.1 Function for Location-specific trends

We will create a method that 
1. converts **Location** to **Long & Lat** using geocoder.osm()
2. get a response for the closest location in the form of WOEID using closet_trends()
3. get place-specific trends by providing WOEID to get_place_trends()
4. then, returns top 50 trending topics for that location.

In [23]:
def get_trends(api, loc):
    # Object that has location's latitude and longitude.
    g = geocoder.osm(loc)
    
    # we will retrieve the closest latitude and longitude and apply as parameters to API's closest_trends() method.
    closest_loc = api.closest_trends(g.lat, g.lng)
    
    # Once we get the closest_lo
    trends = api.get_place_trends(closest_loc[0]["woeid"])
    return trends[0]["trends"]

## 3.2 Functions to Find Locations with Currently Available Trends
Optional section.

In [24]:
def get_locations():
    countries = []
    available_trends = api.available_trends()
    country_data = pd.DataFrame.from_records(available_trends)
    
    #The response contains unnecessary info such as parentid, url and 'Town' from placeType column, to be removed or filtered.
    country_data = country_data.drop(columns = ['parentid'])
    #towns = country_data['placeType'] == {'code': 7, 'name': 'Town'}
    #country_data = country_data[~towns]
    return country_data

#country_location_data = get_locations()
#country_location_data['country'].unique()

## 3.3 Function to find location-specific trends

In [25]:
def get_location_specific_trends(locations):
    trends_in_countries = []
    for location in locations:
        trend = get_trends(api, location)
        trend_data = pd.DataFrame.from_records(trend)
        trend_data['country'] = location

        # remove the rows if the tweet volume is not available
        trend_data = trend_data[~trend_data['tweet_volume'].isna()]

        # remove the promoted_content column as it serves no purpose
        trend_data = trend_data.drop(columns = ['promoted_content'])

        #remove any duplicates in the 'name' column since the location contents are more than one.
        trend_data = trend_data.drop_duplicates(subset=['name'], keep='first')

        # sort records by tweet_volume
        trend_data = trend_data.sort_values('tweet_volume', ascending = False)
        
        trend_data = trend_data[['country','name','tweet_volume','url']]
        trend_data.columns = ['country','trending_topic','tweet_volume','url']
        trends_in_countries.append(trend_data)
    return trends_in_countries

## 3.4 Functions to extract all hashtags and phrases trending.

In [26]:
def extract_hashtags(trend_list):
    hashtags = []
    for name in trend_list['trending_topic']:
        if name not in hashtags:
            if '#' in name:
                hashtags.append(name)
    return hashtags

def extract_phrases(trend_list):
    phrases = []
    for name in trend_list['trending_topic']:
        if name not in phrases:
            if '#' not in name:
                phrases.append(name)
    return phrases
#popular_hashtags = extract_hashtags(merged_df)
#popular_hashtags

## 3.5 Function to export all records to CSV format
(Optional cell below, uncomment the line and run the function to export the data)

In [27]:
def export2csv(trends_df):
    trends_df.to_csv('export.csv')

## 3.6 Function to find Top Tweets in a trend
Find Top 50 tweets that are contributing to a trend. (50 is default number of tweets allowed by API)

In [28]:
def trendsetters(keywords, limit, until_date):
    tweet_list = []
    for keyword in keywords:
        tweets = tweepy.Cursor(api.search_tweets,
                               q=keyword + "-is:retweet",
                               lang='en',
                               tweet_mode='extended',
                               result_type='popular',
                               until=until_date).items(limit)
        for tweet in tweets:
            tweet_data = [keyword,
                          tweet.user.screen_name,
                          tweet.created_at,
                          tweet.full_text, 
                          tweet.favorite_count, 
                          tweet.retweet_count
                         ]
            tweet_list.append(tweet_data)
    trending_tweets_df = pd.DataFrame(tweet_list, columns = ['topic',
                                                             'user',
                                                             'created_at',
                                                             'content',
                                                             'favorites',
                                                             'retweets'])
    #Show only date, not time.
    trending_tweets_df["created_at"] = pd.to_datetime(trending_tweets_df["created_at"]).dt.date
    trending_tweets_df = trending_tweets_df.sort_values('favorites', ascending = False)    
    return trending_tweets_df

# trendsetters(query, limit, until_date)

## 3.7 Function to retrieve tweets from a user
Get as many tweets from a user's timeline

In [29]:
"""
Use while True to keep extracting 200 records at a time
After one extraction, use the ID earliest created record,
and start the extraction from the previous one for another 200 records.
"""

#Start recording from the latest tweet
def loop_tweet_extraction(username):
    tweets = api.user_timeline(screen_name=username, 
                               # 200 is the maximum allowed count
                               count=3000,
                               include_rts = False,
                               tweet_mode = 'extended',
                               exclude_replies = True
                               )
    all_tweets = []
    all_tweets.extend(tweets)
    oldest_id = tweets[-1].id
    while True:
        tweets = api.user_timeline(screen_name=username, 
                               # 200 is the maximum allowed count
                               count=200,
                               include_rts = False,
                               max_id = oldest_id - 1,
                               # Necessary to keep full_text 
                               # otherwise only the first 140 words are extracted
                               tweet_mode = 'extended'
                               )
        if len(tweets) == 0:
            break
        oldest_id = tweets[-1].id
        all_tweets.extend(tweets)
        print('N of tweets downloaded till now {}'.format(len(all_tweets)))
    return all_tweets

# 4. ANALYSIS CELLS.

###### A. Get trending topics by selected regions.

In [30]:
worldwide = ['Worldwide']
asean = ["Myanmar", "Thailand","Singapore", "Malaysia", "Philippines", "Vietnam", "Indonesia", "Laos", "Cambodia", "Brunei"]
south_america = ['Brazil','Colombia','Argentina','Peru','Venezuela','Chile', 'Ecuador','Bolivia','Paraguay','Uruguay', 'Guyana','Suriname','French Guiana','Falkland Islands']

# Select the location here inside the array
select_country = ['United States']
selected_locations = select_country

# Select number of results to view. Max is 50.
total_results = 5

trends = get_location_specific_trends(selected_locations)
# Merging dataframes from all locations selected
trending_now = pd.concat(trends)
trending_now.sample(total_results)


Unnamed: 0,country,trending_topic,tweet_volume,url
29,United States,Soludo,192708.0,http://twitter.com/search?q=Soludo
11,United States,#WWERaw,54950.0,http://twitter.com/search?q=%23WWERaw
35,United States,Mike Pence,103185.0,http://twitter.com/search?q=%22Mike+Pence%22
9,United States,#WASvsPHI,26723.0,http://twitter.com/search?q=%23WASvsPHI
12,United States,Liz Cheney,91463.0,http://twitter.com/search?q=%22Liz+Cheney%22


###### B. Get the hashtags available

In [31]:
hashtags = extract_hashtags(trending_now)
phrases  = extract_phrases(trending_now)
print(hashtags)
print(phrases)

['#WWERaw', '#WASvsPHI', '#HTTC']
['Arizona', 'Kari Lake', 'Katie Hobbs', 'Eagles', 'Soludo', 'YOU LOST', 'Mike Pence', 'Liz Cheney', 'Pete Davidson', 'Ticketmaster', 'Graham', 'Terry', 'David Hundeyin', 'Slay', '8 Billion', 'McCain', 'Philly', 'Yale', 'Jalen', 'Layla', 'Heinicke', 'Klay', 'Hannity', 'Gannon']


###### C. Pick a trending topic or list of topics and see the top tweets

In [32]:
# Pick trending topic(s) by using hashtags (or) phrases variables or manually specifying it in an array
query = ['Twitter Blue']

# Incrase the limit to get more data
limit = 100

# Pick a date as latest available.
until_date = "2022-11-15"

# Select number of results to view. Max is 50. 
total_tweets_in_trend = 10

trendsetter_tweets = trendsetters(query, limit, until_date)
trendsetter_tweets = trendsetter_tweets.sort_values('favorites', ascending = False)

trendsetter_tweets.head(total_tweets_in_trend)

Unnamed: 0,topic,user,created_at,content,favorites,retweets
20,Twitter Blue,rafaelshimunov,2022-11-11,Did Twitter Blue tweet just cost Eli Lilly $LL...,419695,63634
26,Twitter Blue,elonmusk,2022-11-06,"Previously, we issued a warning before suspens...",164469,12651
22,Twitter Blue,litcapital,2022-11-11,Twitter Blue erased a few billion in market ca...,119101,10633
23,Twitter Blue,ZoeSchiffer,2022-11-11,NEW: Twitter has suspended the launch of Twitt...,82453,9998
24,Twitter Blue,MikeSington,2022-11-11,The chaos continues. Twitter pulls the plug on...,77923,12203
9,Twitter Blue,LilyPichu,2022-11-13,I said I’d dye my hair blue if DRX won 😊 it’s ...,56949,1017
27,Twitter Blue,MaryLTrump,2022-11-13,"Given historical precedent, conventional wisdo...",29339,5893
0,Twitter Blue,MuellerSheWrote,2022-11-14,Getting to 51 is important because we won’t ha...,12533,3057
42,Twitter Blue,InTheLittleWood,2022-11-06,You not gonna see me apologizing for buying Tw...,8107,120
4,Twitter Blue,gawrgura,2022-11-14,funny blue hedgehog does backflip (i made him ...,7528,886


In [33]:
# Export the data if you want.
#export2csv(trendsetter_tweets)

###### D. Pick a user to get retrieve tweets from their timeline as many as possible.
(Disclaimer - Must not be used for any malicious intent)

In [34]:
username = 'david_perell'
user_tweets = loop_tweet_extraction(username)

outtweets = [[tweet.id_str, 
              tweet.user.id_str, 
              tweet.user.name, 
              tweet.created_at,
              tweet.favorite_count,
              tweet.retweet_count,
              tweet.full_text.encode("utf-8").decode("utf-8")] 
             for idx,tweet in enumerate(user_tweets)]

tweets_df = pd.DataFrame (outtweets, columns = ["tweet_id", "user_id","user_name","created_at", "favorite_count","retweet_count","tweet"])

N of tweets downloaded till now 203
N of tweets downloaded till now 379
N of tweets downloaded till now 546
N of tweets downloaded till now 720
N of tweets downloaded till now 892
N of tweets downloaded till now 1072
N of tweets downloaded till now 1256
N of tweets downloaded till now 1429
N of tweets downloaded till now 1603
N of tweets downloaded till now 1779
N of tweets downloaded till now 1962
N of tweets downloaded till now 2141
N of tweets downloaded till now 2337
N of tweets downloaded till now 2526
N of tweets downloaded till now 2717
N of tweets downloaded till now 2780


In [35]:
tweets_df_final = tweets_df[['user_name','created_at','tweet','favorite_count','retweet_count']]
tweets_df_final.head()

Unnamed: 0,user_name,created_at,tweet,favorite_count,retweet_count
0,David Perell,2022-11-14 04:08:17+00:00,The most common mistake that beginner writers ...,367,21
1,David Perell,2022-11-14 04:05:21+00:00,If I could send every student to one writing c...,3400,465
2,David Perell,2022-11-14 03:48:16+00:00,You have to think clearly in order to write cl...,235,25
3,David Perell,2022-11-14 03:37:44+00:00,Learning science has advanced a bunch over the...,114,12
4,David Perell,2022-11-14 03:18:47+00:00,People look at these surveys and say: “The Lib...,155,7


# 5. To-be-continued - Brand Reputation Monitoring and Analysis

### Brand Mentions

### Sentiment Analysis