In [52]:
pip install pyyaml

Collecting pyyaml
  Downloading PyYAML-5.4.1-cp39-cp39-macosx_10_9_x86_64.whl (259 kB)
[K     |████████████████████████████████| 259 kB 8.4 MB/s eta 0:00:01
[?25hInstalling collected packages: pyyaml
Successfully installed pyyaml-5.4.1
Note: you may need to restart the kernel to use updated packages.


In [21]:
pip install tweepy==3.10.0

Note: you may need to restart the kernel to use updated packages.


In [12]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [9]:
import yaml

with open("config.yml", 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)


In [10]:
### SETUP ###

import tweepy as tw
import pandas as pd

api_creds = cfg['api_creds']
API_KEY = api_creds['API_KEY']
API_SECRET = api_creds['API_SECRET']
ACCESS_TOKEN = api_creds['ACCESS_TOKEN']
ACCESS_SECRET = api_creds['ACCESS_SECRET']

class TwitterApi:
    def __init__(self):
        self.api = None
        self.raw_tweets = []
        self.clean_tweets = []
        
        self.authenticate_api()
        self.check_rate_limit()
        
    def exception_handler(func):
        def inner_function(*args, **kwargs):
            try:
                func(*args, **kwargs)
            except TypeError:
                print(f"{func.__name__} error")
        return inner_function
        
    @exception_handler
    def authenticate_api(self, api_key=API_KEY, api_secret=API_SECRET):
        auth = tw.OAuthHandler(API_KEY, API_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
        # authenticate
        self.api = tw.API(auth, wait_on_rate_limit=True)
    
    @exception_handler
    def check_rate_limit(self):
        try:
            data = self.api.rate_limit_status()
            return data
        except Exception as e:
            raise(e)

    def save_data(self, tweets_list, method):
        if method == 'append':
            self.raw_tweets.append(tweets_list)
        else:
            self.raw_tweets = tweets_list
        
    def get_raw_tweets_from_api(
        self, 
        q, 
        since, 
        until=None, 
        lang="en", 
        count=500, 
        method='append'
    ):
        tweets_list = []
        try:
            for tweet in self.api.search(
                      q=q,
                      lang=lang,
                      since=since, 
                      until=until,
                      count=count
            ):
                tweets_list.append(tweet._json)
        except RuntimeError as e:
            raise(e)   
        
        if len(tweets_list):
            self.save_data(tweets_list, method)
        else:
            print('get_raw_tweets_from_api, no tweets!')

    
    def get_clean_tweets(self):
        self.clean_tweets_df = pd.json_normalize(self.raw_tweets)
    
    def get_twitter_data_as_dataframe(
        self, 
        q, 
        since, 
        until=None, 
        lang="en", 
        count=500, 
        method='append'
    ):
        self.get_raw_tweets_from_api(q, since, until, lang, count, method)
        self.get_clean_tweets()
        return self.clean_tweets_df
    

In [11]:
SEARCH_QUERY = "#globalwarming -filter:retweets"
## keep these within a year or we will break twitter
SINCE = "2021-01-01"
METHOD = 'overwrite'
LANG = "en"
COUNT = 500

api = TwitterApi()
tweet_df = api.get_twitter_data_as_dataframe(q=SEARCH_QUERY, since=SINCE, lang=LANG, method=METHOD, count=COUNT)

In [12]:
tweet_df

Unnamed: 0,created_at,id,id_str,text,truncated,source,in_reply_to_status_id,in_reply_to_status_id_str,in_reply_to_user_id,in_reply_to_user_id_str,...,quoted_status.coordinates,quoted_status.place,quoted_status.contributors,quoted_status.is_quote_status,quoted_status.retweet_count,quoted_status.favorite_count,quoted_status.favorited,quoted_status.retweeted,quoted_status.possibly_sensitive,quoted_status.lang
0,Sat Jun 05 17:00:27 +0000 2021,1401222398104440835,1401222398104440835,UK #WEATHER: Beachgoers bask in sunshine as te...,False,"<a href=""http://gridpointweather.com"" rel=""nof...",,,,,...,,,,,,,,,,
1,Sat Jun 05 17:00:07 +0000 2021,1401222313824161798,1401222313824161798,UK #WEATHER: Beachgoers bask in sunshine as te...,False,"<a href=""http://www.weather-globe.com"" rel=""no...",,,,,...,,,,,,,,,,
2,Sat Jun 05 16:59:56 +0000 2021,1401222267846246403,1401222267846246403,Happy #WorldEnvironmentDay! \n🗣 to the #youths...,True,"<a href=""http://twitter.com/download/iphone"" r...",,,,,...,,,,,,,,,,
3,Sat Jun 05 16:56:11 +0000 2021,1401221323695693826,1401221323695693826,Righties nailed it. Look at all this global co...,False,"<a href=""https://mobile.twitter.com"" rel=""nofo...",,,,,...,,,,,,,,,,
4,Sat Jun 05 16:43:45 +0000 2021,1401218193591443457,1401218193591443457,#WorldEnvironmentDay \nPlant a tree\nGift a gr...,True,"<a href=""http://twitter.com/download/android"" ...",,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Sat Jun 05 12:36:19 +0000 2021,1401155924077785092,1401155924077785092,"‘Sea snot’ 30 m deep covers Turkish coast, thr...",True,"<a href=""http://twitter.com/#!/download/ipad"" ...",,,,,...,,,,,,,,,,
96,Sat Jun 05 12:31:10 +0000 2021,1401154630638317578,1401154630638317578,Every day is #WorldEnvironmentDay for us\nRead...,True,"<a href=""https://ifttt.com"" rel=""nofollow"">IFT...",,,,,...,,,,,,,,,,
97,Sat Jun 05 12:31:07 +0000 2021,1401154615018721283,1401154615018721283,Every day is #WorldEnvironmentDay for us\nhttp...,True,"<a href=""https://ifttt.com"" rel=""nofollow"">IFT...",,,,,...,,,,,,,,,,
98,Sat Jun 05 12:30:35 +0000 2021,1401154484361781249,1401154484361781249,"@JamesTate121 1. DEFINITELY HAPPENING (June 3,...",False,"<a href=""https://mobile.twitter.com"" rel=""nofo...",1.400538e+18,1400537929060081665,1.356786e+18,1356786368656138241,...,,,,,,,,,,
