In [1]:
pip install -r requirements.txt

You should consider upgrading via the '/usr/local/opt/python@3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import yaml

with open("config.yml", 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)


In [43]:
### SETUP ###

import tweepy as tw
import pandas as pd
import datetime

api_creds = cfg['api_creds']
API_KEY = api_creds['API_KEY']
API_SECRET = api_creds['API_SECRET']
ACCESS_TOKEN = api_creds['ACCESS_TOKEN']
ACCESS_SECRET = api_creds['ACCESS_SECRET']

class TwitterApi:
    def __init__(self):
        self.api = None
        self.raw_tweets = []
        self.clean_tweets_df = pd.DataFrame()
        
        self.authenticate_api()
        self.check_rate_limit()
        
    def exception_handler(func):
        def inner_function(*args, **kwargs):
            try:
                func(*args, **kwargs)
            except TypeError:
                print(f"{func.__name__} error")
        return inner_function
        
    @exception_handler
    def authenticate_api(self, api_key=API_KEY, api_secret=API_SECRET):
        auth = tw.OAuthHandler(API_KEY, API_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
        # authenticate
        self.api = tw.API(auth, wait_on_rate_limit=True)
    
    @exception_handler
    def check_rate_limit(self):
        try:
            data = self.api.rate_limit_status()
            return data
        except Exception as e:
            raise(e)
        
    def get_raw_tweets_from_api(
        self, 
        q, 
        tweet_mode,
        date_range, 
        lang, 
        count
    ):
        tweet_list = []
        try:
#             for d in date_range:
                for tweet in self.api.search(
                  q=q,
                  tweet_mode=tweet_mode,
                  lang=lang,
#                   since='2020-01-01',
                  until='2021-07-14',
                  count=count
                ):
                    tweet_list.append(tweet._json)
        except RuntimeError as e:
            raise(e)
        
        for tweet in tweet_list:
            self.raw_tweets.append(tweet)

    def get_date_range(self, start, end):
        if not end:
            end = pd.to_datetime(start) + pd.DateOffset(days=7)
        date_range = pd.date_range(start=start, end=end, freq="W").map(lambda t: t.strftime('%Y-%m-%d'))
        return date_range
    
    def get_clean_tweets(self):
        self.clean_tweets_df = pd.json_normalize(self.raw_tweets)
    
    def get_twitter_data_as_dataframe(
        self, 
        q, 
        tweet_mode,
        start, 
        end=None, 
        lang="en", 
        count=500, 
    ):
        date_range = self.get_date_range(start, end)
        self.get_raw_tweets_from_api(q, tweet_mode, date_range, lang, count)
        self.get_clean_tweets()
        return self.clean_tweets_df
    def get_historical_data(self):
        SEARCH_QUERY = "#globalwarming -filter:retweets"
        d = self.api.search_full_archive(
            environment_name="Hobbyist",
            query=SEARCH_QUERY,
            fromDate='2021-01-01',
            toDate='2021-06-01'
        )
    

In [45]:
SEARCH_QUERY = "#globalwarming -filter:retweets"
## If end is None then you will only get one week of data
START = "2021-01-01"
END = None
LANG = "en"
COUNT = 500
TWEET_MODE = 'extended'

api = TwitterApi()
tweet_df = api.get_historical_data()
# tweet_df = api.get_twitter_data_as_dataframe(q=SEARCH_QUERY, tweet_mode=TWEET_MODE, start=START, end=END, lang=LANG, count=COUNT)

TweepError: {'message': 'Forbidden: Authentication succeeded but account is not authorized to access this resource.', 'sent': '2021-07-11T17:20:07+00:00', 'transactionId': '034f7260e11ce432'}

In [42]:
tweet_df.shape

(100, 154)

In [29]:
tweet_df.head()