In [1]:
import tweepy
from twitter_authentication import bearer_token
import time

In [2]:
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt

In [14]:
from tqdm import tqdm

In [3]:
# Shared API client used by get_tweets. wait_on_rate_limit=True lets tweepy sleep when the
# rate limit is hit (the "Rate limit exceeded. Sleeping for ..." lines in the download output).
client = tweepy.Client(bearer_token, wait_on_rate_limit=True)

In [4]:
# Poll results indexed by poll date; get_data builds its download windows from this index.
df = (
    pd.read_csv('polls.csv')
    .assign(Date=lambda polls: pd.to_datetime(polls['Date']))
    .set_index('Date')
)

In [52]:
def get_tweets(q, start, end, maximum=10, count=1):
    """Fetch up to ``count`` pages of full-archive search results for a query.

    Parameters
    ----------
    q : str
        Search query, passed to the API as ``query``.
    start, end : str
        Passed through as ``start_time`` / ``end_time``. The caller in this
        notebook formats them as ``%Y-%m-%dT%H:%M:%SZ``.
    maximum : int, default 10
        ``max_results`` requested per page.
    count : int, default 1
        Maximum number of pages (API requests) to make. A negative value or
        ``None`` means "no limit", as it did with the previous manual counter.

    Returns
    -------
    list
        The response objects yielded by ``tweepy.Paginator``, in order.
    """
    unlimited = count is None or count < 0
    page_limit = float('inf') if unlimited else count

    # Let the paginator enforce the page limit. Breaking out of the loop after
    # the fact means page ``count + 1`` has already been requested (and then
    # thrown away), which wastes rate limit on every call.
    pages = tweepy.Paginator(client.search_all_tweets,
                             query=q,
                             user_fields=['username', 'public_metrics', 'description', 'location'],
                             tweet_fields=['created_at', 'geo', 'public_metrics', 'text'],
                             expansions='author_id',
                             start_time=start,
                             end_time=end,
                             max_results=maximum,
                             limit=page_limit)

    objlist = []
    for response in pages:
        objlist.append(response)
        time.sleep(1.5)  # 1 search request per second limit
    return objlist

In [53]:
def csv_tweets(obj, directory, date):
    """Flatten search responses into one row per tweet and save them as a CSV.

    Parameters
    ----------
    obj : iterable
        Response objects as returned by ``get_tweets``. Each must expose
        ``.data`` (tweets) and ``.includes`` (expanded objects, with authors
        under ``'users'``).
    directory : str
        Sub-folder of ``tweets/`` to write into; created if missing.
    date : datetime-like
        Used for the file name, ``tweets/<directory>/<YYYY-MM-DD>.csv``.

    Notes
    -----
    The file always has the columns ``author_followers``, ``text``,
    ``retweets``, ``replies`` and ``likes``, even when no tweets were found,
    so it can be read back without special-casing empty days.
    """
    import os  # local import: only needed to create the output folder

    columns = ['author_followers', 'text', 'retweets', 'replies', 'likes']
    result = []
    user_dict = {}
    # Loop through each response object
    for response in obj:
        # A page that matched nothing has no 'users' expansion and its data is
        # None (tweepy Response defaults), so fall back to empty iterables.
        for user in (response.includes or {}).get('users', []):
            user_dict[user.id] = {'username': user.username,
                                  'followers': user.public_metrics['followers_count'],
                                  'tweets': user.public_metrics['tweet_count'],
                                  'description': user.description,
                                  'location': user.location
                                 }
        for tweet in response.data or []:
            # The author can be absent from the expansions; keep the tweet and
            # leave the follower count empty rather than failing the whole file.
            author_info = user_dict.get(tweet.author_id, {})
            metrics = tweet.public_metrics
            # Other collected fields (username, author tweet count, description,
            # location) live in author_info if more columns are ever needed.
            result.append({'author_followers': author_info.get('followers'),
                           'text': tweet.text,
                           'retweets': metrics['retweet_count'],
                           'replies': metrics['reply_count'],
                           'likes': metrics['like_count'],
                          })

    out_dir = f"tweets/{directory}"
    os.makedirs(out_dir, exist_ok=True)

    # Passing the column list keeps the header stable for empty results
    tmp = pd.DataFrame(result, columns=columns)
    tmp.to_csv(f"{out_dir}/{date.strftime('%Y-%m-%d')}.csv", index=False)

In [54]:
def get_data(q, name, pdf=df, maximum=10, count=1):
    """Download tweets for every window between consecutive poll dates.

    For each date in ``pdf.index`` the window runs from the previous date in
    the index (or one day earlier for the first date) up to that date. The
    tweets found in a window are written by ``csv_tweets`` to
    ``tweets/<name>/<window end date>.csv``.

    Parameters
    ----------
    q : str
        Search query forwarded to ``get_tweets``.
    name : str
        Output sub-folder under ``tweets/``.
    pdf : pandas.DataFrame, default ``df``
        Frame whose index provides the window boundaries.
        Assumes a datetime index in ascending order -- TODO confirm for
        frames other than the default polls table.
    maximum : int, default 10
        Results per page, forwarded to ``get_tweets``.
    count : int, default 1
        Pages per window, forwarded to ``get_tweets``.
    """
    # Use the frame that was passed in; reading the global ``df`` here made
    # the ``pdf`` argument a no-op.
    if len(pdf.index) == 0:
        return  # no poll dates -> no windows to download

    stamp = '%Y-%m-%dT%H:%M:%SZ'
    dates = [pdf.index[0] + datetime.timedelta(days=-1)] + list(pdf.index)
    windows = list(zip(dates[:-1], dates[1:]))
    for start, end in tqdm(windows, desc='Downloading tweets', unit='Date'):
        obj = get_tweets(q, start.strftime(stamp), end.strftime(stamp), maximum, count)
        time.sleep(1.5)
        csv_tweets(obj, name, end)

In [55]:
# Fetterman tweets (English, retweets excluded): up to 10 pages of 500 results per poll window,
# saved under tweets/fetterman/.
get_data('Fetterman -is:retweet -RT lang:en', 'fetterman', maximum=500, count=10)

Downloading tweets:  47%|████████████████████████████                                | 43/92 [13:16<22:52, 28.00s/Date]Rate limit exceeded. Sleeping for 97 seconds.
Downloading tweets:  82%|████████████████████████████████████████████████▉           | 75/92 [27:53<05:33, 19.64s/Date]Rate limit exceeded. Sleeping for 126 seconds.
Downloading tweets: 100%|████████████████████████████████████████████████████████████| 92/92 [37:23<00:00, 24.39s/Date]


In [56]:
# Oz tweets: up to 10 pages of 500 results per poll window, saved under tweets/oz/.
# The OR alternatives are grouped in parentheses because the search API combines
# space-separated (AND) terms before OR; without the grouping the retweet and language
# filters only applied to the "Dr. OZ" alternative.
get_data('(Mehmet Oz OR Dr. OZ) -is:retweet -RT lang:en', 'oz', maximum=500, count=10)

Downloading tweets:  30%|██████████████████▎                                         | 28/92 [12:24<28:03, 26.31s/Date]Rate limit exceeded. Sleeping for 152 seconds.
Downloading tweets:  61%|████████████████████████████████████▌                       | 56/92 [27:24<16:21, 27.25s/Date]Rate limit exceeded. Sleeping for 146 seconds.
Downloading tweets:  91%|██████████████████████████████████████████████████████▊     | 84/92 [42:10<03:35, 26.91s/Date]Rate limit exceeded. Sleeping for 167 seconds.
Downloading tweets: 100%|████████████████████████████████████████████████████████████| 92/92 [48:29<00:00, 31.63s/Date]
