### Import packages & setup

In [3]:
import datetime
import tweepy
from tweepy import OAuthHandler
import json
import pandas as pd
import csv
import os
import time

In [4]:
# ACADEMIC ACCOUNT. Set connection to Tweepy. I've put my API keys in a .py file called API_keys.py
from my_api_keys import bearer_token
tweepyclient = tweepy.Client(bearer_token, wait_on_rate_limit=True)

In [28]:
def scrapetweets(query, start_time, end_time, num_tweets):
    
    tweet_list = []
    
    # Grabs the time when we start making requests to the API
    func_start = time.time()

    while True:    
        for response in tweepy.Paginator(tweepyclient.search_all_tweets, 
                                         query = query,
                                         user_fields = ['username', 'public_metrics', 'description', 'location'],
                                         tweet_fields = ['created_at', 'geo', 'public_metrics', 'text'],
                                         expansions = 'author_id',
                                         start_time = start_time,
                                         end_time = end_time,
                                         max_results=500):
            tweet_list.append(response)

        # Flag to quit when we need to, but it's not working correctly at this point
        if len(tweet_list) <= num_tweets :   
            continue
        else:
            break
    
    # Grab how long it took to scrape all the tweets
    func_end = time.time()
    print('Scraping time was {} minutes.'.format(round(func_end - func_start)/60, 2))
    
    result = []
    user_dict = {}
    # Loop through each response object
    for response in tweet_list:
        # Take all of the users, and put them into a dictionary of dictionaries with the info we want to keep
        for user in response.includes['users']:
            user_dict[user.id] = {'username': user.username, 
                                  'followers': user.public_metrics['followers_count'],
                                  'tweets': user.public_metrics['tweet_count'],
                                  'description': user.description,
                                  'location': user.location
                                 }
        for tweet in response.data:
            # For each tweet, find the author's information
            author_info = user_dict[tweet.author_id]
            # Put all of the information we want to keep in a single dictionary for each tweet
            result.append({'user_id': tweet.author_id, 
                           'username': author_info['username'],
                           'follower_count': author_info['followers'],
                           'total_tweets': author_info['tweets'],
                           'description': author_info['description'],
                           'location': author_info['location'],
                           'tweet_id' : tweet.id,
                           'text': tweet.text,
                           'created_at': tweet.created_at,
                           'retweets_count': tweet.public_metrics['retweet_count'],
                           'replies_count': tweet.public_metrics['reply_count'],
                           'likes_count': tweet.public_metrics['like_count'],
                           'quote_count': tweet.public_metrics['quote_count']
                          })

    # Change this list of dictionaries into a dataframe
    df = pd.DataFrame(result)
    
    # Let's see how long it took to grab all tweets and how many were pulled
    print('Scraped {} tweets'.format(len(df)))

    return df



In [29]:
start_time = "2022-11-01T00:00:00Z"
end_time = "2022-11-02T00:00:00Z"
query = '#trailrunning OR #running OR #run OR #trail OR #trailrunner OR #trailrun -is:retweet lang:en'
num_tweets=100

scrapetweetstest(query, start_time, end_time, num_tweets)

Scraping time was 2.05 minutes.
Scraped 49180 tweets


Unnamed: 0,user_id,username,follower_count,total_tweets,description,location,tweet_id,text,created_at,retweets_count,replies_count,likes_count,quote_count
0,38011168,Planet_Chad,3010,66284,Fine Art Photographer and birder looking to te...,"Raleigh, NC",1587595277837701120,RT @DebbieOppermann: Colourful fall trail thro...,2022-11-01 23:59:44+00:00,46,0,0,0
1,165834331,sak53,1051,47930,"憂いのある曲、埃っぽい曲、地味ながら味わいのある曲が好きです / Indie Rock, A...",,1587595031447498753,朝ラン完了\n朝からど快晴だと清々しいですね、今日も一日何とか乗り切ろう〜\n#run #n...,2022-11-01 23:58:46+00:00,0,1,25,0
2,3695474660,touchdownbot,3914,150656,All for the love of Touchdowns!,United States,1587594936081334272,RT @AkbarGamingTV: They be disappointed 🤣 #mad...,2022-11-01 23:58:23+00:00,1,0,0,0
3,1437066104019595266,WCash44,7,106,"Husband, Father, Coach, Mentor, and gym owner ...","Staunton, VA",1587594838371106816,@CaydenCash2 highlights vs. Staunton Storm(Wee...,2022-11-01 23:58:00+00:00,0,0,0,0
4,4421129835,GiselaFigueroa5,87,18058,,,1587594710600015873,RT @Elcalvariorunn1: Tips para calentar ante d...,2022-11-01 23:57:29+00:00,4,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49175,1432239786283057158,Else76247763,459,4095,Salam kenal,,1587233822617505792,RT @9inbirth: #Badass #full #term #mama #runni...,2022-11-01 00:03:27+00:00,10,0,0,0
49176,3917981907,myfoodfantasy69,4340,545157,#FrizeMedia Helps Amplify Online Presence Of B...,"Manchester, England",1587233820436201476,RT @Charlesfrize: #Running - Benefits Of A Goo...,2022-11-01 00:03:26+00:00,1,0,0,0
49177,997748161551785985,IamAbot94,32,3136,Visit https://t.co/aqLNYmB7mS to search a tale...,Fiverr,1587233573521629184,Work Out1 | Fitness planner | Minimalist plann...,2022-11-01 00:02:27+00:00,0,0,0,0
49178,1401870661241511938,7motivation_24,165,12062,"Delivering #motivational #quotes, every hour o...",,1587233205865832450,Always remember that you are lavishly #wrathfu...,2022-11-01 00:01:00+00:00,0,0,0,0


In [30]:
df.head()

NameError: name 'df' is not defined

## Ignore for now: testing adding followers & following to df

In [None]:
def get_followers_id(person):
    followersid = []
    count=0
    user=api.get_user(screen_name=person)
    user_id=user.id
    number_of_followers=user.followers_count
    status = tweepy.Cursor(api.get_follower_ids, screen_name=person, tweet_mode="extended").items()
    for i in range(0,number_of_followers):
        follower=next(status)
        followersid.append(follower)
        count += 1
    return followersid

In [None]:
get_followers_id("37chandler")
