## Dataset Share 1

**By:** _Peyten Boutwell_ 

In [1]:
import datetime
import tweepy

# I've put my API keys in a .py file called API_keys.py
from API_key import api_key, api_key_secret, access_token, access_token_secret

In [2]:
# Authenticate the Tweepy API
auth = tweepy.OAuthHandler(api_key,api_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True)

# Follower IDs

In [7]:
# I'm putting the handles in a list to iterate through below
football_handles = ['oregonfootball', 'BeaverFootball']

# This will iterate through each Twitter handle that we're collecting from
for screen_name in football_handles:
    
    # Tells Tweepy we want information on the handle we're collecting from
    # The next line specifies which information we want, which in this case is the number of followers 
    user = api.get_user(screen_name) 
    followers_count = user.followers_count

    # Let's see roughly how long it will take to grab all the follower IDs. 
    print(f'''
    @{screen_name} has {followers_count} followers. 
    That will take roughly {followers_count/(5000*60):.0f} hours and {followers_count/(5000):.2f} minutes
    ''')


    @oregonfootball has 278437 followers. 
    That will take roughly 1 hours and 55.69 minutes
    

    @BeaverFootball has 78636 followers. 
    That will take roughly 0 hours and 15.73 minutes
    


In [8]:
# This creates a dictionary containing a list for each Twitter handle we'll be grabbing follower IDs from
id_dict = {'oregonfootball' : [], 
           'BeaverFootball' : []}

# Grabs the time when we start making requests to the API
start_time = datetime.datetime.now()

# .keys() allows us to iterate through each key in the dictionary
for handle in id_dict.keys():
    
    #we must iterate through each of the pages in order to get all follower IDs
    # To grab the follower IDs, we will be using followers_ids
    for page in tweepy.Cursor(api.followers_ids,
                              # This is how we will get around the issue of not being able to grab all ids at once
                              # Once the rate limit is hit, we will be notified that we must wait 15 mins (900 secs)
                              wait_on_rate_limit=True, wait_on_rate_limit_notify=True, compression=True,
                              screen_name=handle).pages():
         
        # The page variable comes back as a list, so we have to use .extend rather than .append
        id_dict[handle].extend(page)

# Let's see how long it took to grab all follower IDs
end_time = datetime.datetime.now()
elapsed_time = end_time - start_time
print(elapsed_time)

Rate limit reached. Sleeping for: 893
Rate limit reached. Sleeping for: 893
Rate limit reached. Sleeping for: 894
Rate limit reached. Sleeping for: 892


1:00:26.209446


## Grab descriptions based on the followers IDs

In [None]:
users_dict = {'oregonfootball' : [], 'BeaverFootball' : []}

for handle in id_dict:
    start=0 #we have feed the API 100 ID's at a time, this will iterate through them
    end=0
    followers=[]
    while end-1 <= len(id_dict[handle]): #quit when we get past the end of our list
        end += 100 #update the end of our slice
        if end <= len(id_dict[handle]): #split into if else statement to slice correctly
            try:
                
                followers_temp = api.lookup_users(id_dict[handle][start:end])
            except tweepy.TweepError as err: 
                if err.code == 103: #if we get a rate limit error, go to sleep
                    print('sleeping, 900 seconds')
                    time.sleep(900)
        else:
            try:
                followers_temp = api.lookup_users(id_dict[handle][start:])
            except tweepy.TweepError as err:
                if err.code == 103:
                    print('sleeping, 900 seconds')
                    time.sleep(900)
        followers.extend(followers_temp)
        start = end #update our starting slice index for next loop
    users_dict[handle].extend(followers)

In [None]:
headers = ['screen_name', 'name', 'location', 'followers_count', 'friends_count', 'description']

for handle in users_dict.keys():
    
    # Descriptions with emoji or non-Roman letters can cause trouble. Encoding your .txt file in utf-8 will help
    with open(f'{handle}_followers.txt','w', encoding='utf-8') as out_file:
        out_file.write('\t'.join(headers) + '\n')

        for idx, user in enumerate(users_dict[handle]):
                   
            
            outline = [user.screen_name, user.name, user.location, user.followers_count, user.friends_count, 
                           user.description.replace("\n"," ")]
                
            out_file.write('\t'.join([str(item) for item in outline]) + '\n')   