Script for downloading likes:

Endptt 1 - Download most recent 100 liking users given a tweet id

    - 75 requests per 15-minute

Endpt 2 - Download a user's liked tweets. 

    - let's restrict this to most recent 1000 likes
    - 75 requests per 15-minute
    - goes towards tweet cap of 500,000 per month
    
    
Approach:

    1) For each labelled account, GET most recent 100 tweets that are not retweets or replies
    - api.get_users_tweets(twitter_id, exclude=['retweets', 'replies'], max_results=100])
    - 900/15 mins
    
    2) Get neighbor nodes:
    - For each tweet returned, get users who liked it
        - api.get_liking_users(tweet_id,  user_fields=['public_metrics'])
        - 75 / 15 mins
        - returns most recent 100 users who liked the tweet
    - For this user, get liked tweets:
        - api.get_liked_tweets(twitter_id, max_results=100, expansions='author_id', user_fields=['public_metrics'])
        - 75 / 15 mins
    
    

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import json
import requests

import tweepy
from tweepy import Client

# key = 'B81Sj4ifYRstLZGMyMDM0roG3'
# secret = 'c1Rg4vdyl2mjMEC7WKDsVLTz2IlqTHy1yezn71Y9cw3LgZaxAW'
# token = 'AAAAAAAAAAAAAAAAAAAAAD6eWQEAAAAABtT80i7lkSemhYsuOIjy4O4xLQs%3DkfsByhRoXQ9odyT51sECR1vXxOLKxdtOON2Ij4OODdAt8jTcGY'

key='CSh1kvK1gpxzyhl8GYuMa0RIv'
secret='yfgNtKNVeqdjqktu9lR4eK0oEt68KjXhVV8jJOJgvqrvpFbL9N'
token='AAAAAAAAAAAAAAAAAAAAAOCeVAEAAAAABD8nomZEvbNVV3p7dFOVkhm5Kv8%3DritcqLa43rGxvmEUXtuSVp6fwDKsONk6hbgPVGyjmqI72ueB9W'


api = tweepy.Client(bearer_token=token, consumer_key=key, consumer_secret=secret, 
              return_type=requests.Response,
              wait_on_rate_limit=True)

In [2]:
combined = pd.read_pickle('../node_and_degree_list.pkl')
thresh = 1000
# Drop accounts that exceed threshold
small = combined.loc[combined.followers_count < thresh]
small = small.loc[small.followers_count < thresh]

In [3]:
# DEFINITION - return list of tweet ids for 50 most recent tweets
def get_tweet_list(twitter_id):
    r = api.get_users_tweets(twitter_id, exclude=['retweets', 'replies'], max_results=50)
    tweet_ids = []
    
    try:
        for i in r.json()['data']:
            tweet_ids.append(i['id'])
    except:
        return []
    
    return tweet_ids

In [4]:
# DEFINITION - return pandas df of users who liked input list of tweets + follow counts
def get_df_liking_users(og_id, tweet_ids, cols):
    if len(tweet_ids) == 0:
        print('No tweets')
        return pd.DataFrame(columns=cols)
    for tweet_id in tweet_ids:
        try:
            r = api.get_liking_users(tweet_id, user_fields=['public_metrics'])

            df = pd.DataFrame(columns=cols)

            if r.json()['meta']['result_count'] == 0:
                return df
        except:
            return pd.DataFrame(columns=cols)
        for user in r.json()['data']:
            uid = user['id']
            followers = user['public_metrics']['followers_count']
            following = user['public_metrics']['following_count']

            df = df.append({
                cols[0]: uid,
                cols[1]: og_id,
                cols[2]: following,
                cols[3]: followers
            }, ignore_index=True)
            
    return df

In [5]:
# DEFINITION - takes list of liked twitter_ids and formats into df
def make_df_from_user_list(og_id, user_list, cols):
    df = pd.DataFrame(columns=cols)
    if len(user_list) == 0:
        print('No likes')
        return df
    
    for uid in user_list:
        df = df.append({
                    cols[0]: uid,
                    cols[1]: og_id,
                    cols[2]: np.nan,
                    cols[3]: np.nan
                }, ignore_index=True)
    return df

In [6]:
# DEFINITION - return df of authors of last 50 tweets this user has liked
def get_df_liked_users(twitter_id, cols):
    r = api.get_liked_tweets(twitter_id, max_results=50, expansions='author_id')

    df = pd.DataFrame(columns=cols)
    try:
        if r.json()['meta']['result_count'] == 0:
                return df

        # Get twitter_id of author
        liked_users = []
        for tweet in r.json()['data']:
            liked_users.append(tweet['author_id'])

        df = make_df_from_user_list(twitter_id, liked_users, cols)
    
    except:
        pass
        
    return df

In [18]:
from collections import deque

def recursion(visited, neighbors, edgelist, distance_from_root):
    if len(neighbors) != len(distance_from_root):
        print('Queue lengths not equal')
        return edgelist
    
    if not neighbors:
        return edgelist
    
    # If in visited, skip
    d = distance_from_root.popleft()
    twitter_id = neighbors.popleft()
    
    print(f'Distance: {d}')
    
    if ((twitter_id in visited) & (d == 0))| (d > 1):
        return edgelist
    
    # Else add to visited
    visited.add(twitter_id)
    
    # Get edgelist for this node
    tweet_ids = get_tweet_list(twitter_id)
    liking_df = get_df_liking_users(twitter_id, tweet_ids, cols)
    liked_df = get_df_liked_users(twitter_id, cols)
    node_edgelist = pd.concat([liked_df, liking_df])
    
    # Add this node edgelist to network edgelist
    edgelist = pd.concat([edgelist, node_edgelist])
    
    # Add neighbors to queue if less than 2 edges away from start
    if d+1 <= 1:
        n = node_edgelist.originating_id.append(node_edgelist.receiving_id)
        print(f'Num neighbors adding: {len(n)}')
        for uid in n:
            if uid not in visited:
                visited.add(uid)
                neighbors.append(uid)
                distance_from_root.append(d+1)
    
    edgelist = recursion(visited, neighbors, edgelist, distance_from_root)
    
    return edgelist

In [8]:
small_bots = small.loc[small.label == 'bot']

In [21]:
# already_processed = []

[4307623643,
 844779426260791296,
 837316817324556289,
 913028718377005056,
 790017240733278208,
 1016773517587537920,
 2855732920,
 1416121214,
 821215184660393986,
 1004764640486854656,
 352132252,
 1661404440,
 2928556470,
 2901953015,
 2491508162,
 490645569,
 300415996,
 709807650,
 976973737,
 2385417602,
 4808016715,
 1433472060,
 449470627]

In [None]:
%%time

cols = ['originating_id', 'receiving_id', 'originating_following_count', 'originating_follower_count']
for curr_id in small_bots.twitter_id:
    if curr_id in already_processed:
        continue
        
    edgelist = pd.DataFrame(columns=cols)

    neighbors = deque()
    distance_from_root = deque()
    neighbors.append(curr_id)
    distance_from_root.append(0)

    visited= set()
    # # While there are unvisited neighbors
    while len(neighbors) != 0:
        print(f'Num unvisited neighbors: {len(neighbors)}')
        edgelist = recursion(visited, neighbors, edgelist, distance_from_root)

    edgelist.to_pickle('bot/likes_bot_' + str(curr_id))
    already_processed.append(curr_id)

Num unvisited neighbors: 1
Distance: 0
No tweets
Num neighbors adding: 0
Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 22
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Num unvisited neighbors: 1
Distance: 0
No tweets
Num neighbors adding: 0
Num unvisited neighbors: 1
Distance: 0
No tweets
Num neighbors adding: 0
Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 100
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 874 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 872 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 880 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 880 seconds.


Distance: 1
Distance: 1
Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 100
Distance: 1
Distance: 1
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
Distance: 1
Distance: 1
No tweets
Distance: 1
Distance: 1
No tweets
Distance: 1
No tweets
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Num unvisit

Rate limit exceeded. Sleeping for 852 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 874 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 881 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 871 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 872 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 874 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 869 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 880 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 875 seconds.


Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 880 seconds.


Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 880 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 882 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 874 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 879 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 876 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 882 seconds.


Distance: 1
Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 30
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 879 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 878 seconds.


Distance: 1
No tweets
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 884 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 886 seconds.


Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 64
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 877 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1
Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 100
Distance: 1


Rate limit exceeded. Sleeping for 872 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 869 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 867 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 869 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 872 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 875 seconds.


Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 100
Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 859 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 861 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 862 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 864 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 865 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 874 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 881 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 880 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 872 seconds.


Distance: 1
Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 882 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 873 seconds.


Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 871 seconds.


Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 877 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 870 seconds.


Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 2
Distance: 1
Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 60
Distance: 1


Rate limit exceeded. Sleeping for 871 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 874 seconds.


Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 879 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 882 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 879 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 860 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 856 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 867 seconds.


Num unvisited neighbors: 1
Distance: 0
Num neighbors adding: 100
Distance: 1
Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 861 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 862 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 867 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 870 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 855 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 860 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 864 seconds.


Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 866 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 860 seconds.


Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 858 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 852 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 848 seconds.


Distance: 1
Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 863 seconds.


Distance: 1


Rate limit exceeded. Sleeping for 865 seconds.


Distance: 1
Distance: 1


Rate limit exceeded. Sleeping for 860 seconds.


In [14]:
while len(neighbors) != 0:
        print(f'Num unvisited neighbors: {len(neighbors)}')
        edgelist = recursion(visited, neighbors, edgelist, distance_from_root)

edgelist.to_pickle('bot/likes_bot_' + str(curr_id))
already_processed.append(curr_id)

In [189]:
small

Unnamed: 0,twitter_id,followers_count,following_count,label
24,4307623643,59,4,bot
36,844779426260791296,768,1,bot
83,837316817324556289,182,6,bot
137,913028718377005056,87,147,bot
175,790017240733278208,19,47,bot
...,...,...,...,...
2201,716626267,404,1,human
2214,770774730190098433,106,11,human
2236,849258957768192004,212,5,human
2242,456361810,66,187,human


In [255]:
# Get edgelist for this node
tweet_ids = get_tweet_list(twitter_id)
liking_df = get_df_liking_users(twitter_id, tweet_ids, cols)
liked_df = get_df_liked_users(twitter_id, cols)
node_edgelist = pd.concat([liked_df, liking_df])

In [None]:
edgelist