In [1]:
from notebook_utils import setup
import pandas as pd
import networkx as nx
from collections import defaultdict

# Project-local notebook initialisation imported from `notebook_utils`;
# what it configures is not visible from this notebook.
setup()

import matplotlib.style as style

# Apply both style sheets, in the order listed.
# NOTE(review): these "seaborn-*" style names appear to have been renamed to
# "seaborn-v0_8-*" in newer matplotlib releases -- confirm against the
# installed version.
style.use(["seaborn-white", "seaborn-paper"])

In [2]:
# Snapshot label that is substituted into both directory paths below.
DATE = "16-dec"
# DATA_DIR and EXPORT_DIR are built from the same label, in that order.
DATA_DIR, EXPORT_DIR = (
    template.format(DATE)
    for template in ("../data/{}/", "../data/dataframes/{}/")
)

In [3]:
# Load the users frame from a pickle in the notebook's working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load files you
# produced yourself. The path is relative to the CWD, not to DATA_DIR.
df_users_with_clustering = pd.read_pickle('./df_users_with_clustering.pickle')

In [58]:
# New frame ordered by `followers_count`, largest first; the source frame
# is left unmodified (sort_values returns a copy here).
df_users_by_follower_count = df_users_with_clustering.sort_values("followers_count", ascending=False)

In [6]:
## Set up dependencies and credentials

import csv
import tweepy
from dotenv import load_dotenv
import os
import json

# Load variables from a local .env file (if any) into the environment.
load_dotenv()
CREDENTIALS = os.getenv("TWITTER_CREDENTIALS")
if not CREDENTIALS:
    # Fail with an actionable message instead of the TypeError that
    # `"../" + None` would raise further down.
    raise RuntimeError(
        "TWITTER_CREDENTIALS is not set; define it in the environment or in a .env file"
    )

# The credentials file is parsed as JSON Lines: one JSON object per line,
# each expected to hold the four OAuth keys read below.
configs = []
with open("../" + CREDENTIALS) as f:
    for line in f:
        if line.strip():  # tolerate blank / trailing empty lines
            configs.append(json.loads(line))
if not configs:
    raise RuntimeError("No credentials found in {!r}".format("../" + CREDENTIALS))

# Index of the credential set to use when the file holds several.
credential = 0
config = configs[credential]

auth = tweepy.OAuthHandler(config['consumer_key'], config['consumer_secret'])
auth.set_access_token(config['access_token'], config['access_token_secret'])

# Shared API client used by the fetch helpers in the following cells.
api = tweepy.API(auth)

In [95]:
def to_batch(a, size=100):
    """Split a sequence into consecutive chunks of at most `size` items.

    Parameters
    ---------------
    a : sequence
        Anything that supports `len()` and slicing (e.g. a list).
    size : int
        Maximum chunk length; must be positive. Defaults to 100.

    Returns
    ----------
    list
        Slices of `a` in their original order. Every chunk holds exactly
        `size` items except possibly the last one; an empty input gives `[]`.

    Raises
    ----------
    ValueError
        If `size` is zero or negative.
    """
    if size <= 0:
        # Previously size=0 surfaced as ZeroDivisionError and a negative size
        # silently returned the whole input as one chunk.
        raise ValueError("size must be a positive integer, got {!r}".format(size))
    return [a[start:start + size] for start in range(0, len(a), size)]

def check_inactive(api, uids):
    """Look up accounts one by one and report the outcome of each lookup.

    Parameters
    ---------------
    api : tweepy API instance
        Must provide `get_user(user_id=...)`.
    uids : iterable
        Account ids that are expected to be inactive.

    Returns
    ----------
        Generator of 2-tuples, one per id, in input order:

        * lookup failed with `tweepy.TweepError`: `(uid, reason)`, where
          `reason` is the first error entry carried by the exception when it
          is a dict, otherwise `dict(code=<api_code>, message=<str(e)>)`.
        * lookup succeeded (unexpected, a warning is logged):
          `(user, dict(code=-1, message='OK'))`, where `user` is the object
          returned by `api.get_user`.
    """
    # Function-scope import: the original referenced an undefined `logger`,
    # which raised NameError whenever a lookup succeeded.
    import logging
    logger = logging.getLogger(__name__)

    for uid in uids:
        try:
            u = api.get_user(user_id=uid)
        except tweepy.TweepError as e:
            # Exceptions are not subscriptable on Python 3, so read the
            # payload through `e.args` instead of `e[0][0]`.
            detail = e.args[0] if e.args else None
            if isinstance(detail, (list, tuple)) and detail and isinstance(detail[0], dict):
                reason = detail[0]
            else:
                # Payload is not a list of error dicts (e.g. a plain message).
                reason = dict(code=getattr(e, 'api_code', None), message=str(e))
            yield (uid, reason)
        else:
            logger.warning('This user %r should be inactive', uid)
            yield (u, dict(code=-1, message='OK'))

def batch_fetch_users(api, uids):
    """ Fast check the status of specified accounts with one bulk lookup.
    Parameters
    ---------------
        api: tweepy API instance
        uids: account ids (strings or integers) for a single lookup call

    Returns
    ----------
    Tuple (users, inactive_uids).
        `users` is the list of user objects returned by the lookup and
        `inactive_uids` is the list of requested ids missing from that
            result, de-duplicated and in input order.
        When the API answers with error code 17 (presumably "no user
        matches" -- confirm against the API docs) the result is `([], uids)`.

    Raises
    ----------
    tweepy.TweepError
        Re-raised unchanged for any other API error code.
    """
    try:
        users = api.lookup_users(user_ids=uids,
                                 include_entities=False)
    except tweepy.TweepError as e:
        if e.api_code == 17:
            return [], uids
        # Unexpected error
        print("Unknown error", e)
        raise  # bare raise keeps the original traceback

    # Compare ids as strings on both sides: returned ids are integers, while
    # the requested ids may be strings or (numpy) integers. The former
    # set(uids) - set(str ids) reported every uid as inactive for non-string
    # input and lost the input order.
    active_uids = {str(u.id) for u in users}
    inactive_uids = [uid for uid in dict.fromkeys(uids)
                     if str(uid) not in active_uids]
    return users, inactive_uids

def fetch_users(api, uids):
    """Fetch users in batches of 100, printing progress after each batch.

    Parameters
    ---------------
        api: tweepy API instance
        uids: account ids to look up

    Returns
    ----------
    Tuple (active_users, inactive_uids) accumulated over all batches.
        If a batch raises, the error is printed and whatever was collected
        up to that point is returned; remaining batches are not attempted.
    """
    collected_users, collected_inactive = [], []
    total = len(uids)
    for chunk in to_batch(uids, size=100):
        print("Fetching batch")
        try:
            chunk_users, chunk_inactive = batch_fetch_users(api, chunk)
            collected_users.extend(chunk_users)
            collected_inactive.extend(chunk_inactive)
        except Exception as err:
            # Best effort: stop here and hand back the partial result.
            print("Failed", err)
            return collected_users, collected_inactive

        done = len(collected_users) + len(collected_inactive)
        print("Fetched {}/{} users".format(done, total))

    return collected_users, collected_inactive

In [83]:
# Smoke test: run the fetch on a single hard-coded account id.
# Note that this rebinds the module-level `active_users` / `inactive_uids`.
active_users, inactive_uids = fetch_users(api, ["25073877"])

Fetching batch
Fetched 1/1 users


In [85]:
# First 10000 index labels of the follower-sorted frame
# (presumably the index holds account ids -- verify against the pickle).
# NOTE(review): `user_ids` is not passed to any call visible in this notebook.
user_ids = list(df_users_by_follower_count.index[:10000].values)

In [100]:
# Fetch every account after the first 10000 rows of the follower-sorted frame.
# The assignment only happens if the call returns; an interrupt leaves
# `active_users` / `inactive_uids` bound to their previous values.
active_users, inactive_uids = fetch_users(api, list(df_users_by_follower_count.index[10000:].values))

Fetching batch
Fetched 100/1378621 users
Fetching batch
Fetched 200/1378621 users
Fetching batch
Fetched 300/1378621 users
Fetching batch
Fetched 400/1378621 users
Fetching batch
Fetched 500/1378621 users
Fetching batch
Fetched 600/1378621 users
Fetching batch
Fetched 700/1378621 users
Fetching batch
Fetched 800/1378621 users
Fetching batch
Fetched 900/1378621 users
Fetching batch
Fetched 1000/1378621 users
Fetching batch
Fetched 1100/1378621 users
Fetching batch
Fetched 1200/1378621 users
Fetching batch
Fetched 1300/1378621 users
Fetching batch
Fetched 1400/1378621 users
Fetching batch
Fetched 1500/1378621 users
Fetching batch
Fetched 1600/1378621 users
Fetching batch
Fetched 1700/1378621 users
Fetching batch


KeyboardInterrupt: 

In [99]:
# Keep separate references to the current fetch results, so that rebinding
# `active_users` / `inactive_uids` in another cell does not affect them.
all_active_users = active_users
all_inactive_uids = inactive_uids
# Backward-compatible alias for the original, misspelled name.
all_incative_uids = all_inactive_uids

In [98]:
# Sanity check: number of active users vs. inactive ids currently bound.
print(len(active_users), len(inactive_uids))

9395 605


In [121]:
# Persist the snapshot taken in `all_active_users` / `all_incative_uids`.
# Both files are written from the snapshot so they always describe the same
# fetch; previously the second loop read the live `inactive_uids`, which can
# be rebound by a later fetch.

# One JSON object per line (JSON Lines), taken from each user's raw payload.
with open("../data_tools/check_users/users.jsonl", "w") as f:
    for user in all_active_users:
        f.write(json.dumps(user._json) + "\n")

# One inactive account id per line.
with open("../data_tools/check_users/inactive_uids.txt", "w") as f:
    for uid in all_incative_uids:
        f.write(str(uid) + "\n")

In [104]:
# Pick the first fetched user for manual inspection.
user = all_active_users[0]

In [113]:
# Serialise the inspected user's raw payload, in the same form that is
# written per line to users.jsonl.
json.dumps(user._json)

428333