In [1]:
import requests, json, numpy, datetime
import pywren
import boto3
import numpy as np
import botometer
import pandas as pd
import botometer_creds as creds


In [2]:
# Build the botometer client that is used to inspect twitter accounts.
# The RapidAPI key and the Twitter app credentials both come from the
# botometer_creds module; wait_on_ratelimit is switched on.
bot = botometer.Botometer(
    wait_on_ratelimit=True,
    rapidapi_key=creds.RAPIDAPI_KEY,
    **creds.TWITTER_APP_AUTH,
)

In [3]:
# Load every collected tweet and prefix each username with '@'.
all_tweets = pd.read_csv('Data/all_tweets.csv')
all_tweets['username'] = '@' + all_tweets['username'].astype(str)
accounts = all_tweets['username']
# Grab only the unique user accounts so that we don't calculate bot scores
# for the same account multiple times. `accounts` still holds one entry per
# tweet; `accounts_list` holds each username once, in first-seen order.
accounts_list = accounts.drop_duplicates().tolist()

In [5]:
# Count the distinct values in each column of all_tweets.
all_tweets.nunique()

county         102
id          141737
username     29602
date        139313
text        130815
dtype: int64

In [18]:
# Grab a sample of users for each county in Illinois.
# The draw is seeded so that a restarted kernel scores the same accounts;
# without random_state every run produced a different sample.
# NOTE(review): the sample is taken over tweet rows, so a county with very few
# rows can round down to zero sampled rows at this fraction -- confirm that is
# acceptable.
SAMPLE_FRAC = .001
SAMPLE_SEED = 42
sampled_usernames = (
    all_tweets.groupby('county')['username']
    .apply(lambda x: x.sample(frac=SAMPLE_FRAC, random_state=SAMPLE_SEED))
    .reset_index()
)

In [23]:
# Reduce the sampled rows to distinct usernames. This rebinds accounts_list,
# so the chunking step works on the sample rather than on every row of all_tweets.
accounts_list = sampled_usernames['username'].unique()

In [42]:
# Twitter's API rate limit only allows 450 requests per 15 minutes, so the
# accounts are split into manageable batches of at most N entries each
# (the final batch may be shorter).
N = 100
accounts_chunk = [
    accounts_list[start:start + N]
    for start in range(0, len(accounts_list), N)
]

In [32]:
# Accumulator that will receive the botometer report of each account in the first chunk
list_scores_0 = list()
# Ask botometer for the bot scores of every account in the first chunk
scores_0 = bot.check_accounts_in([*accounts_chunk[0]])

In [33]:
# Walk the (screen_name, result) pairs of the first chunk and keep only the
# result half of each pair, appending it to the running list.
list_scores_0.extend(report for _screen_name, report in scores_0)

In [38]:
def classify_account(dictionary, threshold=.29):
    '''
    Given a dictionary that represents the botometer output, this function creates a binary
        variable indicating whether or not the account is a bot based on a score threshold.

    Inputs:
        dictionary: botometer result for one account; must provide
            dictionary['user']['screen_name'] and dictionary['scores']['english'].
        threshold: minimum english score (inclusive) at which the account is
            flagged as a bot. Defaults to .29, the cut-off previously hard-coded here.

    Returns:
        dict with 'account_name' (the screen name) and 'bot' (1 if the score is
            greater than or equal to the threshold, otherwise 0).

    Raises:
        KeyError if the 'user', 'screen_name', 'scores' or 'english' key is missing.
        NOTE(review): records for accounts that could not be scored may lack these
            keys -- confirm and filter them out before mapping this function.
    '''
    name = dictionary['user']['screen_name']
    score = dictionary['scores']['english']

    # int() turns the comparison into the 0/1 flag; the result is no longer held
    # in a local called `bot`, which shadowed the module-level Botometer client.
    return {'account_name': name, 'bot': int(score >= threshold)}
    
    

In [39]:
%%time

# Apply classify_account to every collected report through pywren's default
# executor, then gather the results of all returned futures into bot_or_not.
# NOTE(review): the recorded run took 26.4 s of wall time for this one chunk;
# classify_account is a pure dictionary lookup, so a local list comprehension
# would likely be much faster -- confirm whether the remote map is required.
pwex = pywren.default_executor()
futures = pwex.map(classify_account, list_scores_0)
bot_or_not = pywren.get_all_results(futures)

CPU times: user 5.44 s, sys: 1.36 s, total: 6.79 s
Wall time: 26.4 s


In [40]:
# Display the classification records gathered from the pywren futures.
bot_or_not

[{'account_name': 'jniffen', 'bot': 0},
 {'account_name': 'JermaineKnaff', 'bot': 0},
 {'account_name': 'TATKulisch', 'bot': 1},
 {'account_name': 'gbaction1', 'bot': 0},
 {'account_name': 'JohnRAbell1', 'bot': 0},
 {'account_name': 'BHSBUCletics', 'bot': 1},
 {'account_name': 'itsReeseWhitney', 'bot': 0},
 {'account_name': 'ShellshockNet', 'bot': 0},
 {'account_name': 'PBHenkelman', 'bot': 1},
 {'account_name': 'DashDeCosta', 'bot': 0},
 {'account_name': 'ColecaTheGreat', 'bot': 0},
 {'account_name': 'NETFLIXnUGGET', 'bot': 0},
 {'account_name': 'Puzzlequeer', 'bot': 0},
 {'account_name': 'corryn_brock', 'bot': 0},
 {'account_name': '4HWFan1989', 'bot': 0},
 {'account_name': 'EIUWBB', 'bot': 0},
 {'account_name': 'illinidiva12', 'bot': 0},
 {'account_name': 'Janpack00', 'bot': 0},
 {'account_name': 'cottonwood100', 'bot': 1},
 {'account_name': 'NYARK0AA', 'bot': 0},
 {'account_name': 'mdcichy', 'bot': 1},
 {'account_name': 'RJohnson69', 'bot': 0},
 {'account_name': 'JacksonJennings', 

In [41]:
# Render the classification records as a table with account_name and bot columns.
# The frame is only displayed here; it is not bound to a name.
pd.DataFrame(bot_or_not)

Unnamed: 0,account_name,bot
0,jniffen,0
1,JermaineKnaff,0
2,TATKulisch,1
3,gbaction1,0
4,JohnRAbell1,0
...,...,...
95,taraprindiville,0
96,STLprog,0
97,ToyaruthDiva,0
98,RealYeshua1,0
