# NaziHunter
This notebook will use a bag of words approach to help build a dataset of Nazi profiles and tweets.

Do not run all cells at once! Some of the test cells near the end can take 5-30 minutes to run, and save large files to your filesystem.

In [8]:
import json, csv
import re, os, sys, datetime, time
import tweepy
import sqlite3
import tokens # local file for storing tokens

# Set the private Twitter API keys.
consumer_key = tokens.consumer_key
consumer_secret = tokens.consumer_secret
access_token = tokens.access_token
access_token_secret = tokens.access_token_secret

# Pass tokens to Tweepy's OAuthHandler.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

In [9]:
# Define the bag of words:
words = ["kekistan",
        "#kek",
        "14words",
        "14 words",
        "14 wrds",
        "fourteen words",
        "future for white children",
        "evropa",
        "ethnostate",
        "/ourguys/",
        "goyim",
        "groyper",
        "cuck",
        "red pilled",
        "redpilled",
        "whitegenocide",
        "white genocide",
        "white pride",
        "whiteisright",
        "defending white",
        "prowhite",
        "pro white",
        "aryanState",
        "nationalsocialist",
        "national socialist",
        "national socialism",
        "nazionalsocialista",
        "nazional socialista",
        "white nationalist",
        "white supremacist",
        "identitarian",
        "卐",
        "⚡⚡",
        "ϟϟ",
        "✠",
        "⊕"]
print("Bag of words contains " + str(len(words)) + " words.")

def bag_of_words(words):
    return dict([(word, True) for word in words])

bag = bag_of_words(words)

def test_1488(string):
    # Match 14 and 88 only if no digit precedes nor follows.
    # 14 and 88 may be separated by a single non-digit.
    # e.g., matches: 1488, 14/88, 14.88, asdf1488jkl, 13-14-88a, a14 88b.
    # e.g., non-matches: 14288, 14--88, 5551488555, 714/88.
    if re.match(r".*(?<!\d)14[\D]?88(?!\d)", string):
        return 1
    else:
        return 0

Bag of words contains 36 words.


In [40]:
def init(words):
    # set up a list of regex patterns
    # TODO: This would be more efficient as a trie.
    # Optimization may be unnecessary.
    re_patterns = []
    
    for word in words:
        re_patterns.append(r".*" + word)
    return re_patterns

re_patterns = init(words)

def check_protected(user):
    # Test if user is protected:
    try:
        if api.get_user(user).protected:
            print("Account is protected: " + str(user) + "          ")
            return True
        else:
            print("Account is not protected: " + str(user) + "          ")
            return False
    except BaseException as e:
        print("Error in check_protected() exception clause (id:" + str(user) + "):", e)

def print_result_count(result):
    if result == 0:
        print("No match.")
    elif result == 1:
        print("There is 1 match.")
    else:
        print("There are " + str(result) + " matches.")

def test_count(string, re_patterns):
    string = str.lower(string)
    result = 0
    for pattern in re_patterns:
        if re.match(pattern, string):
            result += 1
    return result

def print_count(string, re_patterns):
    return print_result_count(test_count(string))
    
def test_followers(user, re_patterns = re_patterns, num_results = 50):
    try:
        # Evaluate a list of followers for Nazis. Return an array of scores.
        userlist = api.followers_ids(user)

        # Don't try to test more results than there are users in the list.
        num_results = min(num_results, len(userlist))

        results = test_list(userlist, re_patterns, num_results)

        # Add the results to the database.
        update_baddie(api.get_user(user), results)
        return results
    except BaseException as e:
        print("Error in test_followers(): ", e)
        
        try:
            # Test if user is protected:
            if api.get_user(user).protected:
                print("Account is protected.")
            else:
                print("Account is not protected.")
        except BaseException as e:
            print("Error in test_followers() exception clause: ", e)
        
        return

def test_follows(user, re_patterns = re_patterns, num_results = 50):
    # Evaluate a list of friends for Nazis. Return an array of scores.
    return test_list(api.friends_ids(user), re_patterns, num_results)
    
def test_list(userlist, re_patterns = re_patterns, num_results = 50):
    # Evaluate a list of users for Nazis. Return an array of scores.
    scores = []
    baddies = []
    baddies_scores = {}
    start = 0
    start_time = datetime.datetime.now()
    
    if num_results == -1:
        num_results = len(userlist)
    
    try:
        i = 0
        data = []
        print(" ")
        while i < len(userlist) and i < num_results:
            print("Loaded " + str(i) + " of " + str(len(userlist)), end="\r")
            data.extend(api.lookup_users(user_ids = userlist[i:min(num_results, i+100)]))
            i = i + 100
        #data = api.lookup_users(user_ids = userlist)
    except BaseException as e:
        print("Error in test_list(): ", e)
        return
    
    time_twitter_api_complete = datetime.datetime.now()
    time_delta = (time_twitter_api_complete - start_time).total_seconds()
    time_delta = round(time_delta, 3)
    print(str(num_results) + " users loaded in " + str(time_delta) + " seconds.")

    count = 0
    for userdata in data:
        #test user
        score = test_count(userdata.description, re_patterns)
        score += test_count(userdata.screen_name, re_patterns)
        score += test_count(userdata.name, re_patterns)
        score += test_1488(userdata.description)
        score += test_1488(userdata.screen_name)
        score += test_1488(userdata.name)
        
        # Add this score to the array of scores.
        scores.append(score)

        # Add detected Nazis to the list of baddies.
        if score != 0:
            baddies.append(userdata)
            baddies_scores[userdata.id] = score

        # Report scanning progress in console.
        if count % 10 == 0:
            #clear_output()
            ratio = round((count-start)/num_results*100, 1)            
            print(str(ratio) + "% complete", end="\r")
        count = count + 1

    #clear_output()
    
    results = Results(scores, baddies)
    results.print_results()
    
    #Print time delta for testing followers.
    end_testing_time = datetime.datetime.now()
    time_delta = (end_testing_time - time_twitter_api_complete).total_seconds()
    time_delta = round(time_delta, 3)
    print(str(num_results) + " users tested in " + str(time_delta) + " seconds.")
    
    # add baddies to database
    add_baddies_to_db(baddies, baddies_scores)
    
    #Print time delta for adding results to db.
    end_time = datetime.datetime.now()
    time_delta = (end_time - end_testing_time).total_seconds()
    time_delta = round(time_delta, 3)
    print(str(len(baddies)) + " baddies updated/added to db in " + str(time_delta) + " seconds.")

    #Print time delta for completing all operations.
    time_delta = (end_time - start_time).total_seconds()
    time_delta = round(time_delta, 3)
    print("Scanned " + str(num_results) + " followers in " + str(time_delta) + " seconds.")
    
    return(results)

In [53]:
def add_baddies_to_db(baddies, baddies_scores):
    # Add or update the baddie in the db:
    # Opens the db
    db = sqlite3.connect('data/baddies.db')

    # Get a cursor object
    cursor = db.cursor()

    for baddie in baddies:
        add_to_baddies(baddie, baddies_scores[baddie.id], db, cursor)
        
    db.commit()
    db.close()
    
def add_to_baddies(userdata,score, db, cursor):
    # adds baddie details to db
    cursor.execute('''
        SELECT sturm_score FROM baddies WHERE twitter_id = ?
    ''', (userdata.id,))
    result = cursor.fetchone()
    
    if result is None:
        # insert new row
        cursor.execute('''
        INSERT INTO baddies (twitter_id, screen_name, name, sturm_score) VALUES (?,?,?,?)
    ''', (userdata.id,userdata.screen_name, userdata.name, score))
    else:
        # update score
        sturm_score = max(score, result[0])
        cursor.execute('''
        UPDATE baddies SET sturm_score = ? WHERE twitter_id = ?
    ''', (score, userdata.id))

def update_baddie(userdata, results):
    # Update ratio in the database.
    # adds baddie details to db
    db = sqlite3.connect('data/baddies.db')

    # Get a cursor object
    cursor = db.cursor()

    cursor.execute('''
        SELECT twitter_id FROM baddies WHERE twitter_id = ?
    ''', (userdata.id,))
    result = cursor.fetchone()
    
    if result is None:
        # insert new row?
        # no---this may not be a baddie that's being tested.
#        cursor.execute('''
#        INSERT INTO baddies (twitter_id, screen_name, name, baddie_ratio) VALUES (?,?,?,?)
#    ''', (userdata.id,userdata.screen_name, userdata.name, results.ratio))
        print("User is not in database. Not updating.")
    else:
        # update ratio
        cursor.execute('''
        UPDATE baddies SET baddie_ratio = ? WHERE twitter_id = ?
    ''', (results.ratio, userdata.id))
        db.commit()
        
    db.close()
    
def get_baddies_from_db():
    # Add or update the baddie in the db:
    # Opens the db
    db = sqlite3.connect('data/baddies.db')

    # Get a cursor object
    cursor = db.cursor()

    cursor.execute('''
        SELECT twitter_id FROM baddies
    ''')
    result = cursor.fetchall()
    
    db.close()
    
    return result

    
def get_unprotected_baddies_from_db():
    # Add or update the baddie in the db:
    # Opens the db
    db = sqlite3.connect('data/baddies.db')

    # Get a cursor object
    cursor = db.cursor()

    cursor.execute('''
        SELECT twitter_id FROM baddies WHERE protected IS 0
    ''')
    result = cursor.fetchall()
    
    db.close()
    
    return result

def print_results(scores):
    x = [str(x) for x in range(0,5)]
    x.append("5+")
    y = []
    sum = 0
    for i in range(0,5):
        count = scores.count(i)
        y.append(count)
        sum += count
    y.append(len(scores) - sum)
    print(str(x))
    print(str(y))
    total = len(scores) - y[0]
    ratio = round(total/len(scores)*100, 1)
    print(str(ratio) + "% identified as Nazis (" + str(total) + " of " + str(len(scores)) + " tested)")

def print_baddies_details(baddies):
    # Print a list containing names, screen_names, and profile descriptions.
    countIter = iter([x for x in range(0,len(baddies))])
    
    for user in baddies:
        print(str(next(countIter)) + ": " + user.name + " / @" + user.screen_name 
             + " / https://www.twitter.com/" + user.screen_name)
        print(user.description)

def get_baddies_names(baddies):
    # Return an array of screen_names from baddies.
    return [user.screen_name for user in baddies]

def print_baddies(baddies):
    # Takes a list of userdata and returns a list of names.
    for baddie in baddies:
        print(baddie.name)
        
def get_baddies(baddies):
    # Takes a list of userdata and returns a list of IDs.
    ids = set()
    
    for baddie in baddies:
        ids.add(baddie.id)
    
    return ids

def check_rate():
    # Check rate limit status, return dict.
    return api.rate_limit_status()['resources']

def check_rate_users():
    # Check rate limit status for get user, return int.
    return api.rate_limit_status()['resources']['users']['/users/show/:id']['remaining']

def check_rate_lookup():
    # Check rate limit status for batch user lookups, return int.
    return api.rate_limit_status()['resources']['users']['/users/lookup']['remaining']

def check_rate_followers_ids():
    # Check rate limit status for follower ids lookups, return int.
    return api.rate_limit_status()['resources']['followers']['/followers/ids']['remaining']

def check_api_rates():
    # Returns rate limits for lookups, users, and timeline requests.
    print(str(check_rate_lookup()))
    print(str(check_rate_users()))
    print(str(check_rate()['statuses']['/statuses/user_timeline']['remaining']))

class Results(object):
    """
    Results contain attributes and basic methods:
        
    Attributes:
        num_results: integer number of results requested
        scores: integer array of scores returned from assessment
        num_baddies: integer number of non-zero scores
        ratio: ratio of results that are baddies
        baddies_names: string array of screen_names for baddies
        baddies: list of user data objects for baddies
    """
    
    def __init__(self, scores, baddies):
        # Return a Results object with scores array and baddies list.
        self.scores = scores
        self.baddies = baddies
        self.baddies_names = get_baddies_names(baddies)
        self.num_results = len(scores)
        self.num_baddies = len(baddies)
        self.ratio = round(len(baddies)/len(scores)*100, 1)
        
    def print_results(self):
        # Print results nicely to console.
        print_results(self.scores)

In [141]:
#OLD-ISH?
def test_user_count(username):
    try:
        userdata = api.get_user(username)
        score = test_userdata(userdata)
    except Exception:
        score = -1
    
    return score

def test_userdata(userdata):
    score = test_count(userdata.description, re_patterns)
    score += test_count(userdata.screen_name, re_patterns)
    score += test_count(userdata.name, re_patterns)
    score += test_1488(userdata.description)
    score += test_1488(userdata.screen_name)
    score += test_1488(userdata.name)
    return score

def test_user(username):
    return print("@" + username + "\'s score is " + str(test_user_count(username)))

In [192]:
#OLD-ish?
from IPython.display import clear_output

def print_follower_results(user, count = 50):
    follower_ids = api.followers_ids(user)

    results = []
    baddies = []
    num_results = count
    start = 0

    for i in range(start,start+num_results):
        try:
            userdata = api.get_user(follower_ids[i])
        except Exception:
            continue

    #    print_result_count(test_count(str(userdata)))
        result = test_userdata(userdata)
        results.append(result)

        if result != 0:
            baddies.append(userdata)
            
            #update database
            add_to_baddies(userdata, score)

        if i % 10 == 0:
            clear_output()
            ratio = round((i-start)/num_results*100, 1)
            print(str(ratio) + "% complete")
    clear_output()
    print(str(results))

    #l = results
    #dict((x,l.count(x)) for x in set(l))

    x = [str(x) for x in range(0,5)]
    x.append("5+")
    y = []
    sum = 0
    for i in range(0,5):
        count = results.count(i)
        y.append(count)
        sum += count
    y.append(len(results) - sum)
    print(str(x))
    print(str(y))
    total = len(results) - y[0]
    ratio = round(total/len(results)*100, 1)
    print(str(ratio) + "% identified as Nazis (" + str(total) + " of " + str(len(results)) + " tested)")

In [156]:
#OLD
def print_result(result):
    if result:
        print("Match found")
    else:
        print("No match")
        
def print_result_count(result):
    if result == 0:
        print("No match.")
    elif result == 1:
        print("There is 1 match.")
    else:
        print("There are " + str(result) + " matches.")

def boolean_test(result):
    if result:
        return True
    else:
        return False

# Config and testing:

In [13]:
num_results = 10

#Test accounts:
test_baddies = ["ns148818",
                "SaxonResistance",
                "j_rashcoven",
                "ABel14_88"]

test_good = ["saraislet",
             "EricaJoy",
             "yonatanzunger"]

In [33]:
timeline = api.user_timeline("ABel14_88")
print_result_count(test_count(str(timeline), re_patterns))

There are 2 matches.


In [190]:
print_follower_results("ABel14_88",361)

[0, 1, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 2, 0, 0, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 2, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 0, 1, 0, 1, 0, 3, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2, 3, 0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 3, 0, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 

In [40]:
status = api.get_status(912018899507351552, tweet_mode='extended')
print(str(status.user.screen_name))
print(str(status.user.id))
status.full_text

ABel14_88
909306189317423104


"#whiteprivilege and #WhiteSupremacist has just got started! You've seen #nothing yet. #we #are #the #ones #not #to #fuck #with"

In [13]:
timeline = api.user_timeline("ABel14_88")
#print(str(timeline))

In [67]:
status_json = json.dumps(status._json, indent = 4, ensure_ascii = False)
#print(str(status_json))
#print(str(status._json.values()))

In [1]:
def get_all_tweets(screen_name):
    
    #Twitter only allows access to a users most recent 3240 tweets with this method
    
    #authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    
    #initialize a list to hold all the tweepy Tweets
    alltweets = []    
    
    #make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name = screen_name, count=20, tweet_mode='extended')
    new_tweets_json = [status._json for status in new_tweets]
    
    #save most recent tweets
    alltweets.extend(new_tweets_json)
    
#    #save the id of the oldest tweet less one
#    oldest = alltweets[-1].id - 1
#    
#    #keep grabbing tweets until there are no tweets left to grab
#    while len(new_tweets) > 0:
#        
#        #all subsiquent requests use the max_id param to prevent duplicates
#        new_tweets = api.user_timeline(screen_name = screen_name,count=200,max_id=oldest)
#        
#        #save most recent tweets
#        alltweets.extend(new_tweets)
#        
#        #update the id of the oldest tweet less one
#        oldest = alltweets[-1].id - 1
#
#        print "...%s tweets downloaded so far" % (len(alltweets))
       
    #write tweet objects to JSON
    filename = "tweets-test-" + screen_name + ".json"
    file = open(filename, 'w') 
    print("Writing tweet objects to JSON please wait...")
    json.dump(alltweets,file,sort_keys = True,indent = 4)
    
    #close the file
    print("Done")
    file.close()

In [4]:
#get_all_tweets("ABel14_88")
get_all_tweets("PaleoconWoman")

Writing tweet objects to JSON please wait...
Done


In [51]:
def estimate_time_remaining(start_time, current_time, current_index, num_accounts, num_tweets):
    # Estimate the number of seconds remaining:
    if current_index > 0:
        num_total_tweets = num_accounts * num_tweets
        num_completed_tweets = current_index * num_tweets
        ratio_complete = num_completed_tweets / num_total_tweets

        time_delta = (current_time - start_time).total_seconds()
        time_delta = round(time_delta, 3)

        time_per_account = time_delta / current_index
        num_accounts_remaining = num_accounts - current_index + 1
        time_remaining = time_per_account * num_accounts_remaining
        time_remaining = round(time_remaining, 3)
        return str(time_remaining)
    else:
        return ""

def write_tweets(id_list, num_tweets):
    # Writes $count number of tweets for each user in id_list to json file.
    
    # Note the start time:
    start_time = datetime.datetime.now()
    
    #Twitter only allows access to a users most recent 3240 tweets with this method
    #pull this number of tweets from each user:
    num_tweets = min(200, num_tweets)
    
    #authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    
    #initialize a list to hold all the tweepy Tweets
    alltweets = []    
    i = 0
    num_errors = 0
    
    for id in id_list:
        i = i + 1
#        if i % 10 == 0:
#            clear_output()
        
        #user = api.get_user(id).screen_name
        user = id
        output = "Fetching tweets for " + str(user) + "... ("
        output += str(i) + " of " + str(len(id_list)) + ")"
        
        if i != 0:
            current_time = datetime.datetime.now()
            time_remaining = estimate_time_remaining(start_time, current_time, i - 1 - num_errors, len(id_list) - num_errors, num_tweets)
            if time_remaining != "":
                output += " (" + str(time_remaining) + "s remaining)"
                output += "          "
        
        print(output, end="\r")
        
        #make initial request for most recent tweets (200 is the maximum allowed count)
        try:
            new_tweets = api.user_timeline(id = id, count = num_tweets, tweet_mode='extended')
            
            #save only json from tweet objects
            new_tweets_json = [status._json for status in new_tweets]
            
            #save most recent tweets
            alltweets.extend(new_tweets_json)
        except BaseException as e:
            # Increment number of errors.
            num_errors += 1
            
            # Check if account is protected.
            check_protected(user)
            
            # Check rate limit.
            if check_rate()['statuses']['/statuses/user_timeline']['remaining'] < 10:
                reset_time = check_rate()['statuses']['/statuses/user_timeline']['reset']
                current_time = time.time()
                time_delta = reset_time - current_time
                time_delta = min(0,round(time_delta, 3))
                print("Rate limit exceeded. Pausing for " + str(time_delta) + " seconds.", end="\r")
                
                while time_delta >= 60:
                    time.sleep(60)
                    reset_time = check_rate()['statuses']['/statuses/user_timeline']['reset']
                    current_time = time.time()
                    time_delta = reset_time - current_time
                    time_delta = min(0,round(time_delta, 3))
                    print("Rate limit exceeded. Pausing for " + str(time_delta) + " seconds.", end="\r")
                
                time.sleep(time_delta)

        #save the id of the oldest tweet less one
    #    oldest = alltweets[-1].id - 1

    #    #keep grabbing tweets until there are no tweets left to grab
    #    while len(new_tweets) > 0:
    #        
    #        #all subsiquent requests use the max_id param to prevent duplicates
    #        new_tweets = api.user_timeline(screen_name = screen_name,count=200,max_id=oldest)
    #        
    #        #save most recent tweets
    #        alltweets.extend(new_tweets)
    #        
    #        #update the id of the oldest tweet less one
    #        oldest = alltweets[-1].id - 1
    #
    #        print "...%s tweets downloaded so far" % (len(alltweets))
    
    time_string = datetime.datetime.now().strftime("%Y-%m-%d-%H_%M_%S")
    filename = "data/nazi_tweets-" + time_string + ".json"
    file = open(filename, 'w') 
    print("\n" + "Writing tweet objects to JSON please wait...")
    json.dump(alltweets,file,sort_keys = True,indent = 4)
    
    end_time = datetime.datetime.now()
    time_delta = (end_time - start_time).total_seconds()
    time_delta = round(time_delta, 3)
    num_tested = len(id_list) - num_errors
    output = str(num_tweets) + " tweets each from " + str(num_tested)
    output += " users written to JSON in " + str(time_delta) + " seconds."
    print(output)
    
    #close the file
    file.close()

In [19]:
baddies = [x[0] for x in get_unprotected_baddies_from_db()]
len(baddies)

986

In [52]:
write_tweets(baddies[0:899], 40)

Error in check_protected() exception clause (id:19160477): [{'code': 50, 'message': 'User not found.'}]
Account is not protected: 107848498          (284.592s remaining)          
Account is protected: 177824609          99) (292.53s remaining)           
Error in check_protected() exception clause (id:267797986): [{'code': 63, 'message': 'User has been suspended.'}]
Account is protected: 399464246          99) (283.803s remaining)          
Error in check_protected() exception clause (id:2586238849): [{'code': 63, 'message': 'User has been suspended.'}]
Error in check_protected() exception clause (id:2842094181): [{'code': 63, 'message': 'User has been suspended.'}]
Account is not protected: 2852211096           (253.737s remaining)          
Account is not protected: 753939280670502913           (156.211s remaining)          
Error in check_protected() exception clause (id:763415951010168832): [{'code': 50, 'message': 'User not found.'}]
Error in check_protected() exception clause (i

In [50]:
check_rate()['statuses']['/statuses/user_timeline']['remaining']

900

In [162]:
write_tweets(get_baddies(baddies), 20)

Writing tweet objects to JSON please wait...
Done


In [307]:
baddies_list = get_baddies(results.baddies)
baddies_array = list(baddies_list)

In [310]:
for user in baddies_array:
    print(str(api.get_user(user).followers_count))

88
94
267
95
154
38
709
89
15
5872
35
64
1727
72
815
148
381
1889
588
170
282
1166
140
333
3255
1985
208
135
1672
101
494
449
2133
572
840
351
188
1930
645
230
754
145
1946
3811
298
328
1017
744
530
327
88
56
1564
3408
699
565
177
175
4409
243
427
221
125
1595
310
147
578
17
19
80
3884
2174
291
31
12354
97
392
213
1277
8440
287
112
1675


In [415]:
for x in range(28,40):
    print("Testing Twitter id " + str(baddies_array[x]) + ":")
    results = test_followers(baddies_array[x], num_results = -1)
    print("\n")
    
    if check_rate_lookup() < 100 or check_rate_followers_ids() == 0:
        break

Testing Twitter id 508067360:
 
1672 users loaded in 45.862 seconds.
['0', '1', '2', '3', '4', '5+']
[1366, 204, 73, 22, 6, 1]
18.3% identified as Nazis (306 of 1672 tested)
1672 users tested in 0.204 seconds.
306 baddies updated/added to db in 0.024 seconds.
Scanned 1672 followers in 46.09 seconds.


Testing Twitter id 773502289831161856:
 
101 users loaded in 2.252 seconds.
['0', '1', '2', '3', '4', '5+']
[72, 15, 5, 7, 1, 1]
28.7% identified as Nazis (29 of 101 tested)
101 users tested in 0.02 seconds.
29 baddies updated/added to db in 0.007 seconds.
Scanned 101 followers in 2.28 seconds.


Testing Twitter id 710610743247245313:
 
495 users loaded in 17.69 seconds.
['0', '1', '2', '3', '4', '5+']
[410, 49, 21, 11, 4, 0]
17.2% identified as Nazis (85 of 495 tested)
495 users tested in 0.047 seconds.
85 baddies updated/added to db in 0.022 seconds.
Scanned 495 followers in 17.759 seconds.


Testing Twitter id 783424505712635904:
 
449 users loaded in 10.428 seconds.
['0', '1', '2', '3

In [366]:
results = test_followers("razorheart88", num_results = -1)

['0', '1', '2', '3', '4', '5+']
[1609, 262, 85, 21, 9, 0]
19.0% identified as Nazis (377 of 1986 tested)
1986 users tested in 0.244 seconds.
377 baddies updated/added to db in 0.011 seconds.
Scanned 1986 followers in 34.091 seconds.


In [61]:
def update_protected_baddies():
    # Update protected flag in the database if absent.
    db = sqlite3.connect('data/baddies.db')

    # Get a cursor object
    cursor = db.cursor()

    cursor.execute('''
        SELECT twitter_id FROM baddies WHERE protected IS null
    ''')
    results = cursor.fetchall()
    results = [x[0] for x in results]
    
    for baddie in results:
        if check_protected(baddie):
            protected = 1
        else:
            protected = 0

        # Update protected flag in db:
        cursor.execute('''
            UPDATE baddies SET protected = ? WHERE twitter_id = ?
            ''', (protected, baddie))
        db.commit()
        
    db.close()