In [1]:
#!pip install trio
import json
import tweepy
import re
from collections import Counter
import time
from decouple import config
import trio
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from pandas.io.json import json_normalize



In [2]:
# Build the tweepy OAuth handler. All four credentials are looked up by name
# through decouple's `config`, so no secret is hardcoded in the notebook.
TWITTER_AUTH = tweepy.OAuthHandler(config('TWITTER_CONSUMER_KEY'),config('TWITTER_CONSUMER_SECRET'))
TWITTER_AUTH.set_access_token(config('TWITTER_ACCESS_TOKEN'),config('TWITTER_ACCESS_TOKEN_SECRET'))
# Module-level API client; async_get_user_interactions reads this global.
TWITTER = tweepy.API(TWITTER_AUTH)

In [3]:
# Snapshot the API rate-limit status before any crawling is done.
start_api_check = TWITTER.rate_limit_status()
# Flatten the status response with json_normalize for inspection
# (presumably a nested dict that becomes a wide DataFrame -- TODO confirm).
limits_alpha = json_normalize(start_api_check)

In [4]:
async def async_get_user_interactions(search, output, next_query, limit):
    """Crawls the targeted user's timeline and records who they mention.

    Args:
        `search`, string: The name of the user whose timeline to search.
        `output`, list of tuples: extended in place with `(search, mentioned)`
            pairs, one pair per counted mention.
        `next_query`, list: extended in place with the names to search on the
            next run (only when `limit` is positive).
        `limit`, int: a positive N keeps only the N most-mentioned users;
            -1 keeps every mention and leaves `next_query` untouched; any
            other value records nothing.

    Functionality:
        Fetch the user and up to 200 tweets from their timeline.
        Join all tweet text into one lowercased string.
        Remove the searched name from that string (self-mentions).
        Extract every remaining @username.
        Depending on `limit`, record the pairs and the next search targets.

    Returns:
        Nothing. Results are delivered by extending `output` and
        `next_query`, the lists that were passed in as args.

    Errors:
        `tweepy.TweepError` and any other `Exception` are reported with
        `print` and swallowed so that one bad user does not stop the crawl.
        `BaseException` subclasses are deliberately not caught, so task
        cancellation and KeyboardInterrupt still propagate.
    """
    try:
        twitter_user = TWITTER.get_user(search)
        tweets = twitter_user.timeline(
            count=200,
            exclude_replies=False,
            include_rts=True,
            tweet_mode='extended'
        )
        text = " ".join(tweet.full_text for tweet in tweets).lower()
        # The text is lowercased above, so the name has to be lowercased too
        # or a mixed-case `search` would never be removed.
        text = text.replace(search.lower(), "")
        mentions = re.findall(r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9-_]+)', text)

        if limit > 0:
            top = Counter(mentions).most_common(limit)
            # One (search, name) pair per mention, grouped by name in
            # most-common-first order.
            output.extend(
                (search, name) for name, count in top for _ in range(count)
            )
            # Each top name becomes a search target for the next level.
            next_query.extend(name for name, _ in top)

        elif limit == -1:
            # No cap: pair the searched user with every mention found.
            output.extend((search, name) for name in mentions)

    except tweepy.TweepError as error:
        print("tweepy.TweepError=", error)

    except Exception as error:
        # Was a bare `except:` that called sys.exc_info() without importing
        # sys, so the handler itself raised NameError.
        print("Error: %s" % repr(error))

In [102]:
async def interaction_chain():
    """Calls our async function & loops through it for each target user."""
    # input settings
    origin_user = ['bwinterrose']
    first_users = ['austen','paulg','justinkhan','tommycollison',"lambdaschool"]
    first_limit = 10 # -1 is no limit, anything above 1 sets limit.
    second_limit = 5 # -1 is no limit, anything above 1 sets limit. 
    third_limit = 5
    
    # init function variables
    level = 0
    data = []
    last = 2 # Set to number of levels. 
    start_time = time.time()
    last_time = time.time()
    next_users = first_users
    checked_users = origin_user

    # Updates the time for each loop.
    def funct_time(last, d, l):
        now = time.time()
        check_time = now - last
        print("\n\nLevel %s Completed\nLevel %s time to complete: %s." % (l, l, check_time))
        print("Level %s interactions found:", (l, len(d)))
        return now
    return next_users, checked_users, level 
    # Dedupe & Check user list for previous runs (no duplicated work). 
    def update_lists(x, y, l, l_n):
        # de-duplicate it.
        x = list(dict.fromkeys(x))
        # remove any users already searched
        for i in y:
            a = np.array(x)
            a = a[a != i]
            x = a.tolist()
        # add new values to the checked_list
        y += x
        # on first run.... 
        if l == 0:
            l += 1 
            print("----------Entering %sst level. Searching the following users:----------\n" % (l), x)
        # on other runs...
        else: 
            print("Level %s connections found:" % (l), len(x),"\n\n")
            if l_n > (l):
                l += 1
                print("---------Beginning Level %s - Searching the following users:\n" % (l), x) 
        return x, y, l
    
        
    # Level 1 Run.
    loop_num = 1
    async with trio.open_nursery() as nursery:
        for user in first_users:
            print("Loop # ", loop_num, " . Time so far:", time.time() - start_time)
            loop_num +=1
            nursery.start_soon(async_get_user_interactions, user, data, next_users, first_limit)
    
    last_time = funct_time(last_time, data, level)
    next_users, checked_users, level = update_lists(next_users, checked_users, level, last)


    # Level 2 Run.
    loop_num = 1
    async with trio.open_nursery() as nursery:
        for user in next_users:
            print("Loop # ", loop_num, " . Time so far:", time.time() - start_time)
            loop_num +=1
            nursery.start_soon(async_get_user_interactions, user, data, next_users, second_limit)
    
    last_time = funct_time(last_time, data, level)
    next_users, checked_users, level = update_lists(next_users, checked_users, level, last)


#     # Level 3 Run. Uncomment to run
#     loop_num = 1
#     async with trio.open_nursery() as nursery:
#         for user in next_users:
#             print("Loop # ", loop_num, " . Time so far:", time.time() - start_time)
#             loop_num +=1
#             nursery.start_soon(async_get_user_interactions, user, data, next_users, third_limit)
    
#     last_time = funct_time(last_time, data, level)
#     next_users, checked_users, level = update_lists(next_users, checked_users, level, last)
    
    print("Total time:", time.time() - start_time)
    print("Total Connections found:", (len(checked_users)-1))
    print("Total Overall Interactions found:", len(data))
    return data,next_users, checked_users, level 

In [103]:
next_users, checked_users, level  = trio.run(interaction_chain)

In [104]:
# Display the user lists and level produced by the crawl cell above.
next_users, checked_users, level

(['austen', 'paulg', 'justinkhan', 'tommycollison', 'lambdaschool'],
 ['bwinterrose'],
 0)

In [68]:
# Count how often each entry of `data` occurs. Requires `data` (the list of
# (searched_user, mentioned_user) pairs) to exist in the kernel namespace.
edges = Counter(data)

In [105]:
def update_lists(x, y, l, l_n):
    """Prepare the user list for the next crawl level.

    Args:
        x (list): candidate users for the next level; may contain duplicates.
        y (list): users that have already been searched.
        l (int): the current level number (0 before the first level).
        l_n (int): the total number of levels to run.

    Returns:
        tuple: `(x, y, l)` where `x` is the de-duplicated candidates that are
        not in `y` (first-seen order kept), `y` is a new list holding the old
        checked users followed by `x`, and `l` is the level to run next (it
        is not advanced once `l_n` has been reached).

    Neither input list is modified; callers must use the returned values.
    """
    # de-duplicate it and remove any users already searched
    already_checked = set(y)
    x = [user for user in dict.fromkeys(x) if user not in already_checked]
    # add new values to the checked list (new list, caller's `y` is untouched)
    y = y + x
    # on first run....
    if l == 0:
        l += 1
        print("----------Entering %sst level. Searching the following users:----------\n" % (l), x)
    # on other runs...
    else:
        print("Level %s connections found:" % (l), len(x),"\n\n")
        if l_n > (l):
            l += 1
            print("---------Beginning Level %s - Searching the following users:\n" % (l), x)
    return x, y, l
    


In [106]:
# Short aliases for the crawl results, matching update_lists' parameter names.
# These bind the same list objects; nothing is copied.
x = next_users
y = checked_users
l = level

In [107]:
# Scratch check of the removal loop used in update_lists, run against the
# values returned by the crawl cell.
x = next_users
y = checked_users
l = level
print("X: ",len(x), x)
print("Y: ",len(y),y)
# For each already-checked name, rebuild x without that name.
for i in y:
    a = np.array(x)
    a = a[a != i]
    x = a.tolist()
# Print both lists again to compare against the values before the loop.
print("X: ", len(x), x)
print("Y: ",len(y),y)

X:  5 ['austen', 'paulg', 'justinkhan', 'tommycollison', 'lambdaschool']
Y:  1 ['bwinterrose']
X:  5 ['austen', 'paulg', 'justinkhan', 'tommycollison', 'lambdaschool']
Y:  1 ['bwinterrose']


In [114]:
def remove_from_list(i,y):
    """Remove the first occurrence of `i` from list `y`, if it is present.

    Args:
        i: the value to remove.
        y (list): the list to modify in place.

    Returns:
        None. A missing value is not an error and leaves `y` unchanged.
    """
    try:
        y.remove(i)
    except ValueError:
        # `i` is not in `y`. Only this case is ignored; any other failure
        # (for example `y` not being a list) is a real bug and propagates.
        pass

In [119]:
# Scratch check of remove_from_list: drop every name in next_users from the
# checked list.
# NOTE(review): y1 is the same list object as checked_users, so this loop
# modifies checked_users in place and is not idempotent across re-runs.
x1 = next_users
y1 = checked_users
for i in x1:
    remove_from_list(i,y1)

['austen', 'paulg', 'justinkhan', 'tommycollison', 'lambdaschool']

In [128]:
# Count how often each entry of `data` occurs. Requires `data` to exist in
# the kernel namespace.
data_c = Counter(data)

In [140]:
# Iterating a Counter yields its keys, so this is the list of distinct entries.
l_data_c = list(data_c)
# View of the per-entry counts, in the same order as the keys above.
data_v = data_c.values()

In [126]:
# NOTE(review): data_c is a Counter, so data_c[0] looks up the key 0 rather
# than the first entry; a Counter returns 0 for a key it does not contain.
# If the first (source, target) pair was wanted, l_data_c[0] may have been
# intended -- confirm.
[{"source" : data_c[0]}]

Counter({('paulg', 'austen'): 11,
         ('paulg', 'kevinsimler'): 9,
         ('paulg', 'amasad'): 6,
         ('paulg', 'webdevmason'): 6,
         ('paulg', 'wrathofgnon'): 4,
         ('paulg', 'atabarrok'): 4,
         ('paulg', 'mckaywrigley'): 4,
         ('paulg', 'lpolovets'): 4,
         ('paulg', 'maxcroser'): 3,
         ('paulg', 'stephansturges'): 3,
         ('tommycollison', 'lambdaschool'): 36,
         ('tommycollison', 'austen'): 33,
         ('tommycollison', 'calebhicks'): 14,
         ('tommycollison', 'trevmckendrick'): 11,
         ('tommycollison', 'mitchellbwright'): 9,
         ('tommycollison', 'ouraring'): 9,
         ('tommycollison', 'ryanleehamblin'): 7,
         ('tommycollison', 'mckaywrigley'): 7,
         ('tommycollison', 'davecraige'): 7,
         ('tommycollison', 'sunjieming'): 7,
         ('lambdaschool', 'austen'): 62,
         ('lambdaschool', 'tommycollison'): 9,
         ('lambdaschool', 'dustint314'): 3,
         ('lambdaschool', 'meaghan