## Load Twitter Credentials (via `decouple` config; JSON loader kept disabled below)

In [1]:
import json
import tweepy
import re
from collections import Counter
import time
from decouple import config
#!pip install trio
import trio



In [2]:
# Build the authenticated tweepy client used by every cell below.
# Each secret is looked up by name through decouple's `config()` (imported above),
# so no credential values appear in this notebook.
TWITTER_AUTH = tweepy.OAuthHandler(config('TWITTER_CONSUMER_KEY'),config('TWITTER_CONSUMER_SECRET'))

TWITTER_AUTH.set_access_token(config('TWITTER_ACCESS_TOKEN'),config('TWITTER_ACCESS_TOKEN_SECRET'))

# Module-level API handle; the interaction functions reference it as a global.
TWITTER = tweepy.API(TWITTER_AUTH)


# The block below is a bare string literal, not executed code: it preserves an
# alternative loader that read the same credentials from 'twitter_credentials.json'.
# The trailing `;` suppresses the cell's output of the string.
"""with open('twitter_credentials.json') as cred_data:
    info = json.load(cred_data)
    TWITTER_AUTH = tweepy.OAuthHandler(info['CONSUMER_KEY'], info['CONSUMER_SECRET'])
    TWITTER_AUTH.set_access_token(info['ACCESS_KEY'],info['ACCESS_SECRET'])

TWITTER = tweepy.API(TWITTER_AUTH)""";


## Function 1 - First Layer Interactions

In [121]:
async def first_async_get_user_interactions(search, output, next_query, top_n=20):
    """Collect the first-layer interactions of one Twitter user.

    Fetches up to 200 tweets from the user's timeline (replies and retweets
    included), extracts every @-mention from the lower-cased tweet text and
    keeps the ``top_n`` most frequently mentioned handles.

    Args:
        search: Screen name of the user to query.
        output: List extended in place with ``(search, handle)`` tuples, one
            tuple per mention, so a handle mentioned five times contributes
            five tuples.
        next_query: List extended in place with the unique top handles, in
            descending order of mention count, for the next crawl level.
        top_n: How many of the most-mentioned handles to keep (default 20,
            the previously hard-coded value). ``None`` keeps all of them.

    Returns:
        None. Results are delivered through ``output`` and ``next_query``.

    Note:
        Nothing in this coroutine awaits, so the tweepy calls run to
        completion before any other task scheduled on the same loop proceeds.
    """
    # Look up the user, then pull their most recent tweets in extended mode
    # so `full_text` is available.
    twitter_user = TWITTER.get_user(search)
    tweets = twitter_user.timeline(
        count=200,
        exclude_replies=False,
        include_rts=True,
        tweet_mode='extended'
    )

    # Join all tweet text into one lower-cased string so handles compare
    # case-insensitively.
    text = " ".join(tweet.full_text for tweet in tweets).lower()

    # Drop self-mentions by comparing whole handles. The previous
    # `text.replace(search, "")` missed mixed-case `search` values (the text
    # was already lower-cased) and, being a substring delete, also mangled
    # longer handles that merely contain the user's name.
    own_handle = search.lower()
    mentions = [
        handle
        for handle in re.findall(
            r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9-_]+)', text
        )
        if handle != own_handle
    ]

    # Rank handles by mention count; each pair is (handle, count).
    top = Counter(mentions).most_common(top_n)

    # One (search, handle) edge per mention, grouped by handle in rank order.
    for handle, count in top:
        output.extend([(search, handle)] * count)

    # Unique handles feed the next level of crawling.
    next_query.extend(handle for handle, _ in top)

## Function 2 - Second Layer Interactions

In [122]:
async def second_async_get_user_interactions(search, output):
    """Collect every @-mention made by one Twitter user (second crawl layer).

    Fetches up to 200 tweets from the user's timeline (replies and retweets
    included) and records each mention found in the lower-cased tweet text.
    Unlike the first-layer function, no top-N filtering is applied.

    Args:
        search: Screen name of the user to query.
        output: List extended in place with ``(search, handle)`` tuples, one
            per mention, in the order the mentions appear in the joined text.

    Returns:
        None. Results are delivered through ``output``.

    Note:
        Nothing in this coroutine awaits, so the tweepy calls run to
        completion before any other task scheduled on the same loop proceeds.
    """
    # Initialize search for the specific user.
    twitter_user = TWITTER.get_user(search)
    # Get the user's tweets from their timeline.
    tweets = twitter_user.timeline(
        count=200,
        exclude_replies=False,
        include_rts=True,
        tweet_mode='extended'
    )
    # Join all tweet text into one lower-cased string so handles compare
    # case-insensitively.
    text = " ".join(tweet.full_text for tweet in tweets).lower()

    # Drop self-mentions by comparing whole handles. The previous
    # `text.replace(search, "")` missed mixed-case `search` values (the text
    # was already lower-cased) and, being a substring delete, also mangled
    # longer handles that merely contain the user's name.
    own_handle = search.lower()
    mentions = re.findall(
        r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9-_]+)', text
    )

    # Extend the shared list in place. `list.extend` returns None, so its
    # result must not be assigned back to `output` as the old code did.
    output.extend((search, handle) for handle in mentions if handle != own_handle)

## Trio Async Tests

## 1. Drill down. (No 1st Layer Grouping) 

In [130]:
async def interaction_chain(first_user):
    """Run a two-level mention crawl starting from ``first_user``.

    Level 1 queries ``first_user`` and gathers the handles to visit next.
    Level 2 starts one nursery task per gathered handle; every task appends
    its findings to the same list. Progress and elapsed times are printed
    along the way.

    Args:
        first_user: Screen name that seeds the crawl.

    Returns:
        The list of tuples accumulated by both levels.
    """
    data = []
    next_users = []
    started_at = time.time()

    # Level 1 Run.
    await first_async_get_user_interactions(first_user, data, next_users)

    print("----------Entering 2nd level. Time so far:", time.time() - started_at)
    display(next_users)

    # Level 2 Run.
    # The nursery block only exits once every task started inside it is done.
    async with trio.open_nursery() as nursery:
        for loop_num, user in enumerate(next_users, start=1):
            print("Loop # ", loop_num, " . Time so far:", time.time() - started_at)
            nursery.start_soon(second_async_get_user_interactions, user, data)

    print("Total time:", time.time() - started_at)
    return data

In [None]:
import asyncio

async def main():
    """Placeholder asyncio entry point.

    So far it only initialises the same three locals that ``interaction_chain``
    starts with; no crawling logic has been added yet, and it returns None.
    """
    data = []
    next_users = []
    start_time = time.time()


# Run the coroutine defined above. The previous call targeted `hello_world`,
# which is not defined in this notebook and raised NameError.
# NOTE(review): inside a kernel that already runs an event loop,
# `asyncio.run()` raises RuntimeError; `await main()` may be needed there - confirm.
asyncio.run(main())



In [None]:
import pandas as pd
import concurrent.futures
import requests
import time

# Accumulates one entry per probed URL (status code, or exception type name).
out = []
# Upper bound on worker threads used by the executor below.
CONNECTIONS = 100
# Per-request timeout handed to `load_url`.
TIMEOUT = 5

# Read the host list inside a context manager so the file handle is closed
# as soon as the contents are loaded (the bare `open(...).read()` leaked it).
with open('../data/sample_1k.txt') as tld_file:
    tlds = tld_file.read().splitlines()
# The first line is skipped - presumably a header row; confirm against the data file.
urls = ['http://{}'.format(x) for x in tlds[1:]]

def load_url(url, timeout):
    """Send a HEAD request to ``url`` and return the response's status code.

    Args:
        url: Address to probe.
        timeout: Forwarded unchanged as the ``timeout`` argument of ``requests.head``.

    Returns:
        The ``status_code`` attribute of the response.
    """
    return requests.head(url, timeout=timeout).status_code

# Probe every URL on a thread pool and tally the outcomes.
with concurrent.futures.ThreadPoolExecutor(max_workers=CONNECTIONS) as executor:
    # Lazy generator: the submissions happen when `as_completed` consumes it,
    # i.e. after `time1` has been taken.
    future_to_url = (executor.submit(load_url, url, TIMEOUT) for url in urls)
    time1 = time.time()
    for future in concurrent.futures.as_completed(future_to_url):
        # Record the status code on success, or the exception's type name on
        # failure. The append sits after the try/except rather than in a
        # `finally`, so an interruption that is not an `Exception` (for
        # example KeyboardInterrupt) no longer appends a stale or undefined
        # value. The loop variable is named `status` so it does not overwrite
        # the notebook-level `data` holding the crawl results.
        try:
            status = future.result()
        except Exception as exc:
            status = str(type(exc))
        out.append(status)

        # Carriage return keeps the running count on a single output line.
        print(str(len(out)),end="\r")

    time2 = time.time()

print(f'Took {time2-time1:.2f} s')
print(pd.Series(out).value_counts())

In [131]:
# Run the two-level crawl for the seed account on trio's event loop and keep
# the list that `interaction_chain` returns.
data = trio.run(interaction_chain, 'bwinterrose')

User  bwinterrose queried
----------Entering 2nd level. Time so far: 1.0968801975250244


['lambdaschool',
 'austen',
 'kaggle',
 'superhuman',
 'tommycollison',
 'paulg',
 'mwseibel',
 'arachnocapital2',
 'rasbt',
 'jason',
 'cameronlewis35',
 'ivanahuckova',
 'gauravvohra1',
 'cowtung',
 'stephen_walter',
 'elonmusk',
 'kays310',
 'hackernews',
 'mattmiesnieks',
 'danielgross',
 'rrhoover',
 'ubereng',
 'micah',
 'brianray',
 'gcpcloud',
 'mckaywrigley',
 'trichomedoctor',
 'rapidsai',
 'blazingdb',
 'jiweiliu',
 'dksf',
 'mitchellbwright',
 'calebhicks',
 'twitter',
 'parkerwightman',
 'drbillnye',
 'mlpowered',
 'chieffhiwa',
 'vsodera',
 'pioneerdotapp',
 'fwapacha',
 'roadrunning01',
 'jeremyphoward',
 'chrisalbon',
 'alexqgb',
 'rahulvohra',
 'patrickc',
 'dsaience',
 'viglovikov',
 'quocleix']

Loop #  1  . Time so far: 1.0992779731750488
Loop #  2  . Time so far: 1.0994369983673096
Loop #  3  . Time so far: 1.0995419025421143
Loop #  4  . Time so far: 1.0996360778808594
Loop #  5  . Time so far: 1.0997178554534912
Loop #  6  . Time so far: 1.0998761653900146
Loop #  7  . Time so far: 1.0999360084533691
Loop #  8  . Time so far: 1.099992036819458
Loop #  9  . Time so far: 1.1000490188598633
Loop #  10  . Time so far: 1.1001060009002686
Loop #  11  . Time so far: 1.1001629829406738
Loop #  12  . Time so far: 1.1002192497253418
Loop #  13  . Time so far: 1.1002769470214844
Loop #  14  . Time so far: 1.1003551483154297
Loop #  15  . Time so far: 1.100442886352539
Loop #  16  . Time so far: 1.1005198955535889
Loop #  17  . Time so far: 1.1006650924682617
Loop #  18  . Time so far: 1.100775957107544
Loop #  19  . Time so far: 1.1009690761566162
Loop #  20  . Time so far: 1.101027011871338
Loop #  21  . Time so far: 1.1010828018188477
Loop #  22  . Time so far: 1.1011629104614258
L

In [129]:
# Show the collected tuples as this cell's output.
data

[('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose', 'lambdaschool'),
 ('bwinterrose