In [175]:
# User GET Endpoint Rate Limits: https://developer.twitter.com/en/docs/basics/rate-limits

"""
env:
    TWITTER_CONSUMER_KEY:
    TWITTER_CONSUMER_SECRET:
    TWITTER_ACCESS_TOKEN:
    TWITTER_ACCESS_TOKEN_SECRET:
    
get_user_interactions: 
    args:
        twitter_usernames - A list of usernames for the twitter username. (could use id)
    
    returns: 
        interactions_count - list of tuples in the format ("username", count)
""";

In [176]:
from sklearn.feature_extraction.text import CountVectorizer
import tweepy
import networkx as nx
import matplotlib.pyplot as plt
from decouple import config
import re
import pandas as pd
from collections import Counter
import csv
from pandas.io.json import json_normalize

## Initialize Tweepy

In [177]:
TWITTER_AUTH = tweepy.OAuthHandler(config('TWITTER_CONSUMER_KEY'),config('TWITTER_CONSUMER_SECRET'))

TWITTER_AUTH.set_access_token(config('TWITTER_ACCESS_TOKEN'),config('TWITTER_ACCESS_TOKEN_SECRET'))

TWITTER = tweepy.API(TWITTER_AUTH)

## Determine current rate limit stats

In [180]:
# Check the limit before running the function, then check again after and compare. 
start_api_check = TWITTER.rate_limit_status()
limits_alpha = json_normalize(json_str)

## This function performs search of user's interactions. 

In [185]:
def get_first_user_interactions(search):
    twitter_user = TWITTER.get_user(search)
    tweets = twitter_user.timeline(
                count=200, # Tweepy limit. 
                exclude_replies=False,
                include_rts=True,
                tweet_mode='extended')
    b = [ i.full_text for i in tweets ]
    b = " ".join(b)
    b = b.lower()
    b = b.replace(search, "")
    interactions = re.findall(r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9-_]+)',b)
    interactions_count = Counter(interactions).most_common(10)
    return interactions_count


In [186]:
def get_first_user_connections(search):
    twitter_user = TWITTER.get_user(search)
    tweets = twitter_user.timeline(
                count=1000,
                exclude_replies=False,
                include_rts=True,
                tweet_mode='extended')
    b = [ i.full_text for i in tweets ]
    b = " ".join(b)
    b = b.lower()
    b = b.replace(search, "")
    interactions = re.findall(r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9-_]+)',b)
    interactions = " ".join(interactions)
    interactions_list.append(interactions)
    index.append(search)
    return interactions

### Define a User

In [200]:
user = "bwinterrose"

In [203]:
%time
test_peeps = get_first_user_interactions(user)
test_peeps

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 11.9 µs


[('lambdaschool', 34),
 ('austen', 22),
 ('kaggle', 15),
 ('superhuman', 9),
 ('tommycollison', 8),
 ('paulg', 5),
 ('mwseibel', 5),
 ('arachnocapital2', 4),
 ('rasbt', 4),
 ('jason', 4)]

### Fetch user's interactions and user's interactions' interactions. 

In [205]:
def fetch_interactions(username):
    a = get_first_user_interactions(username)
    an_interactions_list = []
    an_index = []
    get_first_user_connections(username)
    for i in range(len(a)):
        get_first_user_connections(a[i][0])
    return (an_interactions_list, an_index)

In [190]:
from sklearn.feature_extraction.text import CountVectorizer
corpus = interactions_list
vectorizer = CountVectorizer(min_df=2)
X = vectorizer.fit_transform(corpus)

ValueError: empty vocabulary; perhaps the documents only contain stop words

In [None]:
import pandas as pd
display_df = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names(), index=index)

In [None]:
display_df

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
DG=nx.MultiGraph()

#loop through index, then the columns to find non-zero connections.
for i in display_df.index.values:
    for j in display_df.columns:
        if display_df[j].loc[i] >0 :
            DG.add_edge(j,i,display_df[j].loc[i] )
        else:
            pass
        
fig = plt.figure(figsize=(8,8))
nx.draw_kamada_kawai(DG,node_size=20)
plt.show()

## Examine API Impact of changes

In [126]:
end_api_check = TWITTER.rate_limit_status()
limits_beta = json_normalize(end_api_check).T
limits_beta.rename(columns = {0:'beta',}, inplace = True)

In [167]:
# Compare the change of ALL API ENDPOINTS between limits, alpha (before run), and beta (after run.)
limits_delta = limits_alpha.T.copy()
limits_delta['beta'] = limits_beta['beta']
limits_delta = limits_delta.reset_index(drop=False)
limits_delta.rename(columns = {0:'alpha', 'index':'api_endpoint'}, inplace = True)
limits_delta = limits_delta[['api_endpoint','alpha', 'beta']].assign(delta=limits_delta.alpha != limits_delta.beta)
limits_delta['type'] = limits_delta.api_endpoint.str.split(pat = '.', n = 1, expand = True)[0]
limits_delta['sub_type'] = limits_delta.api_endpoint.str.split(pat = '.', n = 2, expand = True)[1]
limits_delta['api_path'] = limits_delta.api_endpoint.str.split(pat = '.', n = 2, expand = True)[2].str.rsplit(pat = '.', n = 1, expand = True)[0]
limits_delta['method'] = limits_delta.api_path.str.rsplit(pat = '/', n = 1, expand = True)[1]
limits_delta['stat'] = limits_delta.api_endpoint.str.rsplit(pat = '.', n = 1, expand = True)[1]
limits_delta = limits_delta[['type', 'sub_type', 'api_path', 'method', 'stat', 'alpha', 'beta', 'delta']]

# Display filtered df. 
limits_delta[(limits_delta['stat'].str.contains("reset") == False) & (limits_delta['delta']==True)]



Unnamed: 0,type,sub_type,api_path,method,stat,alpha,beta,delta
50,resources,application,/application/rate_limit_status,rate_limit_status,remaining,163,179,True
