In [98]:
# ### Mount Drive

# from google.colab import drive
# drive.mount('/content/drive/')

In [99]:
### Imports

import os, requests, json
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import datetime
import seaborn as sns
from sklearn import preprocessing
import sys

In [100]:
# Twitter API bearer token, read from the environment so the secret is never
# stored in the notebook. Export TWITTER_BEARER_TOKEN before starting the kernel.
# When the variable is unset the token is an empty string, so API calls are
# expected to be rejected and surface through response_health.
# NOTE(review): the token previously committed on this line should be treated as
# leaked and revoked in the developer portal.
bearer_token = os.environ.get("TWITTER_BEARER_TOKEN", "")

def response_health(r):
  '''Raise an Exception carrying the status code and body unless the status is 200.

  r: response object exposing `status_code` and `text`.
  Returns None when the response is healthy.
  '''
  if r.status_code == 200:
    return
  message = "Request returned an error: {} {}".format(r.status_code, r.text)
  raise Exception(message)
    
def bearer_oauth(r):
  '''Set the Authorization header from the module-level bearer_token and return r.

  send_request passes this function to requests as the `auth` callable.
  '''
  auth_header = f"Bearer {bearer_token}"
  r.headers["Authorization"] = auth_header
  return r

def send_request(url, params=None, timeout=30):
  '''Send a GET request to `url` with optional query params. Returns json.

  url: full endpoint URL.
  params: optional dict of query-string parameters; None sends no query string.
  timeout: seconds to wait for the server before giving up, so a stalled
    connection cannot hang the notebook cell indefinitely.

  Prints the response status code, then raises Exception (via response_health)
  when the status is not 200. A timeout surfaces as the exception raised by
  requests itself.
  '''
  # https://2.python-requests.org/en/master/api/#requests.request
  # requests treats params=None as "no query string", so one call covers both cases.
  response = requests.request("GET", url, auth=bearer_oauth, params=params, timeout=timeout)
  print("Request response status: ", response.status_code)
  response_health(response)
  return response.json()

In [101]:
def get_user_data(name):
  '''Look up one Twitter user by username and flatten the profile into a dict.

  name: username inserted into the users/by/username lookup URL.

  Returns a dict with, in this order: the four public metrics (following_count,
  tweet_count, followers_count, listed_count), username, name, id, verified,
  protected, created_at and description copied from the response, then
  hasPinnedTweet and the entity counts urlsInDescription, hashtagsInDescription,
  userWebsitesAdded, cashtagsInDescription and mentionsInDescription.

  Raises KeyError when the response has no "data" object or one of the copied
  fields is missing; exceptions from send_request propagate unchanged.
  '''
  # data dictionary scroll down to response fields https://developer.twitter.com/en/docs/twitter-api/users/lookup/api-reference/get-users-by-username-username

  userFields = {"user.fields":"created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld".replace(" ", "")}
  user_json = send_request(f"https://api.twitter.com/2/users/by/username/{name}",params=userFields)
  user_json = user_json["data"]

  metrics = user_json['public_metrics']
  outputDict = {
    'following_count': metrics['following_count'],
    'tweet_count': metrics['tweet_count'],
    'followers_count': metrics['followers_count'],
    'listed_count': metrics['listed_count'],
    'username': user_json['username'],
    'name': user_json['name'],
    'id': user_json['id'],
    'verified': user_json['verified'],
    'protected': user_json['protected'],
    'created_at': user_json['created_at'],
    'description': user_json['description'],
  }

  # The key is only present in the response when the user has a pinned tweet.
  outputDict['hasPinnedTweet'] = 'pinned_tweet_id' in user_json

  # Entity groups are optional in the response: a missing or empty group counts as 0.
  # Explicit lookups replace the former bare `except:` blocks so that unrelated
  # errors (including KeyboardInterrupt) are no longer silently swallowed.
  entities = user_json.get('entities') or {}
  descriptionEntities = entities.get('description') or {}
  profileUrlEntities = entities.get('url') or {}

  def countEntities(group, key):
    return len(group.get(key) or [])

  outputDict['urlsInDescription'] = countEntities(descriptionEntities, 'urls')
  outputDict['hashtagsInDescription'] = countEntities(descriptionEntities, 'hashtags')
  outputDict['userWebsitesAdded'] = countEntities(profileUrlEntities, 'urls')
  outputDict['cashtagsInDescription'] = countEntities(descriptionEntities, 'cashtags')
  outputDict['mentionsInDescription'] = countEntities(descriptionEntities, 'mentions')

  return outputDict

In [102]:
def _tweet_to_row(tweet):
    """Flatten one tweet object from the timeline response into a column -> value dict."""
    metrics = tweet['public_metrics']

    # referenced tweets: quotes, replies, and retweets. A tweet may reference
    # several tweets, so collect the distinct types and derive each flag once.
    refdTweets = tweet.get('referenced_tweets') or []
    refTypes = {ref.get('type') for ref in refdTweets}

    entities = tweet.get('entities') or {}
    hashtags = entities.get('hashtags') or []

    # just grabbing the first image in the first url; any missing level means no image
    try:
        urlImage = entities['urls'][0]['images'][0]['url']
    except (KeyError, IndexError, TypeError):
        urlImage = ""

    attachments = tweet.get('attachments') or {}

    return {
        'id': tweet['id'],
        'text': tweet['text'],
        'lang': tweet['lang'],
        'possibly_sensitive': tweet['possibly_sensitive'],
        'retweet_count': metrics['retweet_count'],
        'reply_count': metrics['reply_count'],
        'like_count': metrics['like_count'],
        'quote_count': metrics['quote_count'],
        'reply_settings': tweet['reply_settings'],
        'source': tweet['source'],
        'created_at': tweet['created_at'],
        'is_retweet': 'retweeted' in refTypes,
        'contains_quote': 'quoted' in refTypes,
        'is_reply': 'replied_to' in refTypes,
        'num_referenced_tweets': len(refdTweets),
        'url_image': urlImage,
        'num_hashtags': len(hashtags),
        # grabbing just the first hashtag
        'text_first_hashtag': hashtags[0].get('tag', "") if hashtags else "",
        'num_mentions': len(entities.get('mentions') or []),
        'num_cashtags': len(entities.get('cashtags') or []),
        'num_polls': len(attachments.get('poll_ids') or []),
    }


def get_tweets_user(id, numTweets = 10, tweetsPerPage = 10):
    """Fetch a user's recent tweets page by page and return them as a DataFrame.

    id: user id inserted into the users/{id}/tweets URL.
    numTweets: number of tweets wanted; it is rounded up to whole pages, so up to
        ceil(numTweets / tweetsPerPage) * tweetsPerPage rows can be returned.
    tweetsPerPage: value sent as `max_results` on every request.

    Returns a DataFrame with one row per tweet, or None (after printing a
    message) when numTweets is smaller than tweetsPerPage. Paging stops early
    when the response carries no `next_token`, and a page without `data`
    contributes no rows.

    Raises KeyError when a tweet lacks one of the directly copied fields;
    exceptions from send_request propagate unchanged.
    """
    import math

    # (there are 10 results returned per page by default)
    if numTweets < tweetsPerPage:
        print("numTweets must be greater than or equal to the number of tweets per page.")
        return

    # to see data dictionary, click url and scroll down to response fields https://developer.twitter.com/en/docs/twitter-api/tweets/timelines/api-reference/get-users-id-tweets
    expansions = {"expansions":"author_id, attachments.poll_ids, attachments.media_keys, entities.mentions.username, geo.place_id, in_reply_to_user_id, referenced_tweets.id,referenced_tweets.id.author_id".replace(" ", "")}
    tweetFields = {"tweet.fields":"attachments, author_id, context_annotations, conversation_id, created_at, entities, geo, id, in_reply_to_user_id, lang, public_metrics, possibly_sensitive, referenced_tweets, reply_settings, source, text, withheld".replace(" ", "")}

    columns = ('id', 'text', 'lang', 'possibly_sensitive', 'retweet_count', 'reply_count', 'like_count', 'quote_count',
               'reply_settings', 'source', 'created_at', 'is_retweet', 'contains_quote', 'is_reply', 'num_referenced_tweets',
               'url_image', 'num_hashtags', 'text_first_hashtag', 'num_mentions', 'num_cashtags', 'num_polls')
    outputDict = {column: [] for column in columns}

    nextToken = None
    # for each page of results
    for _ in range(math.ceil(numTweets/tweetsPerPage)):
        params = {'max_results': tweetsPerPage, **expansions, **tweetFields}
        if nextToken is not None:
            params["pagination_token"] = nextToken

        tweet_json = send_request(f"https://api.twitter.com/2/users/{id}/tweets", params=params)

        for tweet in tweet_json.get('data') or []:
            row = _tweet_to_row(tweet)
            # Exactly one value per column per tweet keeps every list the same length.
            for column, values in outputDict.items():
                values.append(row[column])

        # No next_token means there are no further pages: stop instead of raising.
        nextToken = (tweet_json.get('meta') or {}).get('next_token')
        if nextToken is None:
            break

    df = pd.DataFrame(outputDict)

    return df

In [103]:
def get_api_data(usernames):
    """Collect profile data and recent tweets for every username given.

    Returns a dict that maps each username to the result of get_tweets_user for
    that account, plus a 'userData' key holding a DataFrame with one row of
    get_user_data fields per username.
    """
    userColumns = (
        'following_count', 'tweet_count', 'followers_count', 'listed_count', 'username', 'name', 'id',
        'verified', 'protected', 'created_at', 'description', 'hasPinnedTweet', 'urlsInDescription',
        'hashtagsInDescription', 'userWebsitesAdded', 'cashtagsInDescription', 'mentionsInDescription',
    )
    usersDict = {column: [] for column in userColumns}
    outputDict = {}

    for username in usernames:
        # account-level fields go into the shared per-column lists
        profile = get_user_data(username)
        for field, value in profile.items():
            usersDict[field].append(value)

        # tweets of the account, stored under the username itself
        outputDict[username] = get_tweets_user(profile['id'])

    outputDict['userData'] = pd.DataFrame(usersDict)
    return outputDict

# Fetch profile data and the default number of recent tweets for both accounts.
# NOTE: this sends live API requests every time the cell is run.
apiData = get_api_data(['McDonalds', 'Wendys'])

Request response status:  200
Request response status:  200
Request response status:  200
Request response status:  200


In [104]:
# Show the collected results: one tweets DataFrame per username plus the 'userData' frame.
apiData

{'McDonalds':                     id                                               text  \
 0  1470619824933986310  @spderii What went wrong? Let us know more: ht...   
 1  1470602325580521473  @ShibaInuGold2 We appreciate hearing your inte...   
 2  1470602287630495750  @JesseDiane__ Certainly not the experience we ...   
 3  1470602157372153859  @marayha7 Marayha, what went wrong? Let us kno...   
 4  1470602048081207297  @SydMarketsStuff We'd like to talk more about ...   
 5  1470601446630600705  @notgreatatmath Waiting is never fun, Dal! Sha...   
 6  1470601363050713088  @anjunabeachwave That's a bummer, Bren. Tell u...   
 7  1470600198305030145  @EliteGamingYTx That's not typical of us! Let'...   
 8  1470600122719428609  @xtrpro Order correctness is extremely importa...   
 9  1470600021779308546  @craxy4rubi Why do you say that, Rubi? Care to...   
 
   lang  possibly_sensitive  retweet_count  reply_count  like_count  \
 0   en               False              0            0 