In [1]:
import csv
import os
import time

import numpy as np
import pandas as pd
import requests

**Reference materials:**

Reddit API (rate limit: 60 calls per minute):

https://www.reddit.com/dev/api/#GET_r_{subreddit}_about

Twitter API: 

https://towardsdatascience.com/getting-started-with-data-collection-using-twitter-api-v2-in-less-than-an-hour-600fbd5b5558

https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent

https://developer.twitter.com/en/docs/twitter-api/tweets/counts/api-reference/get-tweets-counts-recent

In [3]:
def get_request(url, parameters=None, headers=None, print_url=True,
                max_retries=5, wait_seconds=10, timeout=30):
    """Send a GET request and return the decoded JSON body.

    Unsuccessful HTTP responses (status >= 400) are retried after a pause.
    Every retry re-sends the same ``parameters`` and ``headers`` as the
    first attempt, and the number of retries is bounded so that a
    permanent failure cannot loop forever.

    Parameters
    ----------
    url : str
        Address to request.
    parameters : dict, optional
        Query-string parameters.
    headers : dict, optional
        HTTP headers (e.g. ``User-agent`` or ``Authorization``).
    print_url : bool, default True
        Print the final request URL after a successful response.
    max_retries : int, default 5
        How many times to retry after an unsuccessful HTTP response.
        Negative values are treated as 0.
    wait_seconds : float, default 10
        Pause between attempts.
    timeout : float, default 30
        Seconds to wait for the server before the attempt is abandoned.

    Returns
    -------
    dict or list or None
        The parsed JSON body; ``{}`` when the body is empty; ``None`` when
        the request itself failed (connection error, timeout, invalid URL)
        or every attempt returned an unsuccessful status.

    Raises
    ------
    ValueError
        Propagated from ``response.json()`` when a successful response
        carries a non-empty body that is not valid JSON.
    """
    attempts = max(int(max_retries), 0) + 1

    for attempt in range(attempts):
        try:
            response = requests.get(url=url, params=parameters,
                                    headers=headers, timeout=timeout)
        except requests.exceptions.RequestException as error:
            # Only transport-level failures are handled here;
            # KeyboardInterrupt and programming errors propagate untouched.
            print(f"Error: {error}")
            return None

        if response.ok:
            if print_url:
                print(response.url)

            # requests might return an empty string instead of a json object
            if response.text == '':
                return {}
            return response.json()

        # Unsuccessful status: pause before the next attempt, but do not
        # sleep after the final one.
        if attempt < attempts - 1:
            print(f'Request failed with status {response.status_code}, '
                  f'waiting {wait_seconds} seconds...')
            time.sleep(wait_seconds)
            print('Retrying.')

    print(f'Giving up after {attempts} attempts '
          f'(last status {response.status_code}).')
    return None

## Reddit API

In [4]:
%%time

# Subreddit to look up; interpolated into the request URL below.
name = "dota2"
# Reddit's "about" endpoint for the subreddit, requested as JSON.
url = f"https://www.reddit.com/r/{name}/about.json"
# The request is sent with an explicit User-agent header.
json_data = get_request(url, headers={'User-agent': 'your bot 0.1'})
# NOTE(review): get_request returns None when the request raises, in which
# case this lookup fails with a TypeError — confirm whether a guard is wanted.
subscriber_count = json_data['data']['subscribers']
# Bare last expression so the notebook displays the value.
subscriber_count

https://www.reddit.com/r/dota2/about.json
Wall time: 393 ms


1172848

## Twitter API

In [5]:
def connect_to_twitter(bearer_token=None):
    """Build the HTTP headers that authenticate a Twitter API request.

    The bearer token is a secret and must not be stored in the notebook.
    It is taken from ``bearer_token`` when given, otherwise from the
    ``TWITTER_BEARER_TOKEN`` environment variable.

    Parameters
    ----------
    bearer_token : str, optional
        Token to use instead of the environment variable.

    Returns
    -------
    dict
        ``{"Authorization": "Bearer <token>"}``, ready to pass as
        ``headers`` to ``requests.get``.

    Raises
    ------
    RuntimeError
        If no token was passed and the environment variable is unset or
        empty.
    """
    if bearer_token is None:
        bearer_token = os.environ.get("TWITTER_BEARER_TOKEN")
    if not bearer_token:
        raise RuntimeError(
            "No Twitter bearer token found: set the TWITTER_BEARER_TOKEN "
            "environment variable or pass bearer_token explicitly."
        )
    return {"Authorization": "Bearer {}".format(bearer_token)}

# Authentication headers, built once and passed to the Twitter requests in the cells below.
headers = connect_to_twitter()

In [6]:
# Both "recent" endpoints used below only cover the last 7 days of tweets.

# Query up to 100 tweets with a keyword
def make_search_request(headers, keyword, max_results, timeout=30):
    """Search recent tweets that match a keyword.

    Calls the Twitter API v2 ``tweets/search/recent`` endpoint.

    Parameters
    ----------
    headers : dict
        Request headers carrying the ``Authorization`` bearer token.
    keyword : str
        Sent to the API as the ``query`` parameter.
    max_results : int
        Sent to the API as ``max_results`` (the endpoint allows up to 100).
    timeout : float, default 30
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        The decoded JSON response body.

    Raises
    ------
    requests.exceptions.HTTPError
        If the API answers with an error status (bad token, rate limit, ...).
        Raising here reports the real cause instead of a later ``KeyError``
        when the caller indexes the missing ``'data'`` key.
    requests.exceptions.RequestException
        On connection failures or when ``timeout`` is exceeded.
    """
    url = "https://api.twitter.com/2/tweets/search/recent"
    query_params = {'query': keyword,
                    'max_results': max_results}
    # Without a timeout, requests can block indefinitely on a stalled connection.
    response = requests.get(url, params=query_params, headers=headers,
                            timeout=timeout)
    response.raise_for_status()
    return response.json()


# Query count of tweets with a keyword 
def make_count_request(headers, keyword, timeout=30):
    """Count recent tweets that match a keyword, bucketed per day.

    Calls the Twitter API v2 ``tweets/counts/recent`` endpoint with
    ``granularity="day"``.

    Parameters
    ----------
    headers : dict
        Request headers carrying the ``Authorization`` bearer token.
    keyword : str
        Sent to the API as the ``query`` parameter.
    timeout : float, default 30
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        The decoded JSON response body.

    Raises
    ------
    requests.exceptions.HTTPError
        If the API answers with an error status (bad token, rate limit, ...).
        Raising here reports the real cause instead of a later ``KeyError``
        when the caller indexes the missing ``'data'`` key.
    requests.exceptions.RequestException
        On connection failures or when ``timeout`` is exceeded.
    """
    url = "https://api.twitter.com/2/tweets/counts/recent"
    query_params = {'query': keyword,
                    'granularity': "day"}  # default is hour
    # Without a timeout, requests can block indefinitely on a stalled connection.
    response = requests.get(url, params=query_params, headers=headers,
                            timeout=timeout)
    response.raise_for_status()
    return response.json()

In [7]:
# Fetch recent tweets matching "dota2", asking for max_results=50.
tweets = make_search_request(headers, "dota2", 50)
# Display the first returned tweet to inspect the fields in the payload.
tweets['data'][0]

{'edit_history_tweet_ids': ['1635324755359432704'],
 'id': '1635324755359432704',
 'text': 'The new #ESL Pro Tour for #Dota2 has been revealed! \nhttps://t.co/aracVjWGIY'}

In [8]:
# Tweet counts for "dota2"; make_count_request asks the API for daily buckets.
tweet_count = make_count_request(headers, "dota2")
# Sum the 'tweet_count' of every returned bucket into a single total.
tweet_count_total = sum(x['tweet_count'] for x in tweet_count['data'])
# Bare last expression so the notebook displays the total.
tweet_count_total

15608