In [1]:
# Import the dependencies.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
import time
from config import SECRET_TOKEN, PUBLIC_TOKEN, USERNAME, PASSWORD
from datetime import datetime

In [2]:
# authenticator to request from API using our 'personal use script' (PUBLIC_TOKEN)
# 'secret' (SECRET_TOKEN) from reddit.com/prefs/apps
auth = requests.auth.HTTPBasicAuth(PUBLIC_TOKEN, SECRET_TOKEN)

In [3]:
# set up login method (password), username and password
data = {'grant_type': 'password',
        'username': USERNAME,
        'password': PASSWORD
       }

In [4]:
# setup our header info, which gives reddit a brief description of our app
headers = {'User-Agent': 'MyBot/0.0.1'}

# send our request for an OAuth token
res = requests.post('https://www.reddit.com/api/v1/access_token',
                    auth=auth, data=data, headers=headers)

# convert response to JSON and pull access_token value
TOKEN = res.json()['access_token']

# add authorization to our headers dictionary
headers['Authorization'] = f'bearer {TOKEN}'

In [5]:
# headers

In [6]:
# while the token is valid (~2 hours) we just add headers=headers to our requests
# Response [200] means successful
requests.get('https://oauth.reddit.com/api/v1/me', headers=headers)

<Response [200]>

In [7]:
# use .json() to get json file of personal profile
requests.get('https://oauth.reddit.com/api/v1/me', headers=headers).json()

{'is_employee': False,
 'seen_layout_switch': False,
 'has_visited_new_profile': False,
 'pref_no_profanity': True,
 'has_external_account': False,
 'pref_geopopular': '',
 'seen_redesign_modal': False,
 'pref_show_trending': True,
 'subreddit': {'default_set': True,
  'user_is_contributor': False,
  'banner_img': '',
  'restrict_posting': True,
  'user_is_banned': False,
  'free_form_reports': True,
  'community_icon': None,
  'show_media': True,
  'icon_color': '',
  'user_is_muted': False,
  'display_name': 'u_Property_Manager7',
  'header_img': None,
  'title': '',
  'coins': 0,
  'previous_names': [],
  'over_18': False,
  'icon_size': [256, 256],
  'primary_color': '',
  'icon_img': 'https://styles.redditmedia.com/t5_2on3do/styles/profileIcon_snoo66fd8b7d-94c5-437c-a53b-63842f7a7554-headshot.png?width=256&amp;height=256&amp;crop=256:256,smart&amp;s=21c4f4969e1ee7637b9dca83eb99279395278a55',
  'description': '',
  'submit_link_label': '',
  'header_size': None,
  'restrict_comment

Send our request to get subredit using:  oauth.reddit.com/r/(subreddit)/(endpoint)  
```Python
res = reddit.get('https://oauth.reddit.com/r/subreddit/(endpoint))
                

In [8]:
# use the endpoint /hot to get the hottest posts on your subreddit
wsb_res = requests.get('https://oauth.reddit.com/r/wallstreetbets/hot',
                    headers=headers)

In [9]:
wsb_res.json()

{'kind': 'Listing',
 'data': {'after': 't3_quizwg',
  'dist': 27,
  'modhash': None,
  'geo_filter': None,
  'children': [{'kind': 't3',
    'data': {'approved_at_utc': None,
     'subreddit': 'wallstreetbets',
     'selftext': 'What are your moves tomorrow? Please keep the shitposting at a slow boil. \n\n^Navigate ^WSB|^We ^recommend ^best ^daily ^DD\n:--|:--\n**Discussion** | [All](https://reddit.com/r/wallstreetbets/search?sort=new&amp;restrict_sr=on&amp;q=flair%3ADiscussion) / [**Best Daily**](https://www.reddit.com/r/wallstreetbets/search?sort=top&amp;q=flair%3ADiscussion&amp;restrict_sr=on&amp;t=day) / [Best Weekly](https://www.reddit.com/r/wallstreetbets/search?sort=top&amp;q=flair%3ADiscussion&amp;restrict_sr=on&amp;t=week)\n**DD** | [All](https://reddit.com/r/wallstreetbets/search?sort=new&amp;restrict_sr=on&amp;q=flair%3ADD) / [**Best Daily**](https://www.reddit.com/r/wallstreetbets/search?sort=top&amp;q=flair%3ADD&amp;restrict_sr=on&amp;t=day) / [Best Weekly](https://www.red

In [10]:
# use for loop to get each post in the reddit thread
for post in wsb_res.json()['data']['children']:
    print(post)

{'kind': 't3', 'data': {'approved_at_utc': None, 'subreddit': 'wallstreetbets', 'selftext': 'What are your moves tomorrow? Please keep the shitposting at a slow boil. \n\n^Navigate ^WSB|^We ^recommend ^best ^daily ^DD\n:--|:--\n**Discussion** | [All](https://reddit.com/r/wallstreetbets/search?sort=new&amp;restrict_sr=on&amp;q=flair%3ADiscussion) / [**Best Daily**](https://www.reddit.com/r/wallstreetbets/search?sort=top&amp;q=flair%3ADiscussion&amp;restrict_sr=on&amp;t=day) / [Best Weekly](https://www.reddit.com/r/wallstreetbets/search?sort=top&amp;q=flair%3ADiscussion&amp;restrict_sr=on&amp;t=week)\n**DD** | [All](https://reddit.com/r/wallstreetbets/search?sort=new&amp;restrict_sr=on&amp;q=flair%3ADD) / [**Best Daily**](https://www.reddit.com/r/wallstreetbets/search?sort=top&amp;q=flair%3ADD&amp;restrict_sr=on&amp;t=day) / [Best Weekly](https://www.reddit.com/r/wallstreetbets/search?sort=top&amp;q=flair%3ADD&amp;restrict_sr=on&amp;t=week)\n**YOLO** | [All](https://reddit.com/r/wallstre

In [11]:
# we can extract different data from our post
for post in wsb_res.json()['data']['children']:
    print(post['data']['title'])

What Are Your Moves Tomorrow, November 16, 2021
Most Anticipated Earnings Releases for the trading week beginning November 15th, 2021
Just sign out Elon, pls
FYI Container costs just jumped from $2500 to $25000. We just canceled all orders for the rest of the year so we don’t lost money. Shitz about to get wild out there. Good luck
Alibaba, limp dick pattern bout to erect!!
Waiting for the Evergrande Collapse.
Tilray To the Moon DD inside.
My depressing investment story. Today I took most of what I had left out and a 90 day ban, I won’t be back for awhile. Gg market.
Down with the ship
Bears v. Bulls Covid-19 Timeline
50k YOLO puts on TSLA. Call me Mini-Mike (Burry)
Republican Lawmakers File Bill To Tax And Regulate Marijuana As Alternative To Democratic Proposals.
AMC…This week is THE week…
Al i doing it right?
Get your money up not your funny up👹
PYPL - Long YOLO
Largest U.S. Pension Bought Palantir, Snowflake, and Berkshire Hathaway Stock.
Daily Discussion Thread for November 15, 20

In [12]:
# initialize a data frame
df_hot = pd.DataFrame()
for post in wsb_res.json()['data']['children']:
    df_hot = df_hot.append({
        'subreddit' : post['data']['subreddit'],
        'title' : post['data']['title'],
        'selftext' : post['data']['selftext'],
        'upvote_ratio' : post['data']['upvote_ratio'],
        'upvotes' : post['data']['ups'],
        'downvotes' : post['data']['downs'],
        'score' : post['data']['score']
    }, ignore_index = True)

In [13]:
# this will show you all of the keys you can use when accessing the JSON
post['data'].keys()

dict_keys(['approved_at_utc', 'subreddit', 'selftext', 'author_fullname', 'saved', 'mod_reason_title', 'gilded', 'clicked', 'title', 'link_flair_richtext', 'subreddit_name_prefixed', 'hidden', 'pwls', 'link_flair_css_class', 'downs', 'thumbnail_height', 'top_awarded_type', 'hide_score', 'name', 'quarantine', 'link_flair_text_color', 'upvote_ratio', 'author_flair_background_color', 'ups', 'total_awards_received', 'media_embed', 'thumbnail_width', 'author_flair_template_id', 'is_original_content', 'user_reports', 'secure_media', 'is_reddit_media_domain', 'is_meta', 'category', 'secure_media_embed', 'link_flair_text', 'can_mod_post', 'score', 'approved_by', 'is_created_from_ads_ui', 'author_premium', 'thumbnail', 'edited', 'author_flair_css_class', 'author_flair_richtext', 'gildings', 'post_hint', 'content_categories', 'is_self', 'subreddit_type', 'created', 'link_flair_type', 'wls', 'removed_by_category', 'banned_by', 'author_flair_type', 'domain', 'allow_live_comments', 'selftext_html',

In [14]:
df_hot

Unnamed: 0,downvotes,score,selftext,subreddit,title,upvote_ratio,upvotes
0,0.0,67.0,What are your moves tomorrow? Please keep the ...,wallstreetbets,"What Are Your Moves Tomorrow, November 16, 2021",0.94,67.0
1,0.0,951.0,,wallstreetbets,Most Anticipated Earnings Releases for the tra...,0.97,951.0
2,0.0,12877.0,,wallstreetbets,"Just sign out Elon, pls",0.94,12877.0
3,0.0,3323.0,,wallstreetbets,FYI Container costs just jumped from $2500 to ...,0.9,3323.0
4,0.0,640.0,,wallstreetbets,"Alibaba, limp dick pattern bout to erect!!",0.95,640.0
5,0.0,18321.0,,wallstreetbets,Waiting for the Evergrande Collapse.,0.95,18321.0
6,0.0,580.0,Republican Nancy Mace is presenting her Mariju...,wallstreetbets,Tilray To the Moon DD inside.,0.91,580.0
7,0.0,417.0,,wallstreetbets,My depressing investment story. Today I took m...,0.93,417.0
8,0.0,1995.0,,wallstreetbets,Down with the ship,0.96,1995.0
9,0.0,184.0,,wallstreetbets,Bears v. Bulls Covid-19 Timeline,0.97,184.0


In [15]:
# use the endpoint /new to get the most recent posts on your subreddit
wsb_new_res = requests.get('https://oauth.reddit.com/r/wallstreetbets/new',
                    headers=headers, params = {'limit' : '100'})

df_new = pd.DataFrame()
for post in wsb_new_res.json()['data']['children']:
    df_new = df_new.append({
        'subreddit' : post['data']['subreddit'],
        'title' : post['data']['title'],
        'selftext' : post['data']['selftext'],
        'upvote_ratio' : post['data']['upvote_ratio'],
        'upvotes' : post['data']['ups'],
        'downvotes' : post['data']['downs'],
        'score' : post['data']['score']
    }, ignore_index = True)

In [16]:
df_new

Unnamed: 0,downvotes,score,selftext,subreddit,title,upvote_ratio,upvotes
0,0.0,1.0,,wallstreetbets,Rocket Lab to Acquire Space Hardware Company P...,1.00,1.0
1,0.0,25.0,,wallstreetbets,How much is enough?!?,0.93,25.0
2,0.0,2.0,,wallstreetbets,Because why shouldn't we buy the Green Bay Pac...,0.67,2.0
3,0.0,8.0,,wallstreetbets,MARA FD POOTS - 100K GAIN PORN,0.90,8.0
4,0.0,11.0,,wallstreetbets,WSB discussion today,0.79,11.0
...,...,...,...,...,...,...,...
95,0.0,3336.0,,wallstreetbets,Evergrande: A Short Story,0.97,3336.0
96,0.0,50716.0,,wallstreetbets,Blame Bernie if Elon sells more Tesla stock lol,0.65,50716.0
97,0.0,1932.0,,wallstreetbets,I did some TA on the 30min $SPY chart. Things ...,0.96,1932.0
98,0.0,314.0,,wallstreetbets,Cryptominers now gobbling up AMD CPUs because ...,0.93,314.0


In [17]:
# create an id for the post using kind and before and after 
# t3 = threads
post['kind']

't3'

In [18]:
# using id we get a specific id
post['data']['id']

'qtrokr'

In [19]:
# combine the id and the kind of post using an underscore (this becomes a unique id)
post['kind'] + '_' + post['data']['id']

't3_qtrokr'

In [20]:
# we can loop back in time using a unique id and before and after 
# this will look at posts after the given unique id
wsb_new_res = requests.get('https://oauth.reddit.com/r/wallstreetbets/new',
                    headers=headers, params = {'limit' : '100','after':'t3_qthuap'})
# loop through and append to exsisting df
for post in wsb_new_res.json()['data']['children']:
    df_new = df_new.append({
        'subreddit' : post['data']['subreddit'],
        'title' : post['data']['title'],
        'selftext' : post['data']['selftext'],
        'upvote_ratio' : post['data']['upvote_ratio'],
        'upvotes' : post['data']['ups'],
        'downvotes' : post['data']['downs'],
        'score' : post['data']['score']
    }, ignore_index = True)

In [21]:
df_new

Unnamed: 0,downvotes,score,selftext,subreddit,title,upvote_ratio,upvotes
0,0.0,1.0,,wallstreetbets,Rocket Lab to Acquire Space Hardware Company P...,1.00,1.0
1,0.0,25.0,,wallstreetbets,How much is enough?!?,0.93,25.0
2,0.0,2.0,,wallstreetbets,Because why shouldn't we buy the Green Bay Pac...,0.67,2.0
3,0.0,8.0,,wallstreetbets,MARA FD POOTS - 100K GAIN PORN,0.90,8.0
4,0.0,11.0,,wallstreetbets,WSB discussion today,0.79,11.0
...,...,...,...,...,...,...,...
195,0.0,10.0,,wallstreetbets,0K gain with WDC,0.73,10.0
196,0.0,182.0,,wallstreetbets,Lucid Air has become a finalist of the Car of ...,0.79,182.0
197,0.0,1071.0,,wallstreetbets,HAPPY FRIDAY RETARDS. 102k gains in a single d...,0.94,1071.0
198,0.0,64.0,,wallstreetbets,Yolo on baba calls before earnings next week😬,0.87,64.0


In [22]:
# we use this function to convert responses to dataframes
def df_from_response(res):
    # initialize temp dataframe for batch of data in response
    df = pd.DataFrame()

    # loop through each post pulled from res and append to df
    for post in res.json()['data']['children']:
        df = df.append({
            'subreddit': post['data']['subreddit'],
            'title': post['data']['title'],
            'selftext': post['data']['selftext'],
            'upvote_ratio': post['data']['upvote_ratio'],
            'ups': post['data']['ups'],
            'downs': post['data']['downs'],
            'score': post['data']['score'],
            'link_flair_css_class': post['data']['link_flair_css_class'],
            'created_utc': datetime.fromtimestamp(post['data']['created_utc']).strftime('%Y-%m-%dT%H:%M:%SZ'),
            'id': post['data']['id'],
            'kind': post['kind']
        }, ignore_index=True)

    return df

In [23]:
# initialize dataframe and parameters for pulling data in loop
data = pd.DataFrame()
params = {'limit': 100}

# loop through 10 times (returning 1K posts)
for i in range(3):
    # make request
    res = requests.get("https://oauth.reddit.com/r/wallstreetbets/new",
                       headers=headers,
                       params=params)

    # get dataframe from response
    new_df = df_from_response(res)
    # take the final row (oldest entry)
    row = new_df.iloc[len(new_df)-1]
    # create fullname
    fullname = row['kind'] + '_' + row['id']
    # add/update fullname in params
    params['after'] = fullname
    
    # append new_df to data
    data = data.append(new_df, ignore_index=True)

In [24]:
data

Unnamed: 0,created_utc,downs,id,kind,link_flair_css_class,score,selftext,subreddit,title,ups,upvote_ratio
0,2021-11-15T13:59:22Z,0.0,qurm0b,t3,news,1.0,,wallstreetbets,Rocket Lab to Acquire Space Hardware Company P...,1.0,1.00
1,2021-11-15T13:57:04Z,0.0,qurk7o,t3,meme,24.0,,wallstreetbets,How much is enough?!?,24.0,0.90
2,2021-11-15T13:51:15Z,0.0,qurfhu,t3,news,2.0,,wallstreetbets,Because why shouldn't we buy the Green Bay Pac...,2.0,0.67
3,2021-11-15T13:47:22Z,0.0,qurcdk,t3,profit,8.0,,wallstreetbets,MARA FD POOTS - 100K GAIN PORN,8.0,0.90
4,2021-11-15T13:33:51Z,0.0,qur1ji,t3,news,11.0,,wallstreetbets,WSB discussion today,11.0,0.79
...,...,...,...,...,...,...,...,...,...,...,...
295,2021-11-11T10:06:40Z,0.0,qrr4zw,t3,yolo,5.0,,wallstreetbets,Bought more on the morning dip. GEVO Yolo,5.0,0.61
296,2021-11-11T10:04:31Z,0.0,qrr39d,t3,yolo,26.0,,wallstreetbets,Keeping my uranium play simple,26.0,0.81
297,2021-11-11T10:03:47Z,0.0,qrr2nr,t3,meme,851.0,,wallstreetbets,We never learn do we,851.0,0.98
298,2021-11-11T10:02:59Z,0.0,qrr1yq,t3,question,730.0,It looks not only Musk is selling. Execs Denho...,wallstreetbets,Billions worth of TSLA dumped by Execs,730.0,0.90
