In [1]:
# general imports (standard library)
import time
import os

# extend the module search path with the parent directory (a relative entry,
# so it is resolved against the current working directory) so that the `src`
# package imported below can be found
import sys
sys.path.append('../')

# project-local modules from the `src` package
from src import enrichment
from src import fetch
from src import tools

In [2]:
# --- search configuration: everything a reader might want to tune ---

# subreddit to scrape from
SUB = 'uva'
# time range of the listing
TIME = 'week'
# category by which to sort the posts
SORT = 'top'
# number of posts to scrape
N_POSTS = 1
# hard limit on comments per post
N_COMMENTS = 100
# maximum comment-tree depth (levels of child comments)
DEPTH = 2

# Make sure the Reddit credentials are present before any fetch is attempted
# (presumably consumed by `src.fetch` when it connects -- confirm there).
#
# An explicit check is used instead of `assert os.environ[...]` because:
#   * a missing variable would surface as a bare KeyError with no guidance,
#   * assert statements are removed when Python runs with -O,
#   * all missing/empty variables are reported at once rather than one by one.
_missing_credentials = [
    name
    for name in ('REDDIT_CLIENT_ID', 'REDDIT_CLIENT_SECRET')
    if not os.environ.get(name)  # unset or empty string both count as missing
]
if _missing_credentials:
    raise RuntimeError(
        'missing required environment variable(s): '
        + ', '.join(_missing_credentials)
    )

#### The example below calls the `flat_fetch` function from `../src/fetch.py`...

In [3]:
# fetch a flat list of comments for the configured subreddit,
# using the search parameters defined in the configuration cell
comments_flat = fetch.flat_fetch(
    sub=SUB, time=TIME, sort=SORT,
    num_posts=N_POSTS, num_comments=N_COMMENTS, depth=DEPTH,
)

fetching flat comments from uva...           
sorting by: top, scraping 1 posts, 100 comments each (depth=2)
connecting to reddit client...
connected!
serializing data...
percent complete: 100.0%
done!


#### There exists another function `forest_fetch` that, while outputting a flattened list, still retains structural information such as depth.

In [4]:
# fetch the same comments via `forest_fetch`, with the same search
# parameters as the flat fetch, to get the depth-retaining variant
comments = fetch.forest_fetch(
    sub=SUB, time=TIME, sort=SORT,
    num_posts=N_POSTS, num_comments=N_COMMENTS, depth=DEPTH,
)

fetching comments from uva...           
sorting by: top, scraping 1 posts
hard limit of 100 comments per post...
with maximum comment tree depth of 2...
connecting to reddit client...
connected!


#### Save both results as JSON files...

In [5]:
# write each result to its own .json file under ../data/
# flat output first
tools.save_json(
    comments_flat,
    '../data/uva-flat-example.json',
)
# then the output that keeps depth information
tools.save_json(
    comments,
    '../data/uva-depth-example.json',
)