In [1]:
# Collect relevant content through the Reddit API.
import json
import praw
# PRAW documentation:
#  https://praw.readthedocs.io/en/stable/code_overview/reddit_instance.html

In [2]:
# IMPORTANT: enter valid access credentials in the config file;
# follow the instructions in reddit_credentials_verify.ipynb
import config_reddit

In [3]:
# open a Reddit API session using only the app credentials and a user-agent
# string (no account login is passed), then display the read-only flag
reddit = praw.Reddit(
    client_id=config_reddit.app_id,
    client_secret=config_reddit.app_secret,
    user_agent=f"Exploration script by /u/{config_reddit.user_name}",
)
reddit.read_only

True

In [4]:
# Subreddit to analyze -- EDIT this value to study a different community.
#
# Use the trailing part of the subreddit URL; for example, for
#  https://www.reddit.com/r/ebikes/
# the value is "ebikes".
query_subreddit: str = "ebikes"

In [5]:
# number of posts to request from the subreddit's "hot" listing
nposts: int = 50

In [6]:
# walk the chosen subreddit's "hot" listing and keep only each submission's id
subreddit = reddit.subreddit(query_subreddit)
post_ids = [post.id for post in subreddit.hot(limit=nposts)]
# display how many post (submission) ids were gathered
len(post_ids)

50

In [7]:
# peek at one collected post (the second id in the list):
# its title on the first line, followed by its body text
post_details = reddit.submission(id=post_ids[1])
print(post_details.title, post_details.selftext, sep="\n")

Bike friendly neighborhoods start at the local level. You can make a difference.
1. Transportation infrastructure policy is largely done at the local level. 
2. A shockingly small number of people are actually involved in making this policy. 
3. Individual Advocates can have a huge impact at the city level. 

Bottom line: Anyone can make a difference! 

Even if you can't afford to catch a local politicians ear via donation... or if you don't have the free time to show up at a city hall meeting... you can still be an educator/advocate on social media. 

**Education**

Youtube:

* [Why City Design is Important \(and Why I Hate Houston\) by Not Just Bikes](https://www.youtube.com/watch?v=uxykI30fS54)

* [The Ugly, Dangerous, and Inefficient Stroads found all over the US & Canada by Not Just Bikes](https://www.youtube.com/watch?v=ORzNZUeUHAM)

* [Bike lanes are not enough by City Beautiful](https://www.youtube.com/watch?v=p36skNda3KE)



Tiktok:

* [Phil Sustainability & Enviro](https://ww

In [8]:
# number of top comments to query for each post;
# NOTE: raising it may pull in more irrelevant text and dilute the content
ncomments: int = 10

In [9]:
# function to collect post data
def collect_post_data(post_id, ncomments, reddit):
    """Fetch one submission and its leading comments as a plain dict.

    Parameters
    ----------
    post_id : str
        Submission id, passed to ``reddit.submission(id=...)``.
    ncomments : int or None
        Maximum number of top-level comments to keep, taken in the order the
        comment listing returns them. ``None`` keeps every loaded top-level
        comment; negative values are treated as 0.
    reddit : praw.Reddit
        API client used to look up the submission.

    Returns
    -------
    dict
        Keys: ``'id'``, ``'title'``, ``'text'``, ``'comments_lev1'`` (bodies
        of the kept top-level comments) and ``'comments_lev2'`` (bodies of the
        direct replies to those kept comments, flattened into one list).
    """
    psubm = reddit.submission(id=post_id)
    pdata = {'id': post_id, 'title': psubm.title, 'text': psubm.selftext}

    # `replace_more(limit=N)` bounds how many "load more comments" stubs get
    # expanded (one extra API request each); it does NOT bound the number of
    # comments. limit=0 simply drops the stubs, so only real comments remain
    # and no additional requests are made.
    psubm.comments.replace_more(limit=0)

    # enforce the requested cap on top-level comments explicitly
    if ncomments is not None:
        ncomments = max(ncomments, 0)
    top_comments = list(psubm.comments)[:ncomments]

    # collect first- and second-level comments
    pcomm = [top_comment.body for top_comment in top_comments]
    psubcomm = [
        lev2_comment.body
        for top_comment in top_comments
        for lev2_comment in top_comment.replies
    ]

    # assemble the data together
    pdata['comments_lev1'] = pcomm
    pdata['comments_lev2'] = psubcomm

    return pdata

In [10]:
# build one data record (post text plus comments) per collected post id
posts_all = list(
    map(lambda pid: collect_post_data(pid, ncomments, reddit), post_ids)
)

In [11]:
# save collected data to a JSON file
# (plain string literal: the name has no placeholders, so no f-string is needed)
file_out = "raw_post_comment_data.json"
# an explicit encoding keeps the file independent of the platform default
with open(file_out, mode='w', encoding='utf-8') as f:
    # write straight to the file handle instead of building the whole
    # JSON text in memory first
    json.dump(posts_all, f, indent=2)