In [None]:
# library imports

import praw
from dotenv import load_dotenv
import os
import requests
import pandas as pd
from praw_functions import search_subreddit, get_comments

## This notebook pulls threads and comments from Reddit through the Reddit API, using the PRAW library

You will need a Reddit account with API access set up, along with the associated keys (client ID and secret key).

In [2]:
# Load the Reddit API credentials from a local .env file into the process
# environment, then read them into module-level names.
#
# override=True lets values from .env win over variables that already exist in
# the shell. This matters for USER: most Unix-like shells already export USER
# as the login name, and load_dotenv() leaves existing variables untouched by
# default, so the .env value would otherwise be silently ignored.
load_dotenv(override=True)

SECRET_KEY = os.getenv("SECRET_KEY")  # passed to praw.Reddit as client_secret
CLIENT_ID = os.getenv("CLIENT_ID")    # passed to praw.Reddit as client_id
PASS = os.getenv("PASSWORD")          # read here but not referenced by the visible cells
USER = os.getenv("USER")              # passed to praw.Reddit as user_agent

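### Define Functions

The helper functions `search_subreddit` and `get_comments` are imported from `praw_functions` in the first cell.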

### Get Reddit & Subreddit Instance

In [95]:
# Build the Reddit client from the credentials loaded earlier. Only the app id,
# app secret and a user-agent string are supplied.
# NOTE(review): USER is used verbatim as the user agent - confirm that is the
# intended value.
reddit = praw.Reddit(client_id=CLIENT_ID, client_secret=SECRET_KEY, user_agent=USER)

# Show whether the client is in read-only mode (expected output: True).
reddit.read_only

True

In [96]:
# Handle for the "washingtondc" subreddit; it is passed to search_subreddit in
# the thread-search cell.
washingtondc = reddit.subreddit("washingtondc") # get subreddit instance

# Echo the handle to confirm which subreddit it refers to.
washingtondc

Subreddit(display_name='washingtondc')

### Construct Query

In [97]:
# Terms for the first half of the search. Every entry is wrapped in double
# quotes below so that multi-word names are searched as exact phrases.
# NOTE(review): the first entry, 'Neighborhood', is a generic word rather than a
# place name - confirm it is intentional and not a leftover column header.
neighborhoods = [
    'Neighborhood',
    'Adams Morgan', 'American University Park', 'Anacostia',
    'Barnaby Woods', 'Capitol Hill', 'Columbia Heights',
    'Cleveland Park', 'Dupont', 'Foggy Bottom',
    'Friendship Heights', 'Georgetown', 'Glover Park',
    'H Street', 'Logan Circle', 'Mount Pleasant',
    'Navy Yard', 'NoMa', 'Petworth',
    'Shaw', 'Southwest Waterfront', 'Takoma',
    'Tenleytown', 'The Palisades', 'U Street',
    'West End', 'Woodley Park',
]

# Terms for the second half of the search (single words, left unquoted).
safety_keywords = ["crime", "safety", "criminal", "safe"]

# Each half is an OR-group; the two groups are combined with AND, so a match
# needs at least one neighborhood term and at least one safety term.
neighborhood_query = " OR ".join(map('"{}"'.format, neighborhoods))
safety_query = " OR ".join(safety_keywords)

query = "(" + neighborhood_query + ") AND (" + safety_query + ")"

In [98]:
# Display the assembled search string so it can be checked by eye.
query

'("Neighborhood" OR "Adams Morgan" OR "American University Park" OR "Anacostia" OR "Barnaby Woods" OR "Capitol Hill" OR "Columbia Heights" OR "Cleveland Park" OR "Dupont" OR "Foggy Bottom" OR "Friendship Heights" OR "Georgetown" OR "Glover Park" OR "H Street" OR "Logan Circle" OR "Mount Pleasant" OR "Navy Yard" OR "NoMa" OR "Petworth" OR "Shaw" OR "Southwest Waterfront" OR "Takoma" OR "Tenleytown" OR "The Palisades" OR "U Street" OR "West End" OR "Woodley Park") AND (crime OR safety OR criminal OR safe)'

### Search Threads

In [99]:
# Run the query against the washingtondc subreddit handle using the helper
# imported from praw_functions.
# NOTE(review): 500 is presumably the maximum number of threads to return -
# confirm against praw_functions.search_subreddit.
threads_df = search_subreddit(washingtondc, query, 500)

# Display the result (pandas shows a truncated view for long frames).
threads_df

Unnamed: 0_level_0,title,text,year,month
submission_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
x2b2l6,"The Pie Shop says juveniles are "" hurling full...",,2022,08
vmsxc7,How safe is this neighborhood?,"Hey you, yeah you. Stop fucking asking this. Y...",2022,06
13woim5,"Anacostia council, Mayor Bowser hear residents...",,2023,05
1b5hyt5,'We should not let criminals take over our nei...,,2024,03
18zp614,Car taken from Georgetown with infant inside. ...,,2024,01
...,...,...,...,...
vd9zif,There have been 7 shootings in the city since ...,"Mainly in the SE and NE areas, but this is rid...",2022,06
10p64o,Fishing the Forgotten River in the Nation’s Ca...,,2012,09
13ujjig,Rock Creek Thru Hike Update,Hi friends! Thank you for your support regardi...,2023,05
1gg547z,Was Attacked in the Capitol Hill Area (Near Li...,Hi! I went out for a run and was running throu...,2024,10


### Grab Comments

In [92]:
# Fetch comments for the threads found above. The index of threads_df
# (labelled submission_id in the displayed output) is passed as a plain list,
# together with the Reddit client.
# NOTE(review): this cell's execution count (92) is lower than the search
# cell's (99), so the output shown may predate the current threads_df - re-run
# top to bottom to confirm.
comments_df = get_comments(list(threads_df.index), reddit)

# Preview the first rows only.
comments_df.head()

Unnamed: 0,submission_id,author,body,score,year,month
0,x2b2l6,Dentedhelm,Why Pie Shop???? They're one of the coolest pl...,275,2022,8
1,x2b2l6,,"This street, I hope it'll be alright",43,2022,8
2,x2b2l6,CaptchaCrunch,What is a “safety walk”… serious answers only ...,117,2022,8
3,x2b2l6,let-it-rain-sunshine,The local news did a piece on H street and bla...,289,2022,8
4,x2b2l6,,Juvenile crime is way up and it’s hard not to ...,173,2022,8


### Save Data

In [29]:
# EDIT ME - prefix used for the CSV file names written by the save cell.
file_name = "test"

In [55]:
# Run this block to save the scraped data to the repo data folder.
#
# Only threads_df and comments_df are created in the visible cells of this
# notebook, so those are the frames written here. The previous version of this
# cell referenced threads_washdc_df, comments_washdc_df, threads_DCforRent_df
# and comments_DCforRent_df, none of which are defined in any visible cell, so
# it raised NameError on a fresh kernel. The washingtondc file names are kept
# unchanged so existing readers of these CSVs still find them.
# NOTE(review): the DCforRent exports need their own search/comment cells
# before they can be restored.
os.makedirs("./data", exist_ok=True)  # to_csv does not create missing folders

threads_df.to_csv("./data/"+file_name+"_threads_washdc_df.csv")
comments_df.to_csv("./data/"+file_name+"_comments_washdc_df.csv")
