In [1]:
# library imports

import praw
from dotenv import load_dotenv
import os
import requests
import pandas as pd
from praw_functions import search_subreddit, get_comments

## This notebook contains code to grab data from the subreddit r/washingtondc

You will need to have an account set up with the api and the associated keys.

In [2]:
# this block will grab keys from your local environment 

load_dotenv()

SECRET_KEY = os.getenv("SECRET_KEY")
CLIENT_ID = os.getenv("CLIENT_ID")
PASS = os.getenv("PASSWORD")
USER = os.getenv("USER")

### Get Reddit & Subreddit Instance

In [3]:
# Create read only instance of reddit∏

reddit = praw.Reddit( 
    client_id=CLIENT_ID,
    client_secret=SECRET_KEY,
    user_agent=USER
)

reddit.read_only # check instance

True

In [4]:
washingtondc = reddit.subreddit("washingtondc") # get subreddit instance

washingtondc

Subreddit(display_name='washingtondc')

### Construct Query

In [5]:
neighborhoods = [
    'Neighborhood','Adams Morgan', 'American University Park', 'Anacostia', 'Barnaby Woods', 
    'Capitol Hill', 'Columbia Heights', 'Cleveland Park', 'Dupont', 
    'Foggy Bottom', 'Friendship Heights', 'Georgetown', 'Glover Park', 
    'H Street', 'Logan Circle', 'Mount Pleasant', 'Navy Yard', 
    'NoMa', 'Petworth', 'Shaw', 'Southwest Waterfront', 'Takoma', 
    'Tenleytown', 'The Palisades', 'U Street', 'West End', 'Woodley Park']

# Safety-related keywords
safety_keywords = ["crime", "safety", "criminal", "safe"]

# Generate the query
neighborhood_query = " OR ".join(f'"{neighborhood}"' for neighborhood in neighborhoods)
safety_query = " OR ".join(safety_keywords)

query = f"({neighborhood_query}) AND ({safety_query})"

In [6]:
query

'("Neighborhood" OR "Adams Morgan" OR "American University Park" OR "Anacostia" OR "Barnaby Woods" OR "Capitol Hill" OR "Columbia Heights" OR "Cleveland Park" OR "Dupont" OR "Foggy Bottom" OR "Friendship Heights" OR "Georgetown" OR "Glover Park" OR "H Street" OR "Logan Circle" OR "Mount Pleasant" OR "Navy Yard" OR "NoMa" OR "Petworth" OR "Shaw" OR "Southwest Waterfront" OR "Takoma" OR "Tenleytown" OR "The Palisades" OR "U Street" OR "West End" OR "Woodley Park") AND (crime OR safety OR criminal OR safe)'

### Search Threads

In [None]:
threads_df = search_subreddit(washingtondc, query, 500)

threads_df.head()

Unnamed: 0_level_0,title,text,year,month
submission_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
x2b2l6,"The Pie Shop says juveniles are "" hurling full...",,2022,08
vmsxc7,How safe is this neighborhood?,"Hey you, yeah you. Stop fucking asking this. Y...",2022,06
13woim5,"Anacostia council, Mayor Bowser hear residents...",,2023,05
1b5hyt5,'We should not let criminals take over our nei...,,2024,03
18zp614,Car taken from Georgetown with infant inside. ...,,2024,01
...,...,...,...,...
vd9zif,There have been 7 shootings in the city since ...,"Mainly in the SE and NE areas, but this is rid...",2022,06
10p64o,Fishing the Forgotten River in the Nation’s Ca...,,2012,09
1cvss24,is DC really that dangerous?,So i’m interning in Arlington and originally p...,2024,05
13ujjig,Rock Creek Thru Hike Update,Hi friends! Thank you for your support regardi...,2023,05


### Grab Comments

In [8]:
comments_df = get_comments(list(threads_df.index), reddit)

In [9]:
comments_df.shape

(23262, 6)

In [10]:
comments_df.head(25)

Unnamed: 0_level_0,submission_id,author,body,score,year,month
comment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
imijk0f,x2b2l6,Dentedhelm,Why Pie Shop???? They're one of the coolest pl...,275,2022,8
imiokoy,x2b2l6,,"This street, I hope it'll be alright",41,2022,8
imievy6,x2b2l6,CaptchaCrunch,What is a “safety walk”… serious answers only ...,117,2022,8
imijk71,x2b2l6,let-it-rain-sunshine,The local news did a piece on H street and bla...,289,2022,8
imirv7o,x2b2l6,,Juvenile crime is way up and it’s hard not to ...,167,2022,8
imig84i,x2b2l6,,So they’re trespassing on a property and hurli...,161,2022,8
imj9r0a,x2b2l6,foxy-coxy,I think it's time that we all admit that neith...,31,2022,8
imisyky,x2b2l6,thezhgguy,Did Pie Shop used to be Dangerously Delicious?...,14,2022,8
imiitxp,x2b2l6,ZenPoet,D.C. cops being completely useless? Hardly new...,96,2022,8
imj6xfm,x2b2l6,AnonyJustAName,[https://twitter.com/tomsherwood/status/156474...,5,2022,8


### Drop Empty Comments

In [11]:
comments_df[comments_df["body"] == '[removed]']

Unnamed: 0_level_0,submission_id,author,body,score,year,month
comment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
imih07x,x2b2l6,,[removed],25,2022,08
imij24s,x2b2l6,,[removed],11,2022,08
imigcfw,x2b2l6,,[removed],-48,2022,08
imih7tt,x2b2l6,,[removed],12,2022,08
imigt6t,x2b2l6,,[removed],25,2022,08
...,...,...,...,...,...,...
hx6k1lw,stoah5,,[removed],1,2022,02
gdg1b4q,k03gu6,,[removed],21,2020,11
h9v25g2,p90h2e,,[removed],30,2021,08
h9v4g5m,p90h2e,,[removed],-65,2021,08


In [12]:
comments_df = comments_df.drop(comments_df[comments_df["body"]=="[removed]"].index)

In [13]:
comments_df[comments_df["body"] == '[removed]']

Unnamed: 0_level_0,submission_id,author,body,score,year,month
comment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [None]:
comments_df.head()

Unnamed: 0_level_0,submission_id,author,body,score,year,month
comment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
imijk0f,x2b2l6,Dentedhelm,Why Pie Shop???? They're one of the coolest pl...,275,2022,08
imiokoy,x2b2l6,,"This street, I hope it'll be alright",41,2022,08
imievy6,x2b2l6,CaptchaCrunch,What is a “safety walk”… serious answers only ...,117,2022,08
imijk71,x2b2l6,let-it-rain-sunshine,The local news did a piece on H street and bla...,289,2022,08
imirv7o,x2b2l6,,Juvenile crime is way up and it’s hard not to ...,167,2022,08
...,...,...,...,...,...,...
h9vbsnq,p90h2e,,[deleted],1,2021,08
ha1ypp5,p90h2e,CaptainObvious110,Hmm actually you make a lot of sense here. Th...,1,2021,08
h9vcebj,p90h2e,129za,You probably know if better than I do. In the ...,1,2021,08
h9vkj9w,p90h2e,,[deleted],-1,2021,08


### Save Data

In [15]:
file_name='washingtondc_12_5' # EDIT ME - for file name 

In [16]:
# run this block to save to repo data folder
comments_df.to_csv("./data/"+file_name+"_comments.csv")
threads_df.to_csv("./data/"+file_name+"_threads.csv")
