In [1]:
# library imports

import praw
from dotenv import load_dotenv
import os
import requests
import pandas as pd
from praw_functions import search_subreddit, get_comments 

## This notebook contains code to grab data from Reddit using an API query with the PRAW library

You will need a Reddit account with API access set up, along with the associated API keys.

In [4]:
# Load the Reddit API credentials from the local .env file into the process environment.
# override=True is deliberate: POSIX shells already export USER (the OS login name), and
# load_dotenv() leaves already-set variables untouched by default, so without it the USER
# entry from .env would be silently ignored.

load_dotenv(override=True)

SECRET_KEY = os.getenv("SECRET_KEY")  # passed to praw as client_secret
CLIENT_ID = os.getenv("CLIENT_ID")    # passed to praw as client_id
PASS = os.getenv("PASSWORD")          # read here but not referenced by the cells shown below
USER = os.getenv("USER")              # passed to praw as user_agent

### Get Reddit & Subreddit Instance

In [7]:
# Build the Reddit client from the credentials loaded above.
# Only the app id/secret and a user agent are supplied (no username/password).

reddit = praw.Reddit(
    user_agent=USER,
    client_secret=SECRET_KEY,
    client_id=CLIENT_ID,
)

reddit.read_only  # echo the read-only flag as a quick sanity check on the instance

True

In [9]:
# Handle for the "washdc" subreddit; the thread search further down is run against it.
washdc = reddit.subreddit("washdc") # get subreddit instance

washdc  # display the handle to confirm which subreddit was selected

Subreddit(display_name='washdc')

### Create Query

In [75]:
## Build the search string: (any neighborhood name) AND (any safety keyword)
# NOTE(review): 'Neighborhood' reads like a column header rather than a place name —
# confirm it is meant to be a search term.
dc_neighborhoods = [
    'Neighborhood',
    'Adams Morgan',
    'American University Park',
    'Anacostia',
    'Barnaby Woods',
    'Capitol Hill',
    'Columbia Heights',
    'Cleveland Park',
    'Dupont',
    'Foggy Bottom',
    'Friendship Heights',
    'Georgetown',
    'Glover Park',
    'H Street',
    'Logan Circle',
    'Mount Pleasant',
    'Navy Yard',
    'NoMa',
    'Petworth',
    'Shaw',
    'Southwest Waterfront',
    'Takoma',
    'Tenleytown',
    'The Palisades',
    'U Street',
    'West End',
    'Woodley Park',
]

safety_words = ['crime', 'safe', 'criminal', 'safety']

# Each neighborhood name is wrapped in double quotes (several contain spaces);
# the safety keywords are joined bare.
neighborhood_query = " OR ".join(map('"{}"'.format, dc_neighborhoods))
safety_query = " OR ".join(safety_words)

# Both OR-groups are parenthesised and combined with AND.
query = "({}) AND ({})".format(neighborhood_query, safety_query)
query

'("Neighborhood" OR "Adams Morgan" OR "American University Park" OR "Anacostia" OR "Barnaby Woods" OR "Capitol Hill" OR "Columbia Heights" OR "Cleveland Park" OR "Dupont" OR "Foggy Bottom" OR "Friendship Heights" OR "Georgetown" OR "Glover Park" OR "H Street" OR "Logan Circle" OR "Mount Pleasant" OR "Navy Yard" OR "NoMa" OR "Petworth" OR "Shaw" OR "Southwest Waterfront" OR "Takoma" OR "Tenleytown" OR "The Palisades" OR "U Street" OR "West End" OR "Woodley Park") AND (crime OR safe OR criminal OR safety)'

### Search Threads

In [80]:
# Run the combined query against the washdc subreddit through the project helper
# (imported from praw_functions) and preview the first rows of the result.
# NOTE(review): 500 is presumably the maximum number of threads to fetch — confirm
# against search_subreddit's signature.
threads_washdc_df = search_subreddit(washdc, query, 500)
threads_washdc_df.head()

Unnamed: 0_level_0,title,text,year,month
submission_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1h2wunn,Why are criminals and violent teens allowed to...,,2024,11
1gghbb2,Mob attack on U Street investigated as hate crime,,2024,10
1go926g,Criminals have all out brawl on typical night ...,,2024,11
1ce7smd,Fatal shooting on U Street: this is what happe...,,2024,4
1e5y4pe,Assault in DC's Foggy Bottom area investigated...,,2024,7


### Grab Comments

In [82]:
# Fetch comments for every thread returned by the search; the thread frame's index
# supplies the ids handed to the project helper.
comments_washdc_df = get_comments(threads_washdc_df.index.tolist(), reddit)

comments_washdc_df.head()

Unnamed: 0,submission_id,author,body,score,year,month
0,1h2wunn,borg359,“There’s nothing we can do to stop this” says ...,301,2024,11
1,1h2wunn,SpaceGhost2009,I saw a 14-16 year old kid outside of the Walg...,254,2024,11
2,1h2wunn,haroldhecuba88,Because the judicial system considers them vic...,515,2024,11
3,1h2wunn,BusinessPublic2577,The Mayor cares nothing for the U Street corri...,30,2024,11
4,1h2wunn,cluehq,DC Residents continue to elect a government th...,80,2024,11


### Save Data

In [84]:
file_name='_12_4' # EDIT ME - prefix prepended to both output CSV names (e.g. "_12_4_threads_washdc_df.csv")

In [86]:
# Run this block to save both frames as CSV in the repo's data folder.
# The folder is created first if it is missing: to_csv does not create parent
# directories, so a fresh checkout without ./data would otherwise fail here.
os.makedirs("./data", exist_ok=True)
threads_washdc_df.to_csv(f"./data/{file_name}_threads_washdc_df.csv")
comments_washdc_df.to_csv(f"./data/{file_name}_comments_washdc_df.csv")