In [27]:
# library imports

import praw
from dotenv import load_dotenv
import os
import requests
import pandas as pd
from praw_functions import search_subreddit, get_comments 

## This notebook contains code to grab data from Reddit using an API query with the PRAW library

You will need a Reddit account that is set up for API access, along with the associated keys.

In [30]:
# Pull the Reddit API credentials out of the process environment.
# load_dotenv() runs first (python-dotenv reads variables from a local .env file);
# any variable that is still unset afterwards is stored as None.
#
# NOTE(review): USER is normally already set by the shell to the login name, and
# load_dotenv() does not override existing variables by default -- confirm the
# value from .env is the one that actually ends up in USER.
load_dotenv()

SECRET_KEY, CLIENT_ID, PASS, USER = (
    os.environ.get(variable)
    for variable in ("SECRET_KEY", "CLIENT_ID", "PASSWORD", "USER")
)

### Get Reddit & Subreddit Instance

In [33]:
# Build the Reddit client from the app credentials only; no username/password is
# passed here. USER is sent as the user agent string.
reddit = praw.Reddit(client_id=CLIENT_ID, client_secret=SECRET_KEY, user_agent=USER)

# Echo the client's read_only flag as a quick sanity check of the instance.
reddit.read_only

True

In [35]:
# Look up the "DCforRent" subreddit through the Reddit client; the searches below
# are run against this object.
DCforRent = reddit.subreddit("DCforRent") # get subreddit instance

DCforRent # echo the object so the cell displays its repr

Subreddit(display_name='DCforRent')

### Create Query

In [113]:
# Assemble a single search string of the form
#   ("<name 1>" OR "<name 2>" OR ...) AND (<word 1> OR <word 2> OR ...)
# Each neighborhood name is wrapped in double quotes (presumably so multi-word
# names are matched as phrases); the safety words are left unquoted.
#
# NOTE(review): the finished query is a little over 500 characters. Reddit's
# search endpoint documents a maximum query length (512 characters, to be
# confirmed), so adding more terms may push it over the limit.
dc_neighborhoods = [
    'Neighborhood',  # generic term, listed alongside the actual place names
    'Adams Morgan', 'American University Park', 'Anacostia', 'Barnaby Woods',
    'Capitol Hill', 'Columbia Heights', 'Cleveland Park', 'Dupont',
    'Foggy Bottom', 'Friendship Heights', 'Georgetown', 'Glover Park',
    'H Street', 'Logan Circle', 'Mount Pleasant', 'Navy Yard',
    'NoMa', 'Petworth', 'Shaw', 'Southwest Waterfront', 'Takoma',
    'Tenleytown', 'The Palisades', 'U Street', 'West End', 'Woodley Park',
]

safety_words = ['crime', 'safe', 'criminal', 'safety']

# OR-join each group, then AND the two parenthesised groups together.
neighborhood_query = " OR ".join(map('"{}"'.format, dc_neighborhoods))
safety_query = " OR ".join(safety_words)

query = "({}) AND ({})".format(neighborhood_query, safety_query)
query

'("Neighborhood" OR "Adams Morgan" OR "American University Park" OR "Anacostia" OR "Barnaby Woods" OR "Capitol Hill" OR "Columbia Heights" OR "Cleveland Park" OR "Dupont" OR "Foggy Bottom" OR "Friendship Heights" OR "Georgetown" OR "Glover Park" OR "H Street" OR "Logan Circle" OR "Mount Pleasant" OR "Navy Yard" OR "NoMa" OR "Petworth" OR "Shaw" OR "Southwest Waterfront" OR "Takoma" OR "Tenleytown" OR "The Palisades" OR "U Street" OR "West End" OR "Woodley Park") AND (crime OR safe OR criminal OR safety)'

### Search Threads

In [124]:
# Run the project's search helper on the DCforRent subreddit with the query built
# above and keep the result as a DataFrame.
# NOTE(review): 500 is presumably the maximum number of results to request --
# confirm against praw_functions.search_subreddit.
threads_DCforRent_df = search_subreddit(DCforRent, query, 500)

threads_DCforRent_df.head() # preview the first rows

Unnamed: 0_level_0,title,text,year,month
submission_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
y57cbw,"Best quiet, safe but lively neighborhood in DC",I am in my late 30s and haven't lived in DC si...,2022,10
utr8aw,What Neighborhoods in DC/Mayland Should Be Avo...,,2022,5
32cqd6,Recommendations on safe neighborhoods for a yo...,I'm moving to DC from the South in a few weeks...,2015,4
20t3z6,Find out how safe the neighborhood you want to...,,2014,3
1h3ldb4,1 Month of Free Rent in 770 SQFT. High Rise Ap...,Apartment Takeover Opportunity: Market House i...,2024,11


### Grab Comments

In [126]:
# Collect comments for the threads found by the search. The threads frame is
# indexed by submission id (see its preview), so its index is passed as the list
# of ids, together with the Reddit client.
# NOTE(review): this likely issues API requests per thread and may be slow --
# confirm against praw_functions.get_comments.
comments_DCforRent_df = get_comments(list(threads_DCforRent_df.index), reddit)

comments_DCforRent_df.head() # preview the first rows

Unnamed: 0,submission_id,author,body,score,year,month
0,y57cbw,eeek0711,Or Cleveland Park in DC,4,2022,10
1,y57cbw,eeek0711,Takoma Park MD,3,2022,10
2,y57cbw,nonmimeticform,Baltimore,3,2022,10
3,y57cbw,dcgirlsmallworld,If you are looking for a quiet neighborhood wi...,3,2022,10
4,y57cbw,eeek0711,I lived in the DeLano Apartments in Woodley Pa...,2,2022,10


### Save Data

In [128]:
# EDIT ME: prefix placed at the start of the CSV file names written by the save cell.
file_name = "_12_4"

In [130]:
# Run this block to save both frames to the repo's data folder.
# Files are written as ./data/<file_name>_threads_DCforRent_df.csv and
# ./data/<file_name>_comments_DCforRent_df.csv.
data_dir = "./data"

# to_csv does not create missing parent directories, so make sure the folder
# exists first (e.g. on a fresh clone where it is not tracked).
os.makedirs(data_dir, exist_ok=True)

threads_DCforRent_df.to_csv(f"{data_dir}/{file_name}_threads_DCforRent_df.csv")
comments_DCforRent_df.to_csv(f"{data_dir}/{file_name}_comments_DCforRent_df.csv")