In [7]:
import praw
from dotenv import load_dotenv
import os
import requests
from datetime import datetime
import pandas as pd

In [8]:
# Load credentials from the local .env file.
# override=True lets the .env values replace variables that already exist in
# the process environment. Without it, a variable such as USER -- which most
# Unix shells already export as the OS login name -- silently keeps the shell
# value and the .env entry is never used.
load_dotenv(override=True)

SECRET_KEY = os.getenv("SECRET_KEY") # passed to praw as client_secret
CLIENT_ID = os.getenv("CLIENT_ID") # passed to praw as client_id
PASS = os.getenv("PASSWORD") # read from the PASSWORD variable; not used by the cells below
USER = os.getenv("USER") # passed to praw as user_agent

### Define Functions

In [None]:
############################################################################################################################################

def search_subreddit(subreddit, query:str, limit:int):
    """query a specific subreddit and collect the matching threads.
    subreddit: praw subreddit instance (any object exposing search(query=..., limit=...))
    query: string query for praw api
    limit: int number of threads to collect
    returns: dataframe of threads matching query, indexed by submission_id, with
             columns title, text, year, month. year and month are strings taken
             from the thread's UTC creation time. When nothing matches, an empty
             dataframe with the same index name and columns is returned."""

    # local import: the notebook's import cell only brings in `datetime`
    from datetime import timezone

    columns = ["submission_id", "title", "text", "year", "month"]
    thread_list = list() # init list for thread data

    for submission in subreddit.search(query=query, limit=limit): # iterate over all threads returned from search

        # created_utc is a UTC epoch timestamp, so convert it with an explicit
        # UTC timezone. A naive fromtimestamp() uses the machine's local zone
        # and can move a thread posted near a month boundary into the wrong
        # month/year.
        created = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)

        thread_list.append({ # one dict per thread
                "submission_id":submission.id,
                "title":submission.title,
                "text":submission.selftext,
                "year":created.strftime("%Y"),
                "month":created.strftime("%m")
        })

    # passing the column list keeps the schema stable even when the search
    # returned nothing (otherwise set_index would raise KeyError)
    df = pd.DataFrame(thread_list, columns=columns)
    df.set_index("submission_id", inplace=True) # set index to be thread id

    return df

############################################################################################################################################

def get_comments(id_list:list, reddit, more_limit=0):
    """get the comments of every thread in id_list
    id_list: list of submission ids to get comment data from
    reddit: reddit instance
    more_limit: value forwarded to praw's comments.replace_more(limit=...).
                0 (default) discards the "load more comments" placeholders
                without extra API requests; None resolves all of them (one
                request per placeholder, so it can be slow on large threads)
    returns: dataframe of comment data with columns submission_id, author, body,
             score, year, month. year and month are strings taken from the
             comment's UTC creation time. author is None for comments whose
             author is missing. When there are no comments, an empty dataframe
             with the same columns is returned."""

    # local import: the notebook's import cell only brings in `datetime`
    from datetime import timezone

    columns = ["submission_id", "author", "body", "score", "year", "month"]
    comments_list = list()

    for submission_id in id_list: # named submission_id to avoid shadowing the builtin id()

        submission = reddit.submission(id=submission_id)

        # The comment forest can contain MoreComments placeholders, which have
        # no body/created_utc and would raise AttributeError in the loop below.
        # replace_more() resolves or removes them before the forest is flattened.
        submission.comments.replace_more(limit=more_limit)

        for comment in submission.comments.list():

            # created_utc is a UTC epoch timestamp; convert with an explicit UTC
            # timezone so the result does not depend on the machine's local zone
            created = datetime.fromtimestamp(comment.created_utc, tz=timezone.utc)

            comments_list.append({
                "submission_id": submission_id,
                "author": comment.author.name if comment.author else None,
                "body": comment.body,
                "score": comment.score,
                "year":created.strftime("%Y"),
                "month":created.strftime("%m")
            })

    # passing the column list keeps the schema stable when no comments were found
    df = pd.DataFrame(comments_list, columns=columns)

    return df

############################################################################################################################################

### Get Reddit & Subreddit Instance

In [9]:
# Build the Reddit client from the app credentials read earlier.
# No username/password is supplied here.
reddit = praw.Reddit(user_agent=USER, client_secret=SECRET_KEY, client_id=CLIENT_ID)

reddit.read_only # last expression of the cell: shows the client's read_only flag

True

In [41]:
# Subreddit handle that the search below runs against
washingtondc = reddit.subreddit("washingtondc")

washingtondc # bare name as last expression so the cell displays its repr

Subreddit(display_name='washingtondc')

### Search Threads

In [45]:
# Collect up to 50 threads matching the title query
threads_df = search_subreddit(
    washingtondc,
    query='title:"safe" title:"neighborhood"',
    limit=50,
)

threads_df.head() # preview the first rows

Unnamed: 0_level_0,title,text,year,month
submission_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
vmsxc7,How safe is this neighborhood?,"Hey you, yeah you. Stop fucking asking this. Y...",2022,6
16hpztv,Where would you buy in DC to feel most safe ab...,Would the northern part of Georgetown/Glover P...,2023,9
3j8vju,Van Ness residents say their neighborhood isn'...,,2015,9
lsoay5,Recommendation of a safe neighborhood in which...,"Obviously not looking for luxury, would like a...",2021,2
ih2u1o,How safe is the Old City/Near-Northeast neighb...,I'm thinking about signing a lease on a place ...,2020,8


### Grab Comments

In [46]:
# The thread frame is indexed by submission id, so its index is the id list
submission_ids = threads_df.index.tolist()
comments_df = get_comments(id_list=submission_ids, reddit=reddit)

comments_df.head() # preview the first rows

Unnamed: 0,submission_id,author,body,score,year,month
0,vmsxc7,downvoteyous,cant comment just got murdered in the face due...,615,2022,6
1,vmsxc7,pizzajona,I’m thinking of moving to Capitol Hill on Janu...,52,2022,6
2,vmsxc7,zero_derivation,Is Columbia Heights a bad neighborhood for rat...,145,2022,6
3,vmsxc7,,I'm living inside the fenced-in perimeter of t...,197,2022,6
4,vmsxc7,BringMeCoffeeOrTea_,I thought everyone who lived in D.C. was murde...,245,2022,6


### Save Data

In [None]:
# EDIT ME - prefix used for the names of the saved csv files
file_name = "test"

In [None]:
# run this block to save to repo data folder

# to_csv does not create missing folders, so make sure ./data exists first
os.makedirs("./data", exist_ok=True)

# threads keep their index (submission_id) as the first csv column
threads_df.to_csv("./data/"+file_name+'_threads.csv')
# comments only have the default row index, which is dropped
comments_df.to_csv("./data/"+file_name+'_comments.csv', index=False)
