##### Python Reddit Scraper

One-Time Instructions:
1. Sign up for Reddit and log in
2. Go to https://www.reddit.com/prefs/apps
    1. Fill out all fields. Some notes:
        1. Choose 'script' as the app type
        2. For the about/redirect URL text boxes, enter the address of a website you own (any valid website address will do)
    2. Write down the client_id (2nd line underneath the app title) and client_secret (labeled "secret")
3. Create a text file named ".env" with the following fields:
    - reddit_client_id     = "_FROM STEP 2.2 ABOVE_"
    - reddit_client_secret = "_FROM STEP 2.2 ABOVE_"
    - reddit_user_agent    = "multiplatform:PyScraper:v1.0 (by /u/_YOUR REDDIT USER NAME_)"
    - reddit_username      = "_YOUR REDDIT USER NAME_"
    - reddit_password      = "_YOUR REDDIT PASSWORD_"



In [1]:
# Imports
################################################################################
import os
from datetime import datetime, timezone

import pandas as pd
import praw
from dotenv import load_dotenv


In [2]:
# Step 0. Set the global parameters
################################################################################
# The Reddit thread to scrape, followed by the location of the CSV that the
# final cell writes.
url_of_interest = (
    'https://www.reddit.com/r/TikTokCringe/comments/16ir9c7/using_ai_in_schools/'
)
folder   = "./"                       # GLOBAL PARAMETER: output directory
filename = 'using-ai-in-schools.csv'  # GLOBAL PARAMETER: output file name


In [3]:
# Step 1. Login to Reddit (Authentication)
################################################################################

# Copy the key/value pairs from the local .env file into environment variables.
load_dotenv()

# Read all five settings in one pass, in the same order as they are listed in
# the .env instructions. os.getenv returns None for any name that is not set.
client_id, client_secret, user_agent, username, password = map(
    os.getenv,
    (
        "reddit_client_id",
        "reddit_client_secret",
        "reddit_user_agent",
        "reddit_username",
        "reddit_password",
    ),
)

# Only the app credentials and the user agent are passed below; username and
# password (and twoFA) are only needed if posting to reddit. To enable posting,
# add to the call:
#     username = username,
#     password = f'{password}'           # plain account
#     password = f'{password}:{twoFA}'   # if your account has 2FA
# twoFA = "INSERT TWO-FACTOR AUTHENTICATION" # Unnecessary if only scraping
reddit = praw.Reddit(
    client_id=client_id,
    client_secret=client_secret,
    user_agent=user_agent,
)


In [4]:
# Step 1b: Scrape the URL of interest
################################################################################

# Look up the submission (the original post) from its URL.
post = reddit.submission(url=url_of_interest)

# Replace every "load more comments" placeholder in the comment tree (no limit),
# then flatten the tree into one list holding all comments.
comment_forest = post.comments
comment_forest.replace_more(limit=None)
comments = comment_forest.list()

In [6]:
# Step 2 (required): Create dataframe of results
################################################################################

DATE_FORMAT = "%Y-%m-%d %H:%M:%S UTC"


def _utc_string(epoch_seconds):
    """Format a Unix timestamp (in seconds) as 'YYYY-MM-DD HH:MM:SS UTC'."""
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; a
    # timezone-aware datetime produces the same text without the warning.
    return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc).strftime(DATE_FORMAT)


def _author_name(author):
    """Return the author as a plain string, or None when there is no author."""
    # Keep text in the rows rather than the live API object. The CSV output is
    # unchanged because writing the object would stringify it the same way.
    # NOTE(review): PRAW is documented to report None for deleted accounts - confirm.
    return str(author) if author is not None else None


# Row 0 is the original post (OP) — customize the keys to add columns of interest.
thread = [{
    'reply'     : 0,
    'id'        : post.id,
    'parent_id' : "null",
    'date'      : _utc_string(post.created_utc),
    'author'    : _author_name(post.author),
    'title'     : post.title,
    'score'     : post.score,
    'replies'   : len(comments),            # size of the whole `comments` list
    'text'      : post.selftext,
}]

# One row per comment, numbered from 1 in the order they appear in `comments`.
for reply_number, comment in enumerate(comments, start=1):
    thread.append({
        'reply'     : reply_number,
        'id'        : comment.id,
        'parent_id' : comment.parent_id,
        'date'      : _utc_string(comment.created_utc),
        'author'    : _author_name(comment.author),
        'title'     : post.title,           # every row repeats the post title
        'score'     : comment.score,
        'replies'   : len(comment.replies), # length of this comment's own `replies`
        'text'      : comment.body,
    })

In [7]:
# Step 3: Write out dataframe
################################################################################

# Turn the list of row dictionaries into a dataframe.
df = pd.DataFrame(thread)

# os.path.join avoids the doubled separator that string concatenation produces
# when `folder` already ends in a slash (e.g. "./"). With an empty `folder` it
# yields just the file name instead of a path under the filesystem root.
fullpath = os.path.join(folder, filename)

# Create the output folder if it does not exist yet, so a missing directory
# does not abort the export. An empty `folder` means the current directory.
if folder:
    os.makedirs(folder, exist_ok=True)

# index=False leaves the dataframe's row index out of the CSV.
df.to_csv(fullpath, index=False)
