In [9]:
import praw
import json
from datetime import datetime, timezone
from dotenv import load_dotenv, dotenv_values 
import os
# Load settings from a local .env file (if present) before anything reads them;
# the Reddit client below looks up CLIENT_ID, CLIENT_SECRET and USER_AGENT via os.getenv.
load_dotenv()

True

In [10]:
# Shared Reddit API client used by the helper functions in this notebook.
# Credentials are never hard-coded: they are read from the CLIENT_ID,
# CLIENT_SECRET and USER_AGENT environment variables. Register an app at
# https://www.reddit.com/prefs/apps to obtain your own values.
reddit = praw.Reddit(
    **{
        env_name.lower(): os.getenv(env_name)
        for env_name in ("CLIENT_ID", "CLIENT_SECRET", "USER_AGENT")
    }
)

def get_comments_from_post(post_url):
    """Return every comment of a Reddit post as a flat list of dicts.

    Args:
        post_url: Full URL of the Reddit submission to read.

    Returns:
        A list with one dict per comment, in the order produced by
        ``submission.comments.list()``. Each dict has the keys:
        ``comment_id``, ``user`` (author name, or ``"Deleted"`` when the
        comment has no author), ``time`` (creation time rendered in UTC as
        ``YYYY-MM-DD HH:MM:SS UTC``), ``message`` (comment body) and
        ``parent_id`` (id of the comment being replied to, or ``None`` for a
        top-level comment).
    """

    def describe(node):
        # A comment whose parent is the submission itself is top-level.
        is_top_level = node.parent_id == node.link_id
        posted_at = datetime.fromtimestamp(node.created_utc, tz=timezone.utc)
        return {
            "comment_id": node.id,
            "user": "Deleted" if not node.author else node.author.name,
            "time": posted_at.strftime('%Y-%m-%d %H:%M:%S %Z'),
            "message": node.body,
            # parent_id looks like "<prefix>_<id>"; keep only the id part.
            "parent_id": None if is_top_level else node.parent_id.split('_')[1],
        }

    thread = reddit.submission(url=post_url)

    # Expand every "load more comments" placeholder so nothing is skipped.
    thread.comments.replace_more(limit=None)

    return [describe(node) for node in thread.comments.list()]

def scrape_user_political_posts(username, political_subreddits):
    """Collect a user's submissions that were posted in the given subreddits.

    Args:
        username: Reddit username whose submissions are scanned.
        political_subreddits: Iterable of subreddit names to keep. Matching
            ignores letter case, so ``"Democrats"`` matches a subreddit that
            Reddit reports as ``"democrats"``.

    Returns:
        A list of dicts, newest submission first, each with the keys
        ``post_id``, ``subreddit`` (name exactly as reported by Reddit),
        ``title``, ``time`` (creation time rendered in UTC as
        ``YYYY-MM-DD HH:MM:SS UTC``), ``url``, ``score``, ``num_comments``
        and ``selftext`` (body of the post, if it is a text post).
    """
    # Subreddit names are case-insensitive on Reddit, so compare casefolded
    # names; a set also makes each membership check constant-time.
    wanted = {name.casefold() for name in political_subreddits}

    # Get the Reddit user
    user = reddit.redditor(username)

    posts_data = []

    # `limit=None` asks for as many submissions as the listing will yield.
    # NOTE(review): Reddit listings are reportedly capped (around 1000 items),
    # so very old posts may be missing -- confirm against the PRAW docs.
    for submission in user.submissions.new(limit=None):
        subreddit_name = submission.subreddit.display_name
        if subreddit_name.casefold() not in wanted:
            continue

        created = datetime.fromtimestamp(submission.created_utc, tz=timezone.utc)
        posts_data.append({
            "post_id": submission.id,
            "subreddit": subreddit_name,
            "title": submission.title,
            "time": created.strftime('%Y-%m-%d %H:%M:%S %Z'),
            "url": submission.url,
            "score": submission.score,
            "num_comments": submission.num_comments,
            "selftext": submission.selftext,  # Body of the post, if it's a text post
        })
    return posts_data




In [8]:
# Specify the Reddit post URL you want to scrape
post_url = "https://www.reddit.com/r/politics/comments/1gc7k0y/bcso_voter_punches_poll_worker_when_reminded/"

# Output file for the scraped comments; the status message below reuses it so
# the two cannot drift apart.
comments_path = "comments_graph.json"

# Retrieve comments (one dict per comment, each carrying its parent's id)
comments = get_comments_from_post(post_url)

# Save comments to a JSON file
with open(comments_path, "w") as f:
    json.dump(comments, f, indent=4)

print("Comments saved to " + comments_path)

Comments saved to comments.json


In [12]:
# TODO: this list was chatbot-generated; verify the names and replace it with
# a vetted list of political subreddits.
political_subreddits = [
    "politics", "PoliticalDiscussion", "Conservative", "Liberal", "progressive", 
    "Anarchism", "Socialism", "Libertarian", "neoliberal", "Democrats", "Republican", 
]

# Output file for the scraped posts; the status message below reuses it so the
# two cannot drift apart.
posts_path = "user_posts.json"

# Collect the user's submissions that were made in the subreddits listed above
posts = scrape_user_political_posts("mattjb", political_subreddits)

# Save posts to a JSON file
with open(posts_path, "w") as f:
    json.dump(posts, f, indent=4)

print("Posts saved to " + posts_path)

Comments saved to comments.json
