In [1]:
import praw, json, pprint
from datetime import datetime
from dateutil import tz
from pydantic import BaseModel
from typing import List

In [2]:
# Load the API credentials from a local JSON file so they never appear in the
# notebook. The context manager closes the file handle as soon as parsing is
# done (the bare open() used previously left it open until garbage collection).
with open("credentials.json") as credentials_file:
    credentials = json.load(credentials_file)

# Reddit API client. Only client_id, client_secret and user_agent are supplied
# (no username/password); all three keys must be present in credentials.json,
# otherwise a KeyError is raised here.
reddit = praw.Reddit(
    client_id=credentials["client_id"],
    client_secret=credentials["client_secret"],
    user_agent=credentials["user_agent"],
)

In [3]:
# Accumulator for the raw submission objects fetched from Reddit.
submissions = list()

In [4]:
# Fetch this week's top posts (at most 500) from the '+'-joined subreddit names.
# The list is rebuilt by assignment rather than appended to, so re-running this
# cell no longer adds a second copy of every submission to an already-filled list.
# NOTE(review): 'ArtificialInteligence' (single "l") is kept verbatim — presumably
# that is the subreddit's real name; confirm before "correcting" the spelling.
submissions = list(
    reddit.subreddit(
        'StableDiffusion+LocalLLaMA+singularity+ArtificialInteligence+MachineLearning'
    ).top(time_filter="week", limit=500)
)

In [5]:
class Comment(BaseModel):
    """A single Reddit comment, reduced to its text."""
    # Text of the comment (filled from the PRAW comment's `.body`).
    body: str

class Post(BaseModel):
    """Flattened record of one Reddit submission plus a few of its comments.

    Field order is the order in which keys appear in the serialised JSON.
    """
    # Creation date as a string; the notebook formats it with '%d-%m-%Y'.
    date: str
    # Submission title.
    title: str
    # Self-text of the submission (filled from `selftext`).
    body: str
    # Upvote ratio as reported by the submission object.
    upvote_ratio: float
    # Display name of the subreddit the submission was posted in.
    subreddit: str
    # Comment count as reported by the submission object.
    num_comments: int
    # Link flair text passed through str(), so a missing flair is stored as
    # the literal string 'None'.
    submission_flair: str
    # True when the submission has a 'preview' or 'media_metadata' attribute,
    # or is flagged as a video.
    has_media: bool
    # URL of the submission (filled from `url`).
    url: str
    # Sample of the submission's comments (the notebook keeps at most 5).
    comments: List[Comment]
    
class PostCollection(BaseModel):
    """Top-level container for all collected posts; this is what gets serialised."""
    # Starts empty and is filled by appending Post objects.
    # NOTE(review): pydantic is documented to copy field defaults per instance,
    # so the shared-mutable-default pitfall of plain classes should not apply.
    posts: List[Post] = []
    
# Notebook-level collection instance; later cells append Post objects to
# `postcollection.posts` and serialise it to JSON.
postcollection = PostCollection()

In [7]:
# Convert every fetched submission into a Post and add it to the collection,
# skipping joke posts identified by their flair.
excluded_flairs = {'meme', 'Meme', 'funny', 'Funny'}

for submission in submissions:
    # link_flair_text may not be a string (hence the str() below); such
    # submissions are not in the excluded set and are therefore kept.
    if submission.link_flair_text in excluded_flairs:
        continue

    # Strip the "load more comments" placeholders from the comment tree: they
    # have no `.body` and would raise AttributeError in the comprehension
    # below. limit=0 removes them without issuing extra API requests.
    submission.comments.replace_more(limit=0)
    comments = [Comment(body=el.body) for el in submission.comments.list()[:5]]

    post = Post(
        # fromtimestamp() renders the date in this machine's local timezone.
        # NOTE(review): switch to an explicit tz if the dates must be UTC.
        date=datetime.fromtimestamp(submission.created).strftime('%d-%m-%Y'),
        title=submission.title,
        body=submission.selftext,
        upvote_ratio=submission.upvote_ratio,
        subreddit=submission.subreddit.display_name,
        num_comments=submission.num_comments,
        url=submission.url,
        # str() turns a missing flair (None) into the literal string 'None'.
        submission_flair=str(submission.link_flair_text),
        has_media=(
            'preview' in vars(submission)
            or submission.is_video
            or 'media_metadata' in vars(submission)
        ),
        comments=comments,
    )

    # Append only the post built in this iteration. Previously the append sat
    # outside the flair check, so a skipped submission re-appended the previous
    # post (or raised NameError when the very first submission was skipped).
    postcollection.posts.append(post)

In [8]:
# Round-trip through pydantic's JSON serialiser to obtain plain dicts/lists
# that the standard json module can write out.
postcollection_json = json.loads(
    postcollection.model_dump_json()
)

In [9]:
# Persist the collection as UTF-8 JSON with 4-space indentation, leaving
# non-ASCII characters unescaped.
with open('data.json', 'w', encoding='utf-8') as out_file:
    out_file.write(json.dumps(postcollection_json, ensure_ascii=False, indent=4))