In [13]:
import datetime
import os

import praw
import pymongo

In [14]:
def _utc_isoformat(epoch_seconds):
    """Return a naive ISO-8601 string for a UTC epoch timestamp.

    Yields the same text as ``datetime.utcfromtimestamp(x).isoformat()`` (no
    UTC offset suffix) without relying on that deprecated constructor, so the
    stored timestamp format stays unchanged.
    """
    aware = datetime.datetime.fromtimestamp(epoch_seconds, tz=datetime.timezone.utc)
    return aware.replace(tzinfo=None).isoformat()


def RedditSearch_AddMongo(sub_Reddit, db_Name, client_Id, client_Secret, user_Agent, uri):
    """
    Search a subreddit's top listing and store its posts and comments in MongoDB.

    Posts go to the "RedditPosts" collection and comments to the
    "RedditComments" collection of the database named ``db_Name``. A post whose
    URL is already stored is skipped entirely (its comments are not revisited);
    a comment whose id is already stored is skipped.

    Parameters:
        sub_Reddit(String) = subreddit name
        db_Name(String) = name of the MongoDB database to write to
        client_Id(String) = the client ID given by the Reddit API
        client_Secret(String) = the secret given by the Reddit API
        user_Agent(String) = user-agent string passed to praw.Reddit
        uri(String) = connection URI of the MongoDB deployment

    Returns:
        None. The MongoDB client is closed before returning, including when
        an exception (or a KeyboardInterrupt) interrupts the crawl.
    """
    # Reddit API session
    reddit = praw.Reddit(
        client_id=client_Id,
        client_secret=client_Secret,
        user_agent=user_Agent,
    )

    # MongoDB connection; closed in the ``finally`` below
    client = pymongo.MongoClient(uri)
    try:
        db = client[db_Name]
        collection = db["RedditPosts"]
        collection_comments = db["RedditComments"]

        # Walk the subreddit's top listing, saving every post not yet stored
        for post in reddit.subreddit(sub_Reddit).top(limit=None):
            post_url = f"https://www.reddit.com{post.permalink}"

            # Skip this post if it already exists in the database
            if collection.find_one({"post_url": post_url}):
                continue

            collection.insert_one({
                "title": post.title,
                "post_id": post.id,
                # Deleted accounts have no author object
                "author": post.author.name if post.author else 'N/A',
                "num_comments": post.num_comments,
                "score": post.score,
                "attachment_file": post.url,
                "timestamp": _utc_isoformat(post.created_utc),
                "subreddit_name": sub_Reddit,
                "post_url": post_url,
            })

            # Keyed by comment id so a comment listed twice is stored once
            new_comments = {}
            for comment in post.comments.list():
                # Entries that are not real comments (presumably unexpanded
                # "load more" placeholders) have no body/author to store, so
                # they must be skipped before anything else touches them.
                if not isinstance(comment, praw.models.Comment):
                    continue
                if comment.id in new_comments:
                    continue
                if collection_comments.find_one({"id": comment.id}):
                    continue

                new_comments[comment.id] = {
                    "id": comment.id,
                    "post_id": post.id,
                    "author": comment.author.name if comment.author else 'N/A',
                    "score": comment.score,
                    "num_replies": len(comment.replies),
                    "text": comment.body,
                    "subreddit_name": sub_Reddit,
                    "timestamp": _utc_isoformat(comment.created_utc),
                }

            # insert_many rejects an empty list, so only call it when there
            # is something to write.
            if new_comments:
                collection_comments.insert_many(list(new_comments.values()))
    finally:
        client.close()



In [15]:
# Target subreddit and destination database
sub_Reddit = "ChatGPT"

db_Name = "RedditData"

# Credentials are read from the environment so they are never stored in the
# notebook. A missing variable raises KeyError before any network call is made.
# NOTE(review): any credentials that were previously committed in this cell
# should be treated as leaked and rotated.
client_Id = os.environ["REDDIT_CLIENT_ID"]
client_Secret = os.environ["REDDIT_CLIENT_SECRET"]
user_Agent = "Bombe_Cerise"
uri = os.environ["MONGODB_URI"]


RedditSearch_AddMongo(sub_Reddit, db_Name, client_Id, client_Secret, user_Agent, uri)

KeyboardInterrupt: 