In [1]:
from dotenv import load_dotenv
import os
import neptune
import praw
from datetime import datetime
import json
import time
import pandas as pd
from tqdm import tqdm



In [2]:
def reddit_connect():
    """Build a ``praw.Reddit`` client from credentials held in the environment.

    ``load_dotenv()`` is called first so that values from a ``.env`` file are
    available to ``os.getenv``. Any variable that is not set is passed to
    ``praw.Reddit`` as ``None``.

    Returns:
        The ``praw.Reddit`` instance that was constructed.
    """
    load_dotenv()

    # praw keyword argument -> environment variable that supplies its value.
    env_names = {
        'client_id': 'REDDIT_CLIENT_ID',
        'client_secret': 'REDDIT_CLIENT_SECRET',
        'user_agent': 'REDDIT_USER_AGENT',
        'username': 'REDDIT_USERNAME',
        'password': 'REDDIT_PASSWORD',
    }
    settings = {option: os.getenv(env_name) for option, env_name in env_names.items()}
    return praw.Reddit(**settings)

In [None]:
def lookup_missing_authors(csv_file=None, post_ids=None):
    """
    Look up the author name of each Reddit post by its ID.

    Args:
        csv_file: Path to a CSV file with a ``post_id`` column. When given,
            the IDs read from it replace whatever was passed as ``post_ids``.
        post_ids: List of post IDs to look up (used only when ``csv_file``
            is not given).

    Returns:
        Dictionary mapping post_id to author name. A post whose ``author``
        is ``None`` maps to ``"[deleted]"``; a post whose lookup raised maps
        to ``"[error]"``. An empty dict is returned when there are no IDs.
    """
    # Get post IDs from CSV or use provided list
    if csv_file:
        df = pd.read_csv(csv_file)
        post_ids = df['post_id'].tolist()

    if not post_ids:
        print("No post IDs provided")
        return {}

    # Connect only once there is work to do. The function must be *called*:
    # binding the bare function object makes every `reddit.submission(...)`
    # raise AttributeError, which the handler below would record as "[error]".
    reddit = reddit_connect()

    author_lookup = {}
    errors = 0

    print(f"Looking up authors for {len(post_ids)} posts...")

    for i, post_id in enumerate(tqdm(post_ids, desc="Looking up authors")):
        try:
            submission = reddit.submission(id=post_id)
            author = submission.author
            author_lookup[post_id] = author.name if author is not None else "[deleted]"
        except Exception as e:
            # Best effort: one failing post must not abort the whole batch.
            print(f"Error with post {post_id}: {e}")
            author_lookup[post_id] = "[error]"
            errors += 1

        # Rate limiting - be nice to Reddit: pause for a second after every
        # 50 posts (never before the first one).
        if i % 50 == 0 and i > 0:
            time.sleep(1)

    print(f"Lookup completed. {errors} errors encountered.")
    return author_lookup

def update_csv_with_authors(csv_file, author_lookup):
    """Write a copy of a CSV with author names filled in from a lookup.

    Rows whose ``post_id`` is a key of ``author_lookup`` get the mapped name
    in ``author_name``; all other rows keep their current value. The result
    is saved next to the input with ``_with_authors`` inserted before the
    file extension, so the input file itself is never overwritten.

    Args:
        csv_file: Path to a CSV file with a ``post_id`` column.
        author_lookup: Dictionary mapping post_id to author name.

    Returns:
        The updated DataFrame (the same data that was written to disk).
    """
    df = pd.read_csv(csv_file)

    if author_lookup:
        # One vectorised pass instead of one boolean mask per looked-up ID.
        looked_up = df['post_id'].map(author_lookup)
        if 'author_name' in df.columns:
            # Rows without a lookup entry keep whatever they already had.
            looked_up = looked_up.where(looked_up.notna(), df['author_name'])
        df['author_name'] = looked_up

    # Touch only the final extension: str.replace('.csv', ...) would rewrite
    # every '.csv' in the path (directory names included) and would leave a
    # path without '.csv' unchanged, silently overwriting the input.
    root, extension = os.path.splitext(csv_file)
    updated_filename = f"{root}_with_authors{extension}"
    df.to_csv(updated_filename, index=False)

    print(f"Updated CSV saved as: {updated_filename}")
    return df