In [1]:
!pip install praw openai requests python-dotenv




In [41]:
import json
import os
import re
import time
from datetime import datetime
from typing import Dict, List, Optional, Tuple

import openai
import praw
import requests
from google.colab import files

# --- Credentials ---
# Secrets are read from environment variables so they are never stored in the
# notebook itself (and cannot leak when the notebook is shared or committed).
# Any variable that is not set falls back to an empty string.
REDDIT_CLIENT_ID = os.environ.get("REDDIT_CLIENT_ID", "")
REDDIT_CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET", "")
REDDIT_USER_AGENT = os.environ.get("REDDIT_USER_AGENT", "")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
REDDIT_USERNAME = os.environ.get("REDDIT_USERNAME", "")
REDDIT_PASSWORD = os.environ.get("REDDIT_PASSWORD", "")

# --- OpenRouter Setup (for openai v0.28.0) ---
# The pre-1.0 openai client is configured through module-level attributes;
# pointing api_base at OpenRouter routes every ChatCompletion call through it.
openai.api_key = OPENROUTER_API_KEY
openai.api_base = "https://openrouter.ai/api/v1"

print(" Setup complete! Ready to analyze Reddit users.")

 Setup complete! Ready to analyze Reddit users.


In [42]:
class RedditPersonaAnalyzer:
    """Generate a cited, LLM-written persona for a Reddit user.

    The workflow (driven by ``analyze_user``) is: parse the username out of a
    profile URL, fetch the user's newest submissions and comments with PRAW,
    ask a chat model (through the module-level ``openai`` configuration) to
    write a persona that cites the item URLs, and save the text to a file.
    """

    def __init__(self):
        """Initialize the Reddit API client from the module-level credentials."""
        self.reddit = praw.Reddit(
            client_id=REDDIT_CLIENT_ID,
            client_secret=REDDIT_CLIENT_SECRET,
            user_agent=REDDIT_USER_AGENT,
            username=REDDIT_USERNAME,
            password=REDDIT_PASSWORD
        )
        # Configuration: upper bounds on how many recent items are fetched per user
        self.max_posts = 10
        self.max_comments = 20

    def extract_username_from_url(self, url: str) -> str:
        """Extract the username from a Reddit profile URL.

        Accepts both ``/user/<name>`` and the short ``/u/<name>`` form, with
        or without trailing path segments, a query string, or a fragment.

        Args:
            url: Profile URL, e.g. ``https://www.reddit.com/user/name/comments/``.

        Returns:
            The username segment of the URL.

        Raises:
            ValueError: If the URL contains no ``/user/<name>`` or ``/u/<name>`` part.
        """
        # Surrounding whitespace (common in pasted URLs) must not end up in the name
        url = url.strip().rstrip('/')
        # Stop at '/', '?' or '#' so query strings and fragments are not captured
        match = re.search(r'/u(?:ser)?/([^/?#]+)', url)
        if match is None:
            raise ValueError(f"Invalid Reddit URL format: {url}")
        return match.group(1)

    def scrape_user_data(self, username: str) -> Dict:
        """Fetch the newest posts and comments of a Reddit user.

        Args:
            username: Reddit username (without the ``u/`` prefix).

        Returns:
            A dict with keys ``'username'``, ``'posts'`` (dicts with ``url``,
            ``subreddit``, ``title``, ``selftext``) and ``'comments'`` (dicts
            with ``url``, ``subreddit``, ``body``). At most ``self.max_posts``
            posts and ``self.max_comments`` comments are requested.

        Raises:
            Exception: If anything fails while fetching; the original error is
                included in the message and chained as ``__cause__``.
        """
        try:
            user = self.reddit.redditor(username)
            # Attribute access forces a fetch, so this raises if the user doesn't exist
            user.id

            print(f"Scraping data for user: {username}")

            # Scrape posts
            print(" Collecting posts...")
            posts_data = [
                {
                    'url': f"https://reddit.com{post.permalink}",
                    'subreddit': str(post.subreddit),
                    'title': post.title,
                    'selftext': post.selftext
                }
                for post in user.submissions.new(limit=self.max_posts)
            ]

            # Scrape comments
            print("Collecting comments...")
            comments_data = [
                {
                    'url': f"https://reddit.com{comment.permalink}",
                    'subreddit': str(comment.subreddit),
                    'body': comment.body
                }
                for comment in user.comments.new(limit=self.max_comments)
            ]

            print(f"Scraped {len(posts_data)} posts and {len(comments_data)} comments")
            return {
                'username': username,
                'posts': posts_data,
                'comments': comments_data
            }
        except Exception as e:
            # Keep the generic exception type callers already handle, but chain
            # the cause so the real failure stays visible in tracebacks.
            raise Exception(f"User {username} not found or suspended. Original error: {e}") from e

    @staticmethod
    def _format_activity(user_data: Dict) -> str:
        """Render posts and comments as one line of text each, tagged with its citation URL."""
        lines = [
            f"Post in r/{post['subreddit']} with title '{post['title']}': {post['selftext']} [citation: {post['url']}]\n"
            for post in user_data['posts']
        ]
        lines.extend(
            f"Comment in r/{comment['subreddit']} says: {comment['body']} [citation: {comment['url']}]\n"
            for comment in user_data['comments']
        )
        return "".join(lines)

    def generate_persona_with_citations(self, user_data: Dict) -> str:
        """Generate a user persona with citations through the chat-completion API.

        Args:
            user_data: Dict in the shape returned by ``scrape_user_data``.

        Returns:
            The persona text produced by the model.

        Raises:
            Exception: If the API call or response parsing fails; the original
                error is included in the message and chained as ``__cause__``.
        """
        # One item per line (real newlines), each carrying the URL the model must cite
        content_for_prompt = self._format_activity(user_data)

        prompt = f"""
        Analyze the following Reddit activity to create a user persona. For each characteristic (like interests, personality traits, etc.), you MUST provide the specific citation URL given in the text.

        User Activity:
        {content_for_prompt}
        """
        try:
            # Uses the syntax of openai v0.28.0 (module-level ChatCompletion)
            response = openai.ChatCompletion.create(
                model="anthropic/claude-3.5-sonnet",
                messages=[
                    {"role": "system", "content": "You are an expert at creating user personas from text. You must always cite your sources using the URLs provided."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=1000,
                headers={ "HTTP-Referer": "http://localhost", "X-Title": "Reddit Persona Bot" }
            )
            return response['choices'][0]['message']['content']
        except Exception as e:
            raise Exception(f"Error generating persona: {str(e)}") from e

    def save_persona_to_file(self, persona_text: str, username: str) -> str:
        """Write the persona to ``<username>_persona.txt`` in the working directory.

        Args:
            persona_text: Text to write (UTF-8 encoded).
            username: Used to build the file name.

        Returns:
            The name of the file that was written.
        """
        filename = f"{username}_persona.txt"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(persona_text)
        print(f" Persona saved to: {filename}")
        return filename

    def analyze_user(self, profile_url: str) -> Optional[str]:
        """Analyze a Reddit user end to end and save the generated persona.

        Args:
            profile_url: URL of the user's Reddit profile.

        Returns:
            The name of the persona file, or ``None`` when the user has no
            posts or comments or when any step fails (the error is printed
            rather than raised so a batch of users can keep going).
        """
        try:
            username = self.extract_username_from_url(profile_url)
            print(f" Analyzing user: {username}")
            user_data = self.scrape_user_data(username)
            if not user_data['posts'] and not user_data['comments']:
                print(f"No activity found for user {username}.")
                return None
            print(" Generating persona with AI...")
            persona_text = self.generate_persona_with_citations(user_data)
            filename = self.save_persona_to_file(persona_text, username)
            return filename
        except Exception as e:
            print(f" Error during analysis: {str(e)}")
            return None

In [43]:
# Create a single analyzer instance; constructing it builds the PRAW client
# from the credentials defined in the setup cell and sets the fetch limits.
analyzer = RedditPersonaAnalyzer()
print(" Reddit Persona Analyzer initialized!")

 Reddit Persona Analyzer initialized!


In [46]:
# Profile URLs used as a smoke test of the full pipeline.
working_sample_users = [
    "https://www.reddit.com/user/kojied/comments/",
    "https://www.reddit.com/user/Hungry-Move-6603/comments/"
]

PREVIEW_LINE_COUNT = 15          # how many lines of each persona file to echo
PAUSE_BETWEEN_USERS_SECONDS = 3  # pause between users to avoid rate limiting

print(" Testing with working sample users...")
print("Note: Some users from previous tests might be suspended/deleted")

for user_index, url in enumerate(working_sample_users):
    # Pause only *between* users: there is nothing to wait for after the last one.
    if user_index > 0:
        print(f"\n⏳ Waiting {PAUSE_BETWEEN_USERS_SECONDS} seconds before next user...")
        time.sleep(PAUSE_BETWEEN_USERS_SECONDS)

    print(f"\n{'='*60}")
    print(f"Testing: {url}")
    print('='*60)

    try:
        # analyze_user returns the output file name, or None on failure
        result = analyzer.analyze_user(url)

        if result:
            print(f"✅ Success! Results saved to: {result}")

            # Display first few lines of the result
            try:
                with open(result, 'r', encoding='utf-8') as f:
                    lines = f.readlines()
                print("\n Preview of generated persona:")
                for line in lines[:PREVIEW_LINE_COUNT]:
                    print(line.strip())
                if len(lines) > PREVIEW_LINE_COUNT:
                    print("... (truncated)")
            except Exception as e:
                print(f"Could not preview file: {e}")
        else:
            print("Failed to analyze this user")

    except Exception as e:
        print(f" Error during analysis: {str(e)}")

print("\n🎉 Testing complete!")


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



 Testing with working sample users...
Note: Some users from previous tests might be suspended/deleted

Testing: https://www.reddit.com/user/kojied/comments/
 Analyzing user: kojied
Scraping data for user: kojied
 Collecting posts...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Collecting comments...
Scraped 10 posts and 20 comments
 Generating persona with AI...
 Persona saved to: kojied_persona.txt
✅ Success! Results saved to: kojied_persona.txt

 Preview of generated persona:
Based on the provided Reddit activity, I'll create a detailed user persona:

Demographics & Location:
- Lives in New York City (relatively new resident, ~3 years) [https://reddit.com/r/newyorkcity/comments/1lykkqf/i_feel_violated_by_intern_season/]
- Millennial age group [https://reddit.com/r/AskReddit/comments/1kz5a0n/do_you_think_millennials_generally_look_younger/mv2xf3q/]
- Japanese language speaker/cultural knowledge [https://reddit.com/r/OnePiece/comments/1kpavbs/ive_been_ripping_on_that_part_of_zoros_backstory/mswlma6/]

Professional Background:
- Works in technology, specifically iOS/VR development [https://reddit.com/r/visionosdev/comments/1b3yugb/best_blogs_tutorial_channels_to_learn/]
- Shows interest in H1B visa topics, suggesting possible personal connection to immigratio

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.




Testing: https://www.reddit.com/user/Hungry-Move-6603/comments/
 Analyzing user: Hungry-Move-6603


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Scraping data for user: Hungry-Move-6603
 Collecting posts...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Collecting comments...
Scraped 3 posts and 12 comments
 Generating persona with AI...
 Persona saved to: Hungry-Move-6603_persona.txt
✅ Success! Results saved to: Hungry-Move-6603_persona.txt

 Preview of generated persona:
Based on the provided Reddit activity, I'll create a user persona with specific citations:

Name: Unspecified
Location History:
- Originally from Delhi, recently moved to Lucknow in December 2024 for business purposes
[Citation: https://reddit.com/r/lucknow/comments/1lwyhny/everyone_is_something_in_lko/]

Professional Background:
- Business-oriented individual
[Citation: https://reddit.com/r/lucknow/comments/1lwyhny/everyone_is_something_in_lko/]

Interests & Lifestyle:
1. Reading enthusiast (seeking reading clubs/cafes)
[Citation: https://reddit.com/r/lucknow/comments/1lzuq0r/reading_cafe_reader_club/]

... (truncated)

⏳ Waiting 3 seconds before next user...

🎉 Testing complete!
