In [5]:
pip install groq requests python-dotenv


Collecting groq
  Using cached groq-0.30.0-py3-none-any.whl.metadata (16 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting distro<2,>=1.7.0 (from groq)
  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting pydantic<3,>=1.9.0 (from groq)
  Downloading pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
Collecting annotated-types>=0.6.0 (from pydantic<3,>=1.9.0->groq)
  Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Collecting pydantic-core==2.33.2 (from pydantic<3,>=1.9.0->groq)
  Downloading pydantic_core-2.33.2-cp313-cp313-win_amd64.whl.metadata (6.9 kB)
Collecting typing-inspection>=0.4.0 (from pydantic<3,>=1.9.0->groq)
  Downloading typing_inspection-0.4.1-py3-none-any.whl.metadata (2.6 kB)
Using cached groq-0.30.0-py3-none-any.whl (131 kB)
Using cached distro-1.9.0-py3-none-any.whl (20 kB)
Downloading pydantic-2.11.7-py3-none-any.whl (444 kB)
Downloading pydantic_core-2.33.2-cp313-cp31

In [8]:
pip install praw

Note: you may need to restart the kernel to use updated packages.


In [10]:
pip install Pillow

Collecting Pillow
  Downloading pillow-11.3.0-cp313-cp313-win_amd64.whl.metadata (9.2 kB)
Downloading pillow-11.3.0-cp313-cp313-win_amd64.whl (7.0 MB)
   ---------------------------------------- 0.0/7.0 MB ? eta -:--:--
   ------------ --------------------------- 2.1/7.0 MB 16.2 MB/s eta 0:00:01
   ---------------------------------------  6.8/7.0 MB 21.3 MB/s eta 0:00:01
   ---------------------------------------- 7.0/7.0 MB 19.8 MB/s eta 0:00:00
Installing collected packages: Pillow
Successfully installed Pillow-11.3.0
Note: you may need to restart the kernel to use updated packages.


In [21]:
# Reddit User Persona Generator Pipeline
# Based on your existing code with improvements and added features

# Required installations:
# pip install praw groq requests python-dotenv pillow

import praw
import requests
import os
import time
import re
from datetime import datetime
from typing import List, Dict, Optional
from dotenv import load_dotenv
from PIL import Image, ImageDraw, ImageFont
import textwrap
import json

# Load environment variables
load_dotenv()

class RedditPersonaGenerator:
    def __init__(self):
        """Initialize the Reddit Persona Generator with API connections"""
        # Connect to Reddit
        self.reddit = praw.Reddit(
            client_id=os.getenv("REDDIT_CLIENT_ID"),
            client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
            user_agent=os.getenv("REDDIT_USER_AGENT")
        )
        
        # Groq API configuration
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        self.groq_api_url = "https://api.groq.com/openai/v1/chat/completions"
        
        # Test API connections
        self.test_connections()
    
    def test_connections(self):
        """Test API connections"""
        print("🔧 Testing API connections...")
        
        # Test Reddit connection
        try:
            self.reddit.user.me()
            print("✅ Reddit API connected successfully")
        except Exception as e:
            print(f"❌ Reddit API connection failed: {e}")
        
        # Test Groq connection
        try:
            headers = {"Authorization": f"Bearer {self.groq_api_key}"}
            response = requests.get("https://api.groq.com/openai/v1/models", headers=headers)
            if response.status_code == 200:
                print("✅ Groq API connected successfully")
            else:
                print(f"❌ Groq API connection failed: {response.status_code}")
        except Exception as e:
            print(f"❌ Groq API connection failed: {e}")
    
    def extract_username_from_url(self, url: str) -> str:
        """Extract username from Reddit profile URL"""
        # Handle various Reddit URL formats
        url = url.strip().rstrip('/')
        
        # Remove protocol if present
        if url.startswith('http'):
            url = url.split('://', 1)[1]
        
        # Extract username using regex patterns
        patterns = [
            r'reddit\.com/user/([^/]+)',
            r'reddit\.com/u/([^/]+)',
        ]
        
        for pattern in patterns:
            match = re.search(pattern, url)
            if match:
                return match.group(1)
        
        # Fallback: split by '/' and take the last part
        parts = url.split('/')
        if len(parts) > 0:
            return parts[-1]
        
        return url
    
    def fetch_user_data(self, username: str, limit: int = 100) -> Dict:
        """
        Fetch user data from Reddit with enhanced error handling and metadata
        
        Args:
            username: Reddit username
            limit: Maximum number of posts/comments to fetch
            
        Returns:
            Dictionary containing user data
        """
        print(f"Fetching data for u/{username}...")
        
        try:
            user = self.reddit.redditor(username)
            
            # Check if user exists by accessing id
            try:
                user_id = user.id
                print(f"✅ User found: {username} (ID: {user_id})")
            except Exception as e:
                print(f"❌ User {username} not found or suspended: {e}")
                return {
                    'username': username,
                    'posts': [],
                    'comments': [],
                    'avatar_url': None,
                    'error': f'User not found or suspended: {e}'
                }
            
            user_data = {
                'username': username,
                'posts': [],
                'comments': [],
                'avatar_url': None,
                'created_utc': None,
                'karma': {
                    'post_karma': 0,
                    'comment_karma': 0
                },
                'subreddit_activity': {}
            }
            
            # Get user metadata
            try:
                user_data['created_utc'] = user.created_utc
                user_data['karma']['post_karma'] = user.link_karma
                user_data['karma']['comment_karma'] = user.comment_karma
                
                # Try to get avatar URL
                if hasattr(user, 'icon_img') and user.icon_img:
                    user_data['avatar_url'] = user.icon_img.replace('&amp;', '&')
                    
            except Exception as e:
                print(f"⚠️ Could not fetch user metadata: {e}")
            
            # Fetch posts
            print(f"📝 Fetching posts...")
            post_count = 0
            try:
                for submission in user.submissions.new(limit=limit//2):
                    if post_count >= limit//2:
                        break
                    
                    post_data = {
                        'id': submission.id,
                        'type': 'post',
                        'title': submission.title,
                        'content': submission.selftext,
                        'subreddit': submission.subreddit.display_name,
                        'score': submission.score,
                        'created_utc': submission.created_utc,
                        'url': submission.url,
                        'formatted_text': f"Post: {submission.title}\n{submission.selftext}"
                    }
                    
                    user_data['posts'].append(post_data)
                    
                    # Track subreddit activity
                    subreddit = submission.subreddit.display_name
                    user_data['subreddit_activity'][subreddit] = user_data['subreddit_activity'].get(subreddit, 0) + 1
                    
                    post_count += 1
                    
            except Exception as e:
                print(f"⚠️ Error fetching posts: {e}")
            
            # Fetch comments
            print(f"Fetching comments...")
            comment_count = 0
            try:
                for comment in user.comments.new(limit=limit//2):
                    if comment_count >= limit//2:
                        break
                    
                    comment_data = {
                        'id': comment.id,
                        'type': 'comment',
                        'content': comment.body,
                        'subreddit': comment.subreddit.display_name,
                        'score': comment.score,
                        'created_utc': comment.created_utc,
                        'parent_id': comment.parent_id,
                        'formatted_text': f"Comment: {comment.body}"
                    }
                    
                    user_data['comments'].append(comment_data)
                    
                    # Track subreddit activity
                    subreddit = comment.subreddit.display_name
                    user_data['subreddit_activity'][subreddit] = user_data['subreddit_activity'].get(subreddit, 0) + 1
                    
                    comment_count += 1
                    
            except Exception as e:
                print(f"⚠️ Error fetching comments: {e}")
            
            print(f"✅ Fetched {len(user_data['posts'])} posts and {len(user_data['comments'])} comments")
            return user_data
            
        except Exception as e:
            print(f"❌ Error fetching user data for {username}: {e}")
            return {
                'username': username,
                'posts': [],
                'comments': [],
                'avatar_url': None,
                'error': str(e)
            }
    
    def generate_persona(self, user_data: Dict) -> str:
        """
        Generate user persona using Groq API
        
        Args:
            user_data: Dictionary containing user's posts and comments
            
        Returns:
            Generated persona text
        """
        print("Generating persona with Groq AI...")
        
        # Prepare content for analysis
        all_content = []
        
        # Add posts with metadata
        for post in user_data['posts']:
            content = f"POST (r/{post['subreddit']}, Score: {post['score']}): {post['title']}\n{post['content']}"
            all_content.append(content)
        
        # Add comments with metadata
        for comment in user_data['comments']:
            content = f"COMMENT (r/{comment['subreddit']}, Score: {comment['score']}): {comment['content']}"
            all_content.append(content)
        
        # Combine content
        combined_content = "\n\n".join(all_content)
        
        # Truncate if too long (Groq has token limits)
        if len(combined_content) > 20000:
            combined_content = combined_content[:20000] + "\n\n[Content truncated due to length...]"
        
        # Get top subreddits for context
        top_subreddits = sorted(user_data['subreddit_activity'].items(), key=lambda x: x[1], reverse=True)[:5]
        subreddit_context = ", ".join([f"r/{sub} ({count} posts)" for sub, count in top_subreddits])
        
        # Create enhanced prompt
        prompt = f"""You are a user research expert. Based on the following Reddit posts and comments from user '{user_data['username']}', generate a detailed user persona.

USER CONTEXT:
- Username: {user_data['username']}
- Total Posts: {len(user_data['posts'])}
- Total Comments: {len(user_data['comments'])}
- Post Karma: {user_data['karma']['post_karma']}
- Comment Karma: {user_data['karma']['comment_karma']}
- Most Active Subreddits: {subreddit_context}

REDDIT CONTENT:
{combined_content}

Generate a detailed user persona in this EXACT format:

Name: [Name or nickname based on username ]
Age: [Estimated age range with reasoning based on interests, language, and references]
Occupation: [Likely profession or field based on posts, interests, and expertise shown]
Interests: [Main interests and hobbies with specific examples from posts]
Motivations: [What drives this person based on their posts and comments]
Habits: [Observable patterns in behavior, posting times, or discussion topics]
Frustrations: [Issues they commonly complain about or express concern over]
Personality: [Personality traits evident from communication style and opinions]
Goals: [Apparent goals, aspirations, or things they're working toward]

For each section, include specific citations in the format [Post/Comment ID: brief excerpt] to support your analysis. Use actual post/comment IDs from the data provided.

Be thorough and analytical. Base everything on concrete evidence from their posts and comments."""
        
        # Make API call to Groq
        headers = {
            "Authorization": f"Bearer {self.groq_api_key}",
            "Content-Type": "application/json"
        }
        
        payload = {
            "model": "llama3-70b-8192",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.7,
            "max_tokens": 2000
        }
        
        try:
            response = requests.post(self.groq_api_url, headers=headers, json=payload)
            
            if response.status_code == 200:
                persona_text = response.json()["choices"][0]["message"]["content"]
                print("✅ Persona generated successfully")
                return persona_text
            else:
                print(f"❌ Groq API Error {response.status_code}: {response.text}")
                return self.generate_fallback_persona(user_data)
                
        except Exception as e:
            print(f"❌ Error generating persona: {e}")
            return self.generate_fallback_persona(user_data)
    
    def generate_fallback_persona(self, user_data: Dict) -> str:
        """Generate a basic persona without AI when API fails"""
        print("Generating fallback persona...")
        
        username = user_data['username']
        posts = user_data['posts']
        comments = user_data['comments']
        top_subreddits = sorted(user_data['subreddit_activity'].items(), key=lambda x: x[1], reverse=True)[:5]
        
        persona = f"""Name: {username}
Age: Unable to determine from available data
Occupation: Unable to determine from available data
Interests: Active in {len(user_data['subreddit_activity'])} subreddits, primarily: {', '.join([f"r/{sub} ({count} posts)" for sub, count in top_subreddits])}
Motivations: Regular engagement with Reddit community discussions across diverse topics
Habits: Posted {len(posts)} posts and {len(comments)} comments, shows consistent Reddit activity
Frustrations: Unable to determine specific frustrations from available data
Personality: Active Reddit user with {user_data['karma']['post_karma']} post karma and {user_data['karma']['comment_karma']} comment karma
Goals: Community participation and knowledge sharing through Reddit discussions
"""
        
        return persona
    
    def save_to_file(self, persona_text: str, username: str) -> str:
        """
        Save persona to text file with enhanced formatting
        
        Args:
            persona_text: Generated persona text
            username: Reddit username
            
        Returns:
            Filename of saved file
        """
        filename = f"{username}_persona.txt"
        
        # Create header with metadata
        header = f"""Reddit User Persona Analysis
{'='*50}
Username: u/{username}
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
Generated by: Reddit Persona Generator Pipeline
{'='*50}

"""
        
        with open(filename, "w", encoding="utf-8") as f:
            f.write(header + persona_text)
        
        print(f"✅ Persona saved to: {filename}")
        return filename
    
# Modified sections for the RedditPersonaGenerator class

    def download_avatar(self, avatar_url: str, username: str) -> Optional[Image.Image]:
        """Download and return avatar image object instead of saving to file"""
        if not avatar_url:
            return None
        
        print(f"Downloading avatar for {username}...")
        
        try:
            response = requests.get(avatar_url, timeout=10)
            if response.status_code == 200:
                avatar_img = Image.open(BytesIO(response.content))
                print(f"✅ Avatar downloaded successfully")
                return avatar_img
            else:
                print(f"⚠️ Could not download avatar: HTTP {response.status_code}")
        except Exception as e:
            print(f"⚠️ Could not download avatar: {e}")
        
        return None
    
    def extract_complete_persona_content(self, persona_file: str) -> Dict:
        """
        Extract ALL content from persona file in structured format
        
        Args:
            persona_file: Path to persona text file
            
        Returns:
            Dictionary containing all structured content
        """
        print(f"Extracting complete persona content from {persona_file}...")
        
        try:
            with open(persona_file, 'r', encoding='utf-8') as f:
                content = f.read()
            
            # Extract header information
            header_info = {}
            username_match = re.search(r'Username:\s*(.+)', content)
            if username_match:
                header_info['username'] = username_match.group(1).strip()
            
            date_match = re.search(r'Generated:\s*(.+)', content)
            if date_match:
                header_info['generated_date'] = date_match.group(1).strip()
            
            # Extract user context stats
            stats_info = {}
            stats_patterns = {
                'total_posts': r'Total Posts:\s*(\d+)',
                'total_comments': r'Total Comments:\s*(\d+)',
                'post_karma': r'Post Karma:\s*(\d+)',
                'comment_karma': r'Comment Karma:\s*(\d+)',
                'most_active_subreddits': r'Most Active Subreddits:\s*(.+)'
            }
            
            for key, pattern in stats_patterns.items():
                match = re.search(pattern, content)
                if match:
                    if key == 'most_active_subreddits':
                        stats_info[key] = match.group(1).strip()
                    else:
                        stats_info[key] = int(match.group(1))
            
            # Extract persona sections with complete content
            persona_sections = {}
            
            # Find the start of persona content (after "REDDIT CONTENT:" or similar)
            persona_start = content.find('Generate a detailed user persona')
            if persona_start == -1:
                persona_start = content.find('Name:')
            
            if persona_start != -1:
                persona_content = content[persona_start:]
                
                # Define section patterns to capture everything
                section_patterns = {
                    'Name': r'Name:\s*([^:]+?)(?=Age:|$)',
                    'Age': r'Age:\s*([^:]+?)(?=Occupation:|$)',
                    'Occupation': r'Occupation:\s*([^:]+?)(?=Interests:|$)',
                    'Interests': r'Interests:\s*([^:]+?)(?=Motivations:|$)',
                    'Motivations': r'Motivations:\s*([^:]+?)(?=Habits:|$)',
                    'Habits': r'Habits:\s*([^:]+?)(?=Frustrations:|$)',
                    'Frustrations': r'Frustrations:\s*([^:]+?)(?=Personality:|$)',
                    'Personality': r'Personality:\s*([^:]+?)(?=Goals:|$)',
                    'Goals': r'Goals:\s*(.+?)(?=\n\n|$)'
                }
                
                for section_name, pattern in section_patterns.items():
                    match = re.search(pattern, persona_content, re.DOTALL)
                    if match:
                        content_text = match.group(1).strip()
                        # Clean up the content
                        content_text = re.sub(r'\n+', ' ', content_text)  # Replace newlines with spaces
                        content_text = re.sub(r'\s+', ' ', content_text)  # Normalize spaces
                        persona_sections[section_name] = content_text
                    else:
                        persona_sections[section_name] = 'Not specified'
            
            result = {
                'header_info': header_info,
                'stats_info': stats_info,
                'persona_sections': persona_sections
            }
            
            print(f"✅ Successfully extracted complete persona content")
            return result
            
        except Exception as e:
            print(f"❌ Error extracting persona content: {e}")
            return {
                'header_info': {},
                'stats_info': {},
                'persona_sections': {}
            }
    
    def create_enhanced_persona_card(self, persona_file: str, username: str, avatar_url: str = None) -> str:
        """
        Create a comprehensive persona card with ALL content from persona file
        
        Args:
            persona_file: Path to persona text file
            username: Reddit username
            avatar_url: Avatar URL (not file path)
            
        Returns:
            Filename of generated persona card
        """
        print(f"Creating enhanced persona card with complete content for {username}...")
        
        # Extract complete persona content
        persona_data = self.extract_complete_persona_content(persona_file)
        
        # Download avatar as image object
        avatar_img = None
        if avatar_url:
            avatar_img = self.download_avatar(avatar_url, username)
        
        # Enhanced card dimensions for more content
        card_width = 1600
        card_height = 1200
        
        # Create image with modern gradient background
        img = Image.new('RGB', (card_width, card_height), color='white')
        draw = ImageDraw.Draw(img)
        
        # Create sophisticated gradient background
        for y in range(card_height):
            ratio = y / card_height
            r = int(10 + ratio * 30)   # Dark navy to medium blue
            g = int(15 + ratio * 40)   
            b = int(35 + ratio * 80)   
            draw.line([(0, y), (card_width, y)], fill=(r, g, b))
        
        # Load fonts with better sizing
        try:
            title_font = ImageFont.truetype("arial.ttf", 36)
            section_font = ImageFont.truetype("arial.ttf", 16)
            content_font = ImageFont.truetype("arial.ttf", 12)
            username_font = ImageFont.truetype("arial.ttf", 28)
            stats_font = ImageFont.truetype("arial.ttf", 13)
            small_font = ImageFont.truetype("arial.ttf", 11)
        except:
            try:
                title_font = ImageFont.truetype("DejaVuSans-Bold.ttf", 36)
                section_font = ImageFont.truetype("DejaVuSans-Bold.ttf", 16)
                content_font = ImageFont.truetype("DejaVuSans.ttf", 12)
                username_font = ImageFont.truetype("DejaVuSans.ttf", 28)
                stats_font = ImageFont.truetype("DejaVuSans.ttf", 13)
                small_font = ImageFont.truetype("DejaVuSans.ttf", 11)
            except:
                title_font = ImageFont.load_default()
                section_font = ImageFont.load_default()
                content_font = ImageFont.load_default()
                username_font = ImageFont.load_default()
                stats_font = ImageFont.load_default()
                small_font = ImageFont.load_default()
        
        # Header section with avatar integration
        header_height = 140
        
        # Draw header background
        for y in range(header_height):
            ratio = y / header_height
            r = int(20 + ratio * 15)
            g = int(25 + ratio * 20)
            b = int(45 + ratio * 25)
            draw.line([(0, y), (card_width, y)], fill=(r, g, b))
        
        # Avatar integration in header
        avatar_size = 80
        avatar_x = 50
        avatar_y = 30
        
        if avatar_img:
            try:
                # Resize and create circular avatar
                avatar_resized = avatar_img.resize((avatar_size, avatar_size))
                
                # Create circular mask
                mask = Image.new('L', (avatar_size, avatar_size), 0)
                draw_mask = ImageDraw.Draw(mask)
                draw_mask.ellipse((0, 0, avatar_size, avatar_size), fill=255)
                
                # Apply mask
                avatar_resized.putalpha(mask)
                img.paste(avatar_resized, (avatar_x, avatar_y), avatar_resized)
                
                # Add border around avatar
                draw.ellipse([avatar_x-2, avatar_y-2, avatar_x + avatar_size+2, avatar_y + avatar_size+2], 
                            outline=(255, 255, 255), width=3)
            except Exception as e:
                print(f"⚠️ Could not process avatar: {e}")
                # Draw placeholder
                draw.ellipse([avatar_x, avatar_y, avatar_x + avatar_size, avatar_y + avatar_size], 
                            fill=(100, 100, 100), outline=(255, 255, 255), width=2)
                draw.text((avatar_x + 25, avatar_y + 30), "👤", fill='white', font=username_font)
        else:
            # Draw placeholder
            draw.ellipse([avatar_x, avatar_y, avatar_x + avatar_size, avatar_y + avatar_size], 
                        fill=(100, 100, 100), outline=(255, 255, 255), width=2)
            draw.text((avatar_x + 25, avatar_y + 30), "👤", fill='white', font=username_font)
        
        # Title and username next to avatar
        title_start_x = avatar_x + avatar_size + 30
        title_text = "Reddit User Persona Analysis"
        draw.text((title_start_x, avatar_y + 5), title_text, fill='white', font=title_font)
        
        username_text = f"u/{username}"
        draw.text((title_start_x, avatar_y + 45), username_text, fill='#64B5F6', font=username_font)
        
        # Generation date
        header_info = persona_data.get('header_info', {})
        date_text = f"Generated: {header_info.get('generated_date', 'Unknown')}"
        draw.text((title_start_x, avatar_y + 80), date_text, fill='#B0BEC5', font=stats_font)
        
        # Stats section - comprehensive display
        stats_y = header_height + 10
        stats_height = 80
        
        # Draw stats background
        draw.rectangle([20, stats_y, card_width - 20, stats_y + stats_height], 
                      fill=(25, 35, 55), outline=(70, 80, 100), width=2)
        
        # Display comprehensive stats
        stats_info = persona_data.get('stats_info', {})
        if stats_info:
            # Line 1: Post and Comment stats
            stats_line1 = f"📊 Total Posts: {stats_info.get('total_posts', 'N/A')} | Total Comments: {stats_info.get('total_comments', 'N/A')}"
            draw.text((40, stats_y + 15), stats_line1, fill='#FFD700', font=stats_font)
            
            # Line 2: Karma stats
            stats_line2 = f"⭐ Post Karma: {stats_info.get('post_karma', 'N/A')} | Comment Karma: {stats_info.get('comment_karma', 'N/A')}"
            draw.text((40, stats_y + 35), stats_line2, fill='#64B5F6', font=stats_font)
            
            # Line 3: Subreddit activity
            subreddit_text = stats_info.get('most_active_subreddits', 'N/A')
            if len(subreddit_text) > 100:
                subreddit_text = subreddit_text[:100] + "..."
            draw.text((40, stats_y + 55), f"🏛️ Most Active: {subreddit_text}", fill='#81C784', font=small_font)
        
        # Content area - Enhanced 3-column layout with complete content
        content_start_y = stats_y + stats_height + 20
        content_height = card_height - content_start_y - 60
        
        # Column configuration
        col_width = (card_width - 160) // 3
        col_margin = 50
        
        # Organize sections into 3 columns with better distribution
        persona_sections = persona_data.get('persona_sections', {})
        
        column_sections = [
            ['Name', 'Age', 'Occupation'],
            ['Interests', 'Motivations', 'Habits'],
            ['Frustrations', 'Personality', 'Goals']
        ]
        
        # Enhanced section icons
        section_icons = {
            'Name': '👤',
            'Age': '🎂',
            'Occupation': '💼',
            'Interests': '🎯',
            'Motivations': '💡',
            'Habits': '🔄',
            'Frustrations': '😤',
            'Personality': '🧠',
            'Goals': '🎯'
        }
        
        # Draw enhanced columns with complete content
        for col_idx, sections in enumerate(column_sections):
            col_x = col_margin + col_idx * (col_width + col_margin)
            
            # Draw column background with gradient effect
            for y in range(content_start_y - 15, card_height - 80):
                ratio = (y - content_start_y) / content_height
                r = int(35 + ratio * 10)
                g = int(45 + ratio * 15)
                b = int(65 + ratio * 20)
                draw.line([(col_x - 15, y), (col_x + col_width + 15, y)], fill=(r, g, b))
            
            # Column border
            draw.rectangle([col_x - 15, content_start_y - 15, col_x + col_width + 15, card_height - 80], 
                         outline=(80, 90, 110), width=2)
            
            current_y = content_start_y + 10
            
            for section in sections:
                # Check if we have space for this section
                if current_y + 100 > card_height - 100:
                    break
                    
                # Section header with icon
                icon = section_icons.get(section, '📋')
                header_text = f"{icon} {section}"
                draw.text((col_x, current_y), header_text, fill='#FFD700', font=section_font)
                current_y += 25
                
                # Section content - get complete content
                content = persona_sections.get(section, 'Not specified')
                
                # Clean content but preserve important information
                content = re.sub(r'\[([^\]]+)\]', r'(\1)', content)  # Convert citations to parentheses
                content = ' '.join(content.split())  # Clean up whitespace
                
                # Enhanced text wrapping with better content fitting
                wrapped_lines = []
                words = content.split()
                current_line = ""
                
                for word in words:
                    test_line = current_line + (" " if current_line else "") + word
                    bbox = draw.textbbox((0, 0), test_line, font=content_font)
                    text_width = bbox[2] - bbox[0]
                    
                    if text_width <= col_width - 25:
                        current_line = test_line
                    else:
                        if current_line:
                            wrapped_lines.append(current_line)
                            current_line = word
                        else:
                            # Handle very long words
                            if len(word) > 30:
                                word = word[:30] + "..."
                            wrapped_lines.append(word)
                            current_line = ""
                
                if current_line:
                    wrapped_lines.append(current_line)
                
                # Calculate available space for this section
                space_for_section = min(150, (card_height - 100 - current_y) // len(sections) if len(sections) > 0 else 150)
                max_lines = max(3, space_for_section // 18)
                
                # Draw wrapped text with optimal spacing
                for i, line in enumerate(wrapped_lines[:max_lines]):
                    if current_y + 18 > card_height - 100:
                        break
                    draw.text((col_x + 5, current_y), line, fill='white', font=content_font)
                    current_y += 18
                
                # Add continuation indicator if content was truncated
                if len(wrapped_lines) > max_lines:
                    draw.text((col_x + 5, current_y), "...", fill='#888888', font=content_font)
                    current_y += 18
                
                current_y += 15  # Space between sections
        
        # Enhanced footer
        footer_y = card_height - 25
        footer_text = f"Generated by Reddit Persona Generator Pipeline | {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        footer_bbox = draw.textbbox((0, 0), footer_text, font=small_font)
        footer_width = footer_bbox[2] - footer_bbox[0]
        draw.text((card_width//2 - footer_width//2, footer_y), footer_text, fill='#888888', font=small_font)
        
        # Save the enhanced card
        card_filename = f"{username}_persona_card.png"
        img.save(card_filename)
        
        print(f"✅ Enhanced persona card with complete content saved: {card_filename}")
        return card_filename
    
    def process_user(self, url_or_username: str) -> Dict:
        """
        Process a single user through the entire pipeline - MODIFIED VERSION
        """
        print(f"\n{'='*60}")
        print(f" Processing: {url_or_username}")
        print(f"{'='*60}")
        
        # Extract username from URL
        username = self.extract_username_from_url(url_or_username)
        print(f"Username extracted: {username}")
        
        # Step 1: Fetch user data
        user_data = self.fetch_user_data(username)
        
        if 'error' in user_data:
            return {
                'username': username,
                'success': False,
                'error': user_data['error']
            }
        
        # Check if we have enough data
        total_content = len(user_data['posts']) + len(user_data['comments'])
        if total_content == 0:
            return {
                'username': username,
                'success': False,
                'error': 'No posts or comments found'
            }
        
        # Step 2: Generate persona
        persona_text = self.generate_persona(user_data)
        
        # Step 3: Save persona to file
        persona_file = self.save_to_file(persona_text, username)
        
        # Step 4: Create enhanced persona card (no separate avatar file)
        card_file = self.create_enhanced_persona_card(persona_file, username, user_data.get('avatar_url'))
        
        result = {
            'username': username,
            'success': True,
            'persona_file': persona_file,
            'card_file': card_file,
            'avatar_url': user_data.get('avatar_url'),  # Keep URL for reference
            'stats': {
                'posts': len(user_data['posts']),
                'comments': len(user_data['comments']),
                'post_karma': user_data['karma']['post_karma'],
                'comment_karma': user_data['karma']['comment_karma'],
                'subreddits': len(user_data['subreddit_activity'])
            }
        }
        
        print(f"✅ Successfully processed {username}")
        return result
        
    def run_pipeline(self):
            """Main pipeline execution"""
            print("Reddit User Persona Generator Pipeline")
            print("="*50)
            
            # Get user input
            input_urls = input("Paste Reddit profile URLs (comma-separated):\n").strip()
            
            if not input_urls:
                print("❌ No URLs provided. Exiting.")
                return
            
            # Parse URLs
            urls = [url.strip() for url in input_urls.split(",") if url.strip()]
            
            if not urls:
                print("❌ No valid URLs found. Exiting.")
                return
            
            print(f"Found {len(urls)} URL(s) to process")
            
            results = []
            
            # Process each user
            for i, url in enumerate(urls, 1):
                print(f"\nProcessing {i}/{len(urls)}: {url}")
                
                try:
                    result = self.process_user(url)
                    results.append(result)
                    
                    # Add delay between requests to avoid rate limiting
                    if i < len(urls):
                        print("Waiting 3 seconds before next request...")
                        time.sleep(3)
                        
                except Exception as e:
                    print(f"❌ Error processing {url}: {e}")
                    results.append({
                        'username': url,
                        'success': False,
                        'error': str(e)
                    })
            
            # Display final results
            print(f"\n{'='*60}")
            print("PIPELINE RESULTS")
            print(f"{'='*60}")
            
            successful = 0
            failed = 0
            
            for result in results:
                if result['success']:
                    successful += 1
                    stats = result['stats']
                    print(f"✅ {result['username']}: {stats['posts']} posts, {stats['comments']} comments")
                    print(f" Files: {result['persona_file']}, {result['card_file']}")
                    if result['avatar_file']:
                        print(f"  Avatar: {result['avatar_file']}")
                else:
                    failed += 1
                    print(f"❌ {result['username']}: {result['error']}")
            
            print(f"\n Summary: {successful} successful, {failed} failed")
            print(" Pipeline completed!")

# Main execution
if __name__ == "__main__":
    
    # Initialize and run pipeline
    try:
        generator = RedditPersonaGenerator()
        generator.run_pipeline()
    except KeyboardInterrupt:
        print("\n Pipeline interrupted by user")
    except Exception as e:
        print(f"\n Pipeline error: {e}")

🔧 Testing API connections...
✅ Reddit API connected successfully
✅ Groq API connected successfully
Reddit User Persona Generator Pipeline


Paste Reddit profile URLs (comma-separated):
 https://www.reddit.com/user/VR2005


Found 1 URL(s) to process

Processing 1/1: https://www.reddit.com/user/VR2005

 Processing: https://www.reddit.com/user/VR2005
Username extracted: VR2005
Fetching data for u/VR2005...
✅ User found: VR2005 (ID: i8klxes3)
📝 Fetching posts...
Fetching comments...
✅ Fetched 50 posts and 50 comments
Generating persona with Groq AI...
✅ Persona generated successfully
✅ Persona saved to: VR2005_persona.txt
Creating enhanced persona card with complete content for VR2005...
Extracting complete persona content from VR2005_persona.txt...
✅ Successfully extracted complete persona content
Downloading avatar for VR2005...
⚠️ Could not download avatar: name 'BytesIO' is not defined
✅ Enhanced persona card with complete content saved: VR2005_persona_card.png
✅ Successfully processed VR2005

PIPELINE RESULTS
✅ VR2005: 50 posts, 50 comments
 Files: VR2005_persona.txt, VR2005_persona_card.png

 Pipeline error: 'avatar_file'
