In [None]:
# Enhanced Certificate Caption Generator - Dependencies Installation
# This cell installs all required packages for the enhanced version.
# NOTE(review): no versions are pinned, so the packages that get installed
# depend on when the cell is run.

# Core dependencies
!pip install torch transformers sentence-transformers
!pip install pytesseract easyocr opencv-python-headless
!pip install PyMuPDF pillow numpy requests
!pip install pdf2image streamlit gradio
!pip install python-dotenv textblob

# Try to install system dependencies (works in Colab)
# NOTE(review): an IPython `!` shell escape normally does not raise a Python
# exception when the command exits non-zero, so the `except` branch below is
# probably never reached and the success message prints even when apt-get
# fails. Confirm, and consider checking the exit status instead.
try:
    !apt-get update -qq
    !apt-get install -y tesseract-ocr poppler-utils
    print("✅ System dependencies installed successfully")
except:
    print("⚠️ Could not install system dependencies (normal for local environments)")
    print("📋 For local use, install manually:")
    print("   - Tesseract OCR: https://github.com/tesseract-ocr/tesseract")
    print("   - Poppler: https://poppler.freedesktop.org/")

In [None]:
# Enhanced Certificate to Caption Generator - Core Implementation
# Simplified version to avoid dependency conflicts

import os
import re
import io
import sys
import json
import base64
import logging
import tempfile
import requests
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Union
from dataclasses import dataclass, asdict

# Core libraries
import cv2
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter
import fitz  # PyMuPDF

# OCR libraries
import pytesseract
try:
    import easyocr
    EASYOCR_AVAILABLE = True
    print("✅ EasyOCR available")
except ImportError:
    EASYOCR_AVAILABLE = False
    print("⚠️ EasyOCR not available - using PyTesseract only")

# PDF processing
from pdf2image import convert_from_path, convert_from_bytes

# Simplified ML alternatives
try:
    from textblob import TextBlob
    TEXTBLOB_AVAILABLE = True
    print("✅ TextBlob available for NLP")
except ImportError:
    TEXTBLOB_AVAILABLE = False
    print("⚠️ TextBlob not available - using basic text processing")

# Environment detection and file handling
try:
    from google.colab import files as colab_files
    COLAB_ENV = True
    print("📱 Running in Google Colab")
except ImportError:
    COLAB_ENV = False
    print("💻 Running in local environment")

try:
    import streamlit as st
    STREAMLIT_AVAILABLE = True
    print("✅ Streamlit available")
except ImportError:
    STREAMLIT_AVAILABLE = False

try:
    import gradio as gr
    GRADIO_AVAILABLE = True
    print("✅ Gradio available")
except ImportError:
    GRADIO_AVAILABLE = False

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class CertificateAnalysis:
    """Structured result of analysing the text extracted from one certificate.

    Every field has a default, so ``CertificateAnalysis()`` represents
    "nothing extracted yet"; the analyzer fills the fields in one by one.
    """
    title: str = ""                # course / programme title
    organization: str = ""         # issuing organization, "" when not found
    recipient_name: str = ""       # name of the certificate holder, "" when not found
    completion_status: str = ""    # 'completed' or 'participated'
    # ``None`` is only a construction-time sentinel: __post_init__ replaces it
    # with a fresh list so instances never share one mutable default.
    skills_covered: Optional[List[str]] = None
    duration: str = ""             # e.g. "10 hours", "" when not found
    date_issued: str = ""          # date string as it appears in the text
    certificate_type: str = ""     # 'course', 'workshop' or 'certification'
    confidence_score: float = 0.0  # confidence reported by the text extraction step
    industry: str = ""             # industry key used to pick hashtags

    def __post_init__(self):
        """Swap a missing ``skills_covered`` for a new, instance-private list."""
        if self.skills_covered is None:
            self.skills_covered = []

@dataclass
class CaptionTemplate:
    """Text fragments that make up one caption style.

    All fields are required; instances are built by
    ``EnhancedCertificateAnalyzer._load_caption_templates``.
    """
    # Human-readable style name, e.g. "Professional".
    name: str
    # Tone label, e.g. "formal", "casual", "detailed".
    style: str
    # Candidate opening lines; the LinkedIn generator picks one at random.
    opening: List[str]
    # Achievement sentence per certificate type ('course', 'workshop',
    # 'certification'); each is a str.format template using the
    # {title} and {organization_text} placeholders.
    achievement_templates: Dict[str, str]
    # Candidate "what this means" sentences; one is picked at random.
    value_statements: List[str]
    # Candidate closing lines; one is picked at random.
    call_to_actions: List[str]
    # Label for the hashtag flavour.
    # NOTE(review): not read anywhere in the visible code — confirm it is used.
    hashtag_style: str

class EnhancedCertificateAnalyzer:
    """Enhanced certificate analyzer with multiple OCR engines and advanced features"""
    
    def __init__(self, use_easyocr: bool = True):
        """Set up OCR engines, keyword tables, hashtag tables and caption templates.

        Args:
            use_easyocr: Request EasyOCR in addition to PyTesseract. The request
                is only honoured when the module-level ``EASYOCR_AVAILABLE``
                flag is true, and it is switched off again if creating the
                EasyOCR reader raises.
        """
        # EasyOCR is used only when it is both requested and importable.
        self.use_easyocr = use_easyocr and EASYOCR_AVAILABLE

        # Initialize OCR engines
        self.pytesseract_config = r'--oem 3 --psm 6 -l eng'
        if self.use_easyocr:
            try:
                # self.easyocr_reader only exists when this call succeeds;
                # callers must check self.use_easyocr before touching it.
                self.easyocr_reader = easyocr.Reader(['en'])
                print("✅ EasyOCR initialized successfully")
            except Exception as e:
                print(f"❌ EasyOCR initialization failed: {e}")
                self.use_easyocr = False

        # Enhanced keywords and patterns
        # 'duration_patterns' holds regex strings with two groups each:
        # the number and the unit. The other entries are plain phrases.
        self.certificate_keywords = {
            'completion': [
                'completed', 'finished', 'successfully completed', 'accomplished', 
                'certified', 'graduated', 'achieved', 'earned', 'obtained',
                'passed', 'fulfilled', 'mastered'
            ],
            'course_types': [
                'course', 'training', 'program', 'workshop', 'certification',
                'bootcamp', 'seminar', 'webinar', 'masterclass', 'diploma',
                'degree', 'certificate', 'specialization', 'nanodegree'
            ],
            'organization_indicators': [
                'organized by', 'conducted by', 'hosted by', 'presented by',
                'issued by', 'from', 'by', 'offered by', 'provided by'
            ],
            'skill_indicators': [
                'skills', 'learned', 'covered', 'topics', 'subjects', 'modules',
                'curriculum', 'competencies', 'proficiency', 'expertise',
                'knowledge', 'training in', 'specialization in'
            ],
            'duration_patterns': [
                r'(\d+)\s*(hour|hr|hours|hrs)',
                r'(\d+)\s*(week|weeks|wk|wks)',
                r'(\d+)\s*(month|months|mon|mos)',
                r'(\d+)\s*(day|days)',
                r'(\d+)\s*(year|years|yr|yrs)'
            ]
        }

        # Industry-specific hashtag mappings
        # 'general' is the fallback list for industries without their own entry.
        self.industry_hashtags = {
            'technology': ['#TechSkills', '#Programming', '#SoftwareDevelopment', '#Innovation', '#DigitalTransformation'],
            'data_science': ['#DataScience', '#MachineLearning', '#Analytics', '#BigData', '#AI'],
            'design': ['#Design', '#UXDesign', '#CreativeSkills', '#VisualDesign', '#UserExperience'],
            'business': ['#BusinessSkills', '#Leadership', '#Management', '#Strategy', '#Entrepreneurship'],
            'marketing': ['#DigitalMarketing', '#MarketingStrategy', '#SocialMedia', '#ContentMarketing', '#SEO'],
            'finance': ['#Finance', '#FinTech', '#Investment', '#Analysis', '#Accounting'],
            'healthcare': ['#Healthcare', '#MedicalTraining', '#PatientCare', '#HealthTech', '#Medicine'],
            'education': ['#Education', '#Teaching', '#LearningAndDevelopment', '#Training', '#EdTech'],
            'general': ['#ProfessionalDevelopment', '#SkillBuilding', '#CareerGrowth', '#LearningJourney']
        }

        # Load caption templates
        self._load_caption_templates()
    
    def _load_caption_templates(self):
        """Populate ``self.caption_templates`` with the built-in caption styles.

        Creates three ``CaptionTemplate`` entries keyed 'professional',
        'enthusiastic' and 'technical'. Each achievement template is a
        ``str.format`` string with the placeholders ``{title}`` and
        ``{organization_text}``, keyed by certificate type
        ('course', 'workshop', 'certification').
        """
        self.caption_templates = {
            'professional': CaptionTemplate(
                name="Professional",
                style="formal",
                opening=["I'm pleased to share", "I'm proud to announce", "Excited to share"],
                achievement_templates={
                    'course': "I have successfully completed the {title} course{organization_text}.",
                    'workshop': "I participated in the {title} workshop{organization_text}.",
                    'certification': "I have earned certification in {title}{organization_text}."
                },
                value_statements=[
                    "This achievement strengthens my professional capabilities and expertise.",
                    "The knowledge gained will be valuable in delivering exceptional results.",
                    "This learning experience enhances my ability to contribute effectively to projects."
                ],
                call_to_actions=[
                    "Looking forward to applying these skills professionally.",
                    "Ready to contribute with enhanced expertise.",
                    "Excited to leverage this knowledge in future endeavors."
                ],
                hashtag_style="professional"
            ),
            'enthusiastic': CaptionTemplate(
                name="Enthusiastic",
                style="casual",
                opening=["Hey LinkedIn! 🎉", "Thrilled to share! 🚀", "Amazing news! ✨"],
                achievement_templates={
                    'course': "🎓 Just crushed the {title} course{organization_text}! 💪",
                    'workshop': "🎯 Had an incredible time at the {title} workshop{organization_text}! 🔥",
                    'certification': "🏆 Officially certified in {title}{organization_text}! 🎊"
                },
                value_statements=[
                    "This journey has been absolutely transformative! 🌟",
                    "Can't wait to put these amazing skills to work! 💡",
                    "Feeling more confident and ready to tackle new challenges! 💪"
                ],
                call_to_actions=[
                    "Bring on the exciting projects! 🚀",
                    "Ready to make some magic happen! ✨",
                    "Let's connect and create something awesome! 🤝"
                ],
                hashtag_style="enthusiastic"
            ),
            'technical': CaptionTemplate(
                name="Technical",
                style="detailed",
                opening=["Technical milestone achieved", "Completed advanced training in", "Enhanced technical proficiency in"],
                achievement_templates={
                    'course': "Successfully completed comprehensive training in {title}{organization_text}.",
                    'workshop': "Participated in intensive {title} workshop{organization_text}.",
                    'certification': "Achieved professional certification in {title}{organization_text}."
                },
                value_statements=[
                    "This training provides practical skills directly applicable to complex technical challenges.",
                    "The curriculum covered industry best practices and cutting-edge methodologies.",
                    "Gained hands-on experience with tools and frameworks essential for modern development."
                ],
                call_to_actions=[
                    "Ready to implement these methodologies in real-world projects.",
                    "Excited to contribute to technically challenging initiatives.",
                    "Looking forward to collaborating on innovative solutions."
                ],
                hashtag_style="technical"
            )
        }
    
    def enhance_image(self, image: Image.Image) -> Image.Image:
        """Preprocess an image for OCR: grayscale, denoise, boost contrast, sharpen.

        Args:
            image: PIL image in any mode.

        Returns:
            A new single-channel PIL image, or the untouched ``image`` when any
            processing step raises (a warning is logged in that case).
        """
        try:
            # np.array() on a palette ('P') image yields palette indices rather
            # than intensities, and modes such as '1', 'LA' or 'CMYK' produce
            # arrays that the OpenCV calls below reject or misinterpret.
            # Normalise everything except plain RGB / 8-bit grayscale to RGB.
            if image.mode in ('RGB', 'L'):
                source = image
            else:
                source = image.convert('RGB')

            # Convert to numpy array
            img_array = np.array(source)

            # Convert to grayscale if not already
            if img_array.ndim == 3:
                gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
            else:
                gray = img_array

            # Apply noise reduction
            denoised = cv2.fastNlMeansDenoising(gray)

            # Enhance local contrast
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            enhanced = clahe.apply(denoised)

            # Apply a 3x3 sharpening kernel (weights sum to 1, so overall
            # brightness is preserved)
            kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
            sharpened = cv2.filter2D(enhanced, -1, kernel)

            # Convert back to PIL Image
            return Image.fromarray(sharpened)

        except Exception as e:
            # Best effort: OCR can still run on the original image.
            logger.warning(f"Image enhancement failed: {e}")
            return image
    
    def extract_text_pytesseract(self, image: Image.Image) -> Tuple[str, float]:
        """Extract text using PyTesseract with confidence scoring"""
        try:
            enhanced_image = self.enhance_image(image)
            
            # Get text with confidence data
            data = pytesseract.image_to_data(enhanced_image, config=self.pytesseract_config, output_type=pytesseract.Output.DICT)
            
            # Filter confident text
            confidences = [int(conf) for conf in data['conf'] if int(conf) > 30]
            texts = [data['text'][i] for i, conf in enumerate(data['conf']) if int(conf) > 30 and data['text'][i].strip()]
            
            text = ' '.join(texts)
            avg_confidence = np.mean(confidences) if confidences else 0
            
            return text, avg_confidence / 100.0
            
        except Exception as e:
            logger.error(f"PyTesseract extraction failed: {e}")
            return "", 0.0
    
    def extract_text_easyocr(self, image: Image.Image) -> Tuple[str, float]:
        """Extract text using EasyOCR with confidence scoring"""
        if not self.use_easyocr:
            return "", 0.0
        
        try:
            enhanced_image = self.enhance_image(image)
            img_array = np.array(enhanced_image)
            
            results = self.easyocr_reader.readtext(img_array, detail=1, paragraph=True)
            
            texts = []
            confidences = []
            
            for (bbox, text, confidence) in results:
                if confidence > 0.3:  # Filter low confidence results
                    texts.append(text)
                    confidences.append(confidence)
            
            combined_text = ' '.join(texts)
            avg_confidence = np.mean(confidences) if confidences else 0
            
            return combined_text, avg_confidence
            
        except Exception as e:
            logger.error(f"EasyOCR extraction failed: {e}")
            return "", 0.0
    
    def extract_text_from_image(self, image_path: str) -> Dict:
        """Extract text from image using multiple OCR engines"""
        try:
            image = Image.open(image_path)
            results = {}
            
            # Try PyTesseract
            pytesseract_text, pytesseract_conf = self.extract_text_pytesseract(image)
            results['pytesseract'] = {'text': pytesseract_text, 'confidence': pytesseract_conf}
            
            # Try EasyOCR if available
            if self.use_easyocr:
                easyocr_text, easyocr_conf = self.extract_text_easyocr(image)
                results['easyocr'] = {'text': easyocr_text, 'confidence': easyocr_conf}
            
            # Choose best result
            best_engine = 'pytesseract'
            best_confidence = pytesseract_conf
            
            if self.use_easyocr and easyocr_conf > pytesseract_conf:
                best_engine = 'easyocr'
                best_confidence = easyocr_conf
            
            return {
                'text': results[best_engine]['text'],
                'confidence': best_confidence,
                'engine_used': best_engine,
                'all_results': results
            }
            
        except Exception as e:
            logger.error(f"Image text extraction failed: {e}")
            return {'text': '', 'confidence': 0.0, 'engine_used': 'none', 'all_results': {}}
    
    def extract_text_from_pdf(self, pdf_path: str) -> Dict:
        """Extract text from PDF using multiple methods"""
        try:
            # First try direct text extraction
            doc = fitz.open(pdf_path)
            direct_text = ""
            for page in doc:
                direct_text += page.get_text() + "\n"
            doc.close()
            
            if direct_text.strip():
                return {
                    'text': direct_text,
                    'confidence': 0.95,
                    'method': 'direct_extraction',
                    'all_results': {'direct': direct_text}
                }
            
            # If no direct text, use OCR on converted images
            logger.info("No direct text found, using OCR on PDF pages...")
            
            try:
                images = convert_from_path(pdf_path, dpi=300)
            except Exception:
                # Fallback to bytes conversion
                with open(pdf_path, 'rb') as f:
                    images = convert_from_bytes(f.read(), dpi=300)
            
            all_text = ""
            confidences = []
            
            for i, image in enumerate(images):
                temp_path = f"/tmp/page_{i}.png" if os.path.exists('/tmp') else f"page_{i}.png"
                image.save(temp_path)
                
                result = self.extract_text_from_image(temp_path)
                all_text += result['text'] + "\n"
                confidences.append(result['confidence'])
                
                # Clean up
                try:
                    os.remove(temp_path)
                except:
                    pass
            
            avg_confidence = np.mean(confidences) if confidences else 0
            
            return {
                'text': all_text,
                'confidence': avg_confidence,
                'method': 'ocr_extraction',
                'pages_processed': len(images)
            }
            
        except Exception as e:
            logger.error(f"PDF text extraction failed: {e}")
            return {'text': '', 'confidence': 0.0, 'method': 'failed', 'error': str(e)}
    
    def detect_industry(self, text: str) -> str:
        """Detect industry based on certificate content"""
        text_lower = text.lower()
        
        industry_keywords = {
            'technology': ['programming', 'coding', 'software', 'development', 'python', 'java', 'javascript', 'web', 'app', 'tech', 'computer', 'it'],
            'data_science': ['data science', 'machine learning', 'analytics', 'statistics', 'data analysis', 'ai', 'artificial intelligence', 'big data'],
            'design': ['design', 'ux', 'ui', 'graphic', 'creative', 'photoshop', 'illustrator', 'figma', 'visual'],
            'business': ['business', 'management', 'leadership', 'strategy', 'mba', 'entrepreneurship', 'project management'],
            'marketing': ['marketing', 'digital marketing', 'social media', 'seo', 'content', 'advertising', 'brand'],
            'finance': ['finance', 'financial', 'accounting', 'investment', 'banking', 'economics', 'fintech'],
            'healthcare': ['healthcare', 'medical', 'nursing', 'health', 'clinical', 'patient', 'medicine'],
            'education': ['education', 'teaching', 'pedagogy', 'curriculum', 'learning', 'instruction']
        }
        
        scores = {}
        for industry, keywords in industry_keywords.items():
            score = sum(1 for keyword in keywords if keyword in text_lower)
            scores[industry] = score
        
        # Return industry with highest score, or 'general' if no clear match
        best_industry = max(scores, key=scores.get) if max(scores.values()) > 0 else 'general'
        return best_industry
    
    def analyze_certificate_content(self, text: str, confidence: float) -> CertificateAnalysis:
        """Enhanced certificate content analysis"""
        analysis = CertificateAnalysis()
        analysis.confidence_score = confidence
        
        if not text.strip():
            return analysis
        
        lines = [line.strip() for line in text.split('\n') if line.strip()]
        text_lower = text.lower()
        
        # Detect industry
        analysis.industry = self.detect_industry(text)
        
        # Extract title with improved logic
        analysis.title = self._extract_title(lines, text)
        
        # Extract organization
        analysis.organization = self._extract_organization(lines, text)
        
        # Extract recipient name
        analysis.recipient_name = self._extract_recipient_name(lines)
        
        # Extract completion status
        analysis.completion_status = self._extract_completion_status(text_lower)
        
        # Determine certificate type
        analysis.certificate_type = self._determine_certificate_type(text_lower)
        
        # Extract skills
        analysis.skills_covered = self._extract_skills_advanced(text)
        
        # Extract duration
        analysis.duration = self._extract_duration(text)
        
        # Extract date
        analysis.date_issued = self._extract_date(text)
        
        return analysis
    
    def _extract_title(self, lines: List[str], text: str) -> str:
        """Enhanced title extraction"""
        # Look for lines that are likely titles
        title_candidates = []
        
        for i, line in enumerate(lines):
            # Skip very short lines and common phrases
            if len(line) < 10:
                continue
                
            line_lower = line.lower()
            skip_phrases = ['certificate', 'awarded', 'this', 'presented', 'completion', 'successful', 'congratulations']
            
            if any(phrase in line_lower for phrase in skip_phrases):
                continue
            
            # Look for lines that might be titles
            if len(line) > 15 and len(line) < 100:
                # Give higher score to lines with title-like characteristics
                score = 0
                if line.isupper() or line.istitle():
                    score += 2
                if '"' in line or "'" in line:
                    score += 2
                if any(word in line_lower for word in ['course', 'program', 'certification', 'training']):
                    score += 1
                
                title_candidates.append((line, score, i))
        
        # Sort by score and position (prefer higher scores and earlier positions)
        title_candidates.sort(key=lambda x: (-x[1], x[2]))
        
        return title_candidates[0][0] if title_candidates else "Professional Development Program"
    
    def _extract_organization(self, lines: List[str], text: str) -> str:
        """Enhanced organization extraction"""
        org_patterns = [
            r'(?:issued by|offered by|presented by|from)\s+([^\\n]+)',
            r'([A-Z][a-zA-Z\s&,]+(?:University|Institute|College|Academy|Foundation|Company|Inc|LLC|Ltd|Corporation))',
            r'((?:[A-Z][a-z]+\s+){1,3}(?:University|Institute|College|Academy|School))'
        ]
        
        for pattern in org_patterns:
            matches = re.findall(pattern, text, re.IGNORECASE)
            if matches:
                return matches[0].strip()
        
        return ""
    
    def _extract_recipient_name(self, lines: List[str]) -> str:
        """Extract recipient name"""
        for i, line in enumerate(lines):
            line_lower = line.lower()
            if any(phrase in line_lower for phrase in ['this certifies', 'awarded to', 'presented to', 'hereby certifies']):
                if i + 1 < len(lines):
                    potential_name = lines[i + 1].strip()
                    # Basic name validation
                    if len(potential_name.split()) >= 2 and potential_name.replace(' ', '').isalpha():
                        return potential_name
        return ""
    
    def _extract_completion_status(self, text_lower: str) -> str:
        """Extract completion status"""
        for keyword in self.certificate_keywords['completion']:
            if keyword in text_lower:
                return 'completed'
        return 'participated'
    
    def _determine_certificate_type(self, text_lower: str) -> str:
        """Determine certificate type"""
        if any(word in text_lower for word in ['workshop', 'seminar', 'webinar']):
            return 'workshop'
        elif any(word in text_lower for word in ['course', 'training', 'program']):
            return 'course'
        elif any(word in text_lower for word in ['certification', 'certified', 'certificate']):
            return 'certification'
        else:
            return 'course'
    
    def _extract_skills_advanced(self, text: str) -> List[str]:
        """Advanced skills extraction using basic NLP"""
        skills = set()
        
        # Use TextBlob for noun phrase extraction if available
        if TEXTBLOB_AVAILABLE:
            try:
                blob = TextBlob(text)
                noun_phrases = blob.noun_phrases
                
                # Filter relevant noun phrases
                for phrase in noun_phrases:
                    phrase_clean = phrase.lower().strip()
                    if len(phrase_clean.split()) <= 3 and len(phrase_clean) > 3:
                        # Check if it looks like a skill
                        if not any(stop_word in phrase_clean for stop_word in ['the', 'this', 'that', 'with', 'from']):
                            skills.add(phrase_clean.title())
            except:
                pass
        
        # Traditional keyword-based extraction
        skill_patterns = [
            r'(?:including|covering|topics:|subjects:|modules:)\s*([^.]+)',
            r'(?:skills in|proficiency in|training in)\s+([^.]+)',
            r'(?:learn|learned|learning)\s+([^.]+)'
        ]
        
        for pattern in skill_patterns:
            matches = re.findall(pattern, text, re.IGNORECASE)
            for match in matches:
                # Split and clean skills
                potential_skills = re.split(r'[,;&]', match)
                for skill in potential_skills:
                    skill_clean = skill.strip().title()
                    if len(skill_clean) > 3 and len(skill_clean) < 30:
                        skills.add(skill_clean)
        
        return list(skills)[:8]  # Limit to 8 skills
    
    def _extract_duration(self, text: str) -> str:
        """Extract duration information"""
        for pattern in self.certificate_keywords['duration_patterns']:
            matches = re.findall(pattern, text, re.IGNORECASE)
            if matches:
                return f"{matches[0][0]} {matches[0][1]}"
        return ""
    
    def _extract_date(self, text: str) -> str:
        """Extract issue date"""
        date_patterns = [
            r'(\d{1,2}[/-]\d{1,2}[/-]\d{4})',
            r'(\d{4}[/-]\d{1,2}[/-]\d{1,2})',
            r'((?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4})',
            r'(\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{4})'
        ]
        
        for pattern in date_patterns:
            matches = re.findall(pattern, text, re.IGNORECASE)
            if matches:
                return matches[0]
        return ""

# Initialize the enhanced analyzer
# Creating the analyzer also creates the EasyOCR reader when EasyOCR is
# importable; if that fails, the analyzer falls back to PyTesseract only.
print("🚀 Initializing Enhanced Certificate Analyzer...")
analyzer = EnhancedCertificateAnalyzer(use_easyocr=EASYOCR_AVAILABLE)
print("✅ Enhanced Certificate Analyzer ready!")

In [None]:
# Enhanced Caption Generation System
class CaptionGenerator:
    """Advanced caption generator with multiple styles and platforms"""
    
    def __init__(self, analyzer: EnhancedCertificateAnalyzer):
        """Bind the generator to an analyzer and reuse its caption templates.

        Args:
            analyzer: Source of ``caption_templates`` and ``industry_hashtags``.
        """
        self.analyzer = analyzer
        # Same dict object as on the analyzer, not a copy.
        self.templates = analyzer.caption_templates
    
    def generate_hashtags(self, analysis: CertificateAnalysis, platform: str = "linkedin", max_hashtags: int = 10) -> List[str]:
        """Generate relevant hashtags based on analysis and platform"""
        hashtags = set()
        
        # Add industry-specific hashtags
        industry_tags = self.analyzer.industry_hashtags.get(analysis.industry, self.analyzer.industry_hashtags['general'])
        hashtags.update(industry_tags[:3])
        
        # Add title-based hashtags
        title_words = re.findall(r'\b[A-Z][a-z]+\b', analysis.title)
        for word in title_words[:2]:
            if len(word) > 3:
                hashtags.add(f"#{word.replace(' ', '')}")
        
        # Add type-based hashtags
        if analysis.certificate_type == 'course':
            hashtags.update(["#OnlineLearning", "#ProfessionalDevelopment", "#SkillsUpgrade"])
        elif analysis.certificate_type == 'workshop':
            hashtags.update(["#Workshop", "#HandsOnLearning", "#PracticalSkills"])
        elif analysis.certificate_type == 'certification':
            hashtags.update(["#Certification", "#ProfessionalCertification", "#Achievement"])
        
        # Add skill-based hashtags
        for skill in analysis.skills_covered[:2]:
            clean_skill = re.sub(r'[^a-zA-Z0-9]', '', skill)
            if len(clean_skill) > 3:
                hashtags.add(f"#{clean_skill}")
        
        # Platform-specific adjustments
        if platform == "twitter":
            # Twitter has character limits, use shorter hashtags
            hashtags = {tag for tag in hashtags if len(tag) < 20}
            max_hashtags = min(max_hashtags, 5)
        elif platform == "instagram":
            # Instagram allows more hashtags
            hashtags.update(["#learning", "#growth", "#success", "#motivation"])
            max_hashtags = min(max_hashtags, 20)
        
        return list(hashtags)[:max_hashtags]
    
    def generate_caption(self, analysis: CertificateAnalysis, style: str = "professional", 
                        platform: str = "linkedin", include_skills: bool = True,
                        custom_message: str = "") -> str:
        """Generate platform-specific captions"""
        
        if platform == "linkedin":
            return self._generate_linkedin_caption(analysis, style, include_skills, custom_message)
        elif platform == "twitter":
            return self._generate_twitter_caption(analysis, style, include_skills)
        elif platform == "instagram":
            return self._generate_instagram_caption(analysis, style, include_skills)
        elif platform == "portfolio":
            return self._generate_portfolio_description(analysis, include_skills)
        else:
            return self._generate_linkedin_caption(analysis, style, include_skills, custom_message)
    
    def _generate_linkedin_caption(self, analysis: CertificateAnalysis, style: str, 
                                 include_skills: bool, custom_message: str) -> str:
        """Generate LinkedIn-optimized caption"""
        template = self.templates.get(style, self.templates['professional'])
        caption_parts = []
        
        # Opening
        if custom_message:
            caption_parts.append(custom_message)
        else:
            opening = np.random.choice(template.opening)
            caption_parts.append(f"{opening} 🎉")
        
        caption_parts.append("\n\n")
        
        # Achievement statement
        org_text = f" from {analysis.organization}" if analysis.organization else ""
        achievement_template = template.achievement_templates.get(analysis.certificate_type, 
                                                                template.achievement_templates['course'])
        achievement = achievement_template.format(title=analysis.title, organization_text=org_text)
        caption_parts.append(achievement)
        caption_parts.append("\n\n")
        
        # Skills section
        if include_skills and analysis.skills_covered:
            skills_text = ", ".join(analysis.skills_covered[:4])
            if style == "professional":
                skills_section = f"📚 Key areas covered: {skills_text}"
            elif style == "enthusiastic":
                skills_section = f"💡 Dove deep into: {skills_text} - mind blown! 🤯"
            else:
                skills_section = f"🔧 Technical competencies gained: {skills_text}"
            
            caption_parts.append(skills_section)
            caption_parts.append("\n\n")
        
        # Duration if available
        if analysis.duration:
            caption_parts.append(f"⏱️ Duration: {analysis.duration}")
            caption_parts.append("\n\n")
        
        # Value statement
        value_statement = np.random.choice(template.value_statements)
        caption_parts.append(value_statement)
        caption_parts.append("\n\n")
        
        # Call to action
        cta = np.random.choice(template.call_to_actions)
        caption_parts.append(cta)
        caption_parts.append("\n\n")
        
        # Hashtags
        hashtags = self.generate_hashtags(analysis, "linkedin")
        caption_parts.append(" ".join(hashtags))
        
        return "".join(caption_parts)
    
    def _generate_twitter_caption(self, analysis: CertificateAnalysis, style: str, include_skills: bool) -> str:
        """Generate Twitter-optimized caption (character limit conscious)"""
        parts = []
        
        if style == "enthusiastic":
            parts.append(f"🎉 Just completed {analysis.title}!")
        else:
            parts.append(f"✅ Completed: {analysis.title}")
        
        if analysis.organization:
            org_short = analysis.organization.split()[0] if len(analysis.organization) > 20 else analysis.organization
            parts.append(f" @{org_short}")
        
        if include_skills and analysis.skills_covered:
            skills_short = ", ".join(analysis.skills_covered[:2])
            parts.append(f"\n🔧 {skills_short}")
        
        parts.append("\n💪 Ready for new challenges!")
        
        # Add hashtags (Twitter limit)
        hashtags = self.generate_hashtags(analysis, "twitter", max_hashtags=3)
        parts.append(f"\n{' '.join(hashtags)}")
        
        caption = "".join(parts)
        
        # Ensure under 280 characters
        if len(caption) > 280:
            caption = caption[:277] + "..."
        
        return caption
    
    def _generate_instagram_caption(self, analysis: CertificateAnalysis, style: str, include_skills: bool) -> str:
        """Generate Instagram-optimized caption"""
        parts = []
        
        # Instagram loves emojis and stories
        parts.append("✨ NEW ACHIEVEMENT UNLOCKED ✨\n\n")
        
        if style == "enthusiastic":
            parts.append(f"🚀 Just crushed the {analysis.title} course! ")
        else:
            parts.append(f"🎓 Successfully completed {analysis.title}. ")
        
        if analysis.organization:
            parts.append(f"Huge thanks to {analysis.organization}! 🙏\n\n")
        
        if include_skills and analysis.skills_covered:
            parts.append("💡 What I learned:\n")
            for skill in analysis.skills_covered[:3]:
                parts.append(f"• {skill}\n")
            parts.append("\n")
        
        parts.append("This journey has been incredible! 💪 Can't wait to apply these new skills. ")
        parts.append("What's your latest learning achievement? Drop it in the comments! 👇\n\n")
        
        # Instagram allows many hashtags
        hashtags = self.generate_hashtags(analysis, "instagram", max_hashtags=15)
        parts.append(" ".join(hashtags))
        parts.append("\n\n#learning #growth #education #skills #achievement #motivation #success")
        
        return "".join(parts)
    
    def _generate_portfolio_description(self, analysis: CertificateAnalysis, include_skills: bool) -> str:
        """Generate professional portfolio description"""
        parts = []
        
        parts.append(f"**{analysis.title}**\n")
        
        if analysis.organization:
            parts.append(f"*{analysis.organization}*\n")
        
        if analysis.date_issued:
            parts.append(f"Completed: {analysis.date_issued}\n")
        elif analysis.duration:
            parts.append(f"Duration: {analysis.duration}\n")
        
        parts.append("\n")
        
        if include_skills and analysis.skills_covered:
            parts.append("**Key Competencies:**\n")
            for skill in analysis.skills_covered:
                parts.append(f"• {skill}\n")
            parts.append("\n")
        
        parts.append("This professional development program enhanced my expertise and provided ")
        parts.append("practical knowledge applicable to real-world challenges. The comprehensive ")
        parts.append("curriculum covered industry best practices and modern methodologies.")
        
        return "".join(parts)

# Enhanced File Processing Functions
class FileProcessor:
    """Resolve a certificate input to a file on disk and run it through the analyzer.

    Accepted inputs are a local path, an ``http(s)`` URL, or raw bytes. URLs and
    bytes are written to uniquely named temporary files, which are removed again
    once processing finishes. Files that already existed on disk are never
    deleted by this class.
    """

    _DOWNLOAD_TIMEOUT_SECONDS = 30
    _DOWNLOAD_CHUNK_BYTES = 8192

    def __init__(self, analyzer: "EnhancedCertificateAnalyzer"):
        self.analyzer = analyzer
        # Paths of temp files created by this instance. Cleanup is restricted to
        # these so that a caller's own file is never removed, wherever it lives.
        self._owned_temp_files = set()

    def process_file(self, file_input: Union[str, bytes], filename: str = "") -> Dict:
        """Extract text from the input and analyse it.

        Args:
            file_input: Local file path, ``http(s)`` URL, or raw file bytes.
            filename: Optional original file name; only its extension is used
                (to decide between PDF and image extraction).

        Returns:
            On success a dict with ``success``, ``analysis``, ``extraction_info``
            and ``raw_text``. On failure a dict with ``error`` (plus
            ``extraction_info`` when extraction ran but produced no text).
        """
        temp_path = None
        try:
            temp_path = self._prepare_temp_file(file_input, filename)
            if not temp_path:
                return {'error': 'Could not process file input'}

            # The file extension selects the extraction pipeline.
            if temp_path.lower().endswith('.pdf'):
                result = self.analyzer.extract_text_from_pdf(temp_path)
            else:
                result = self.analyzer.extract_text_from_image(temp_path)

            if not result['text'].strip():
                return {
                    'error': 'No text could be extracted from the file. Please ensure the image is clear and contains readable text.',
                    'extraction_info': result
                }

            analysis = self.analyzer.analyze_certificate_content(result['text'], result['confidence'])
            return {
                'success': True,
                'analysis': analysis,
                'extraction_info': result,
                'raw_text': result['text']
            }

        except Exception as e:
            logger.error(f"File processing error: {e}")
            return {'error': f'File processing failed: {str(e)}'}
        finally:
            # Runs on every exit path, so a failed extraction no longer leaks
            # the temporary file.
            self._cleanup_temp_file(temp_path)

    @staticmethod
    def _pick_suffix(filename: str, fallback_name: str = "", head: bytes = b"") -> str:
        """Choose a temp-file extension.

        Uses the extension of ``filename``, then of ``fallback_name``; if neither
        has a plain alphanumeric extension, returns ``'.pdf'`` when ``head``
        starts with the PDF magic bytes and ``''`` otherwise.
        """
        for candidate in (filename, fallback_name):
            if not candidate:
                continue
            # basename() discards any directory part a caller may have supplied.
            ext = os.path.splitext(os.path.basename(candidate))[1]
            if ext and ext[1:].isalnum():
                return ext
        return '.pdf' if head.startswith(b'%PDF') else ''

    def _write_temp_file(self, chunks, suffix: str) -> str:
        """Write ``chunks`` (iterable of bytes) to a new temp file and register it for cleanup."""
        fd, temp_path = tempfile.mkstemp(prefix="cert_", suffix=suffix)
        try:
            with os.fdopen(fd, 'wb') as f:
                for chunk in chunks:
                    if chunk:
                        f.write(chunk)
        except Exception:
            # Do not leave a half-written file behind.
            try:
                os.remove(temp_path)
            except OSError:
                pass
            raise
        self._owned_temp_files.add(temp_path)
        return temp_path

    def _prepare_temp_file(self, file_input: Union[str, bytes], filename: str) -> Optional[str]:
        """Return a path on disk for the input, or ``None`` if it cannot be resolved.

        Strings starting with ``http://``/``https://`` are downloaded; other
        strings are treated as local paths and returned unchanged when they
        exist. Bytes are written to a new temporary file.
        """
        try:
            if isinstance(file_input, str):
                if file_input.startswith(('http://', 'https://')):
                    return self._download_file(file_input, filename)
                return file_input if os.path.exists(file_input) else None

            if isinstance(file_input, (bytes, bytearray)):
                suffix = self._pick_suffix(filename, head=bytes(file_input[:5]))
                return self._write_temp_file([file_input], suffix)

            return None

        except Exception as e:
            logger.error(f"Temp file preparation failed: {e}")
            return None

    def _download_file(self, url: str, filename: str) -> Optional[str]:
        """Stream ``url`` into a temporary file; return its path or ``None`` on failure."""
        from urllib.parse import unquote, urlparse

        try:
            # The context manager releases the streamed connection on every path.
            with requests.get(url, stream=True, timeout=self._DOWNLOAD_TIMEOUT_SECONDS) as response:
                response.raise_for_status()

                chunks = response.iter_content(chunk_size=self._DOWNLOAD_CHUNK_BYTES)
                first_chunk = next(chunks, b"")

                # Without a caller-supplied name, fall back to the URL's file name
                # and finally to the content itself so PDFs keep a .pdf suffix.
                url_name = os.path.basename(unquote(urlparse(url).path))
                suffix = self._pick_suffix(filename, url_name, first_chunk)

                def _stream():
                    yield first_chunk
                    yield from chunks

                return self._write_temp_file(_stream(), suffix)

        except Exception as e:
            logger.error(f"File download failed: {e}")
            return None

    def _cleanup_temp_file(self, file_path: str):
        """Delete ``file_path`` only if this processor created it; otherwise do nothing."""
        if not file_path or file_path not in self._owned_temp_files:
            return
        self._owned_temp_files.discard(file_path)
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass
        except OSError as e:
            # Best effort: failing to remove a temp file must not fail processing.
            logger.warning(f"Could not remove temp file {file_path}: {e}")

# Initialize components
# Both components share the module-level analyzer instance
caption_generator, file_processor = CaptionGenerator(analyzer), FileProcessor(analyzer)

# Startup banner listing the available features
print(
    "✅ Enhanced caption generation system ready!",
    "🎯 Features available:",
    "   • Multiple OCR engines (PyTesseract + EasyOCR)",
    "   • Advanced image preprocessing",
    "   • Multiple caption styles (Professional, Enthusiastic, Technical)",
    "   • Multi-platform support (LinkedIn, Twitter, Instagram, Portfolio)",
    "   • Industry-specific hashtags",
    "   • URL and local file support",
    "   • Enhanced error handling",
    sep="\n",
)

In [None]:
# Multi-Platform User Interface Functions

def process_certificate_advanced(file_input: Union[str, bytes], 
                                filename: str = "",
                                style: str = "professional",
                                platform: str = "linkedin",
                                include_skills: bool = True,
                                custom_message: str = "") -> Dict:
    """
    Advanced certificate processing with multiple options
    
    Args:
        file_input: File path, URL, or bytes data
        filename: Optional filename for bytes input
        style: Caption style ('professional', 'enthusiastic', 'technical')
        platform: Target platform ('linkedin', 'twitter', 'instagram', 'portfolio');
            matched case-insensitively, surrounding whitespace ignored
        include_skills: Whether to include skills in caption
        custom_message: Custom opening message (applied to the target platform only)
    
    Returns:
        Dict with analysis, captions, and metadata on success, or a dict with
        an 'error' key when the platform is unsupported or file processing fails
    """
    supported_platforms = ('linkedin', 'twitter', 'instagram', 'portfolio')

    # Validate before the expensive extraction step; previously an unknown
    # platform only surfaced as a KeyError after all the work was done.
    platform_key = platform.strip().lower() if isinstance(platform, str) else platform
    if platform_key not in supported_platforms:
        return {
            'error': f"Unsupported platform: {platform!r}. "
                     f"Choose one of: {', '.join(supported_platforms)}"
        }

    print("🔍 Processing certificate...")
    
    # Process file
    result = file_processor.process_file(file_input, filename)
    
    if 'error' in result:
        return result
    
    analysis = result['analysis']
    
    print("📊 Generating captions...")
    
    # Generate captions for all platforms; the custom opening is only used
    # for the platform the caller asked for.
    captions = {
        platform_name: caption_generator.generate_caption(
            analysis, style, platform_name, include_skills,
            custom_message if platform_name == platform_key else ""
        )
        for platform_name in supported_platforms
    }
    
    # Generate hashtags
    hashtags = caption_generator.generate_hashtags(analysis, platform_key)

    # Only a 500-character preview of the extracted text is returned
    raw_text = result['raw_text']
    raw_preview = raw_text[:500] + "..." if len(raw_text) > 500 else raw_text
    
    return {
        'success': True,
        'analysis': asdict(analysis),
        'captions': captions,
        'hashtags': hashtags,
        'primary_caption': captions[platform_key],
        'extraction_info': result['extraction_info'],
        'raw_text': raw_preview
    }

# Google Colab Interface
def upload_and_process_colab(style: str = "professional", platform: str = "linkedin", 
                           include_skills: bool = True, custom_message: str = ""):
    """Prompt for a file upload in Google Colab and process the first uploaded file.

    Returns the result of ``process_certificate_advanced``, or ``None`` when
    not running in Colab or when nothing was uploaded.
    """
    if not COLAB_ENV:
        print("❌ This function is only available in Google Colab")
        return None

    print("📤 Please upload your certificate file (PDF, JPG, PNG, etc.):")
    uploads = colab_files.upload()
    if not uploads:
        print("❌ No file uploaded!")
        return None

    # Only the first uploaded entry is processed
    first_name, first_content = next(iter(uploads.items()))

    print(f"🔄 Processing: {first_name}")
    return process_certificate_advanced(
        first_content,
        first_name,
        style,
        platform,
        include_skills,
        custom_message,
    )

# Local File Interface
def process_local_file(file_path: str, style: str = "professional", platform: str = "linkedin",
                      include_skills: bool = True, custom_message: str = ""):
    """Process a certificate stored on the local filesystem.

    Returns the result of ``process_certificate_advanced``, or ``None``
    (after printing a message) when ``file_path`` does not exist.
    """
    if os.path.exists(file_path):
        print(f"🔄 Processing local file: {file_path}")
        caption_options = {
            'style': style,
            'platform': platform,
            'include_skills': include_skills,
            'custom_message': custom_message,
        }
        return process_certificate_advanced(file_path, **caption_options)

    print(f"❌ File not found: {file_path}")
    return None

# URL Interface
def process_url(url: str, style: str = "professional", platform: str = "linkedin",
               include_skills: bool = True, custom_message: str = ""):
    """Process a certificate referenced by URL.

    Thin wrapper that forwards the URL and caption options to
    ``process_certificate_advanced`` and returns its result.
    """
    caption_options = dict(
        style=style,
        platform=platform,
        include_skills=include_skills,
        custom_message=custom_message,
    )
    print(f"🌐 Processing URL: {url}")
    return process_certificate_advanced(url, **caption_options)

# Manual Text Input (Fallback)
def process_manual_text(text: str, title: str = "", organization: str = "",
                       style: str = "professional", platform: str = "linkedin",
                       include_skills: bool = True, custom_message: str = ""):
    """Generate captions from manually entered certificate details (no OCR).

    Args:
        text: Free-form certificate text used for industry and skill detection
        title: Certificate title; a generic title is used when empty
        organization: Issuing organization
        style: Caption style ('professional', 'enthusiastic', 'technical')
        platform: Target platform ('linkedin', 'twitter', 'instagram', 'portfolio');
            matched case-insensitively, surrounding whitespace ignored
        include_skills: Whether to include skills in caption
        custom_message: Custom opening message (applied to the target platform only)

    Returns:
        Dict with 'success', 'analysis', 'captions', 'hashtags',
        'primary_caption' and 'manual_input', or a dict with an 'error' key
        when the platform is unsupported
    """
    supported_platforms = ('linkedin', 'twitter', 'instagram', 'portfolio')

    # An unknown platform used to raise KeyError at captions[platform]
    platform_key = platform.strip().lower() if isinstance(platform, str) else platform
    if platform_key not in supported_platforms:
        return {
            'error': f"Unsupported platform: {platform!r}. "
                     f"Choose one of: {', '.join(supported_platforms)}"
        }

    print("📝 Processing manual text input...")

    # Treat None like an empty string so the concatenations below cannot fail
    text, title, organization = text or "", title or "", organization or ""
    
    # Create manual analysis
    analysis = CertificateAnalysis()
    analysis.title = title or "Professional Development Program"
    analysis.organization = organization
    analysis.completion_status = "completed"
    analysis.certificate_type = "course"
    analysis.confidence_score = 0.9
    analysis.industry = analyzer.detect_industry(text + " " + title + " " + organization)
    
    # Extract skills from text
    analysis.skills_covered = analyzer._extract_skills_advanced(text + " " + title)
    
    # Generate captions for every platform; the custom opening is only used
    # for the platform the caller asked for.
    captions = {
        platform_name: caption_generator.generate_caption(
            analysis, style, platform_name, include_skills,
            custom_message if platform_name == platform_key else ""
        )
        for platform_name in supported_platforms
    }
    
    hashtags = caption_generator.generate_hashtags(analysis, platform_key)
    
    return {
        'success': True,
        'analysis': asdict(analysis),
        'captions': captions,
        'hashtags': hashtags,
        'primary_caption': captions[platform_key],
        'manual_input': True
    }

# Streamlit Interface (if available)
def create_streamlit_app():
    """Render the Streamlit web interface.

    Draws the option sidebar, the three input methods (upload, URL, manual
    text) and, after "Generate Caption" is clicked, the analysis and captions.
    Input precedence is upload, then URL, then manual text. Returns ``None``.
    """
    if not STREAMLIT_AVAILABLE:
        print("❌ Streamlit not available. Install with: pip install streamlit")
        return None
    
    st.title("🎓 Enhanced Certificate Caption Generator")
    st.write("Generate professional social media captions from your certificates!")
    
    # Sidebar options
    st.sidebar.header("Options")
    style = st.sidebar.selectbox("Caption Style", ["professional", "enthusiastic", "technical"])
    platform = st.sidebar.selectbox("Platform", ["linkedin", "twitter", "instagram", "portfolio"])
    include_skills = st.sidebar.checkbox("Include Skills", value=True)
    custom_message = st.sidebar.text_area("Custom Opening Message (optional)")
    
    # File upload
    uploaded_file = st.file_uploader("Upload Certificate", type=['pdf', 'png', 'jpg', 'jpeg'])
    
    # URL input
    url_input = st.text_input("Or enter certificate URL:")
    
    # Manual input
    with st.expander("Manual Input (Fallback)"):
        manual_text = st.text_area("Certificate Text")
        manual_title = st.text_input("Certificate Title")
        manual_org = st.text_input("Organization")
    
    if st.button("Generate Caption"):
        result = None
        
        if uploaded_file:
            # getvalue() returns the whole upload regardless of the current
            # stream position; read() yields nothing once the buffer has
            # already been consumed.
            file_bytes = uploaded_file.getvalue()
            result = process_certificate_advanced(file_bytes, uploaded_file.name, style, platform, include_skills, custom_message)
        elif url_input:
            result = process_url(url_input, style, platform, include_skills, custom_message)
        elif manual_text:
            result = process_manual_text(manual_text, manual_title, manual_org, style, platform, include_skills, custom_message)
        else:
            # Previously a click with no input did nothing at all
            st.warning("⚠️ Please upload a file, enter a URL, or provide certificate text first.")
        
        if result and result.get('success'):
            st.success("✅ Caption generated successfully!")
            
            # Display analysis
            with st.expander("📊 Certificate Analysis"):
                analysis = result['analysis']
                st.write(f"**Title:** {analysis['title']}")
                st.write(f"**Organization:** {analysis['organization']}")
                st.write(f"**Industry:** {analysis['industry']}")
                st.write(f"**Skills:** {', '.join(analysis['skills_covered'])}")
                st.write(f"**Confidence:** {analysis['confidence_score']:.2%}")
            
            # Display captions
            st.subheader(f"📝 {platform.title()} Caption")
            st.text_area("Copy this caption:", result['primary_caption'], height=200)
            
            # Other platform previews
            with st.expander("🌐 Other Platform Previews"):
                for p, caption in result['captions'].items():
                    if p != platform:
                        st.subheader(f"{p.title()} Version")
                        st.text_area(f"{p}_caption", caption, height=150, key=f"{p}_preview")
        
        elif result:
            st.error(f"❌ Error: {result.get('error', 'Unknown error')}")

# Gradio Interface (if available)
def create_gradio_app():
    """Build the Gradio web interface.

    Returns:
        The configured ``gr.Interface`` (call ``.launch()`` on it), or ``None``
        when Gradio is not available.
    """
    if not GRADIO_AVAILABLE:
        print("❌ Gradio not available. Install with: pip install gradio")
        return None
    
    def process_for_gradio(file, url, manual_text, manual_title, manual_org, style, platform, include_skills, custom_message):
        """Callback: returns (caption, analysis summary, hashtags) as three strings."""
        # A cleared dropdown arrives as None; fall back to the library defaults
        style = style or "professional"
        platform = platform or "linkedin"

        if file:
            # NOTE(review): depending on the Gradio version the File component
            # appears to deliver either a path string or a temp-file object
            # exposing the path as `.name`; accept both.
            file_input = file if isinstance(file, (str, bytes)) else getattr(file, 'name', file)
            result = process_certificate_advanced(file_input, style=style, platform=platform, include_skills=include_skills, custom_message=custom_message)
        elif url:
            result = process_url(url, style, platform, include_skills, custom_message)
        elif manual_text:
            result = process_manual_text(manual_text, manual_title, manual_org, style, platform, include_skills, custom_message)
        else:
            return "Please provide a file, URL, or manual text input.", "", ""
        
        if result and result.get('success'):
            analysis_text = f"""
**Analysis Results:**
- Title: {result['analysis']['title']}
- Organization: {result['analysis']['organization']}
- Industry: {result['analysis']['industry']}
- Skills: {', '.join(result['analysis']['skills_covered'])}
- Confidence: {result['analysis']['confidence_score']:.2%}
            """
            return result['primary_caption'], analysis_text, f"Hashtags: {' '.join(result['hashtags'])}"

        # Guard against a missing result instead of calling .get on None
        error_message = result.get('error', 'Unknown error') if result else 'Unknown error'
        return f"Error: {error_message}", "", ""
    
    iface = gr.Interface(
        fn=process_for_gradio,
        inputs=[
            gr.File(label="Upload Certificate"),
            gr.Textbox(label="Certificate URL"),
            gr.Textbox(label="Manual Text Input", lines=3),
            gr.Textbox(label="Certificate Title"),
            gr.Textbox(label="Organization"),
            # Explicit defaults so the callback never receives an unset choice
            gr.Dropdown(["professional", "enthusiastic", "technical"], label="Style", value="professional"),
            gr.Dropdown(["linkedin", "twitter", "instagram", "portfolio"], label="Platform", value="linkedin"),
            gr.Checkbox(label="Include Skills", value=True),
            gr.Textbox(label="Custom Message", lines=2)
        ],
        outputs=[
            gr.Textbox(label="Generated Caption", lines=10),
            gr.Textbox(label="Analysis"),
            gr.Textbox(label="Hashtags")
        ],
        title="🎓 Enhanced Certificate Caption Generator",
        description="Upload a certificate or enter details to generate professional social media captions!"
    )
    
    return iface

# Display results function
def display_results(result: Dict):
    """Pretty-print a processing result to stdout.

    Args:
        result: Dict returned by one of the ``process_*`` functions. A falsy
            value or a dict without a truthy 'success' prints the error and
            returns. Results without 'extraction_info' (manual text input)
            are supported.
    """
    if not result or not result.get('success'):
        print(f"❌ Error: {result.get('error', 'Unknown error') if result else 'No result'}")
        return
    
    analysis = result['analysis']
    
    print("\n" + "="*70)
    print("✅ CERTIFICATE ANALYSIS COMPLETED SUCCESSFULLY!")
    print("="*70)
    
    print(f"\n📊 CERTIFICATE DETAILS:")
    print("-" * 40)
    print(f"🎯 Title: {analysis['title']}")
    print(f"🏢 Organization: {analysis['organization']}")
    print(f"👤 Recipient: {analysis['recipient_name']}")
    print(f"🏭 Industry: {analysis['industry']}")
    # `or ''` keeps .title() from failing if a field is None
    print(f"📚 Type: {(analysis['certificate_type'] or '').title()}")
    print(f"✅ Status: {(analysis['completion_status'] or '').title()}")
    print(f"⏱️ Duration: {analysis['duration'] or 'Not specified'}")
    print(f"📅 Date: {analysis['date_issued'] or 'Not specified'}")
    print(f"🔧 Skills: {', '.join(analysis['skills_covered']) if analysis['skills_covered'] else 'Not detected'}")
    print(f"📈 Confidence: {analysis['confidence_score']:.1%}")
    
    print(f"\n🔍 EXTRACTION INFO:")
    print("-" * 40)
    # Manual-text results carry no 'extraction_info'; indexing it directly
    # raised KeyError for them.
    extraction = result.get('extraction_info')
    if extraction:
        print(f"Method: {extraction.get('method', extraction.get('engine_used', 'Unknown'))}")
        print(f"Confidence: {extraction.get('confidence', 0):.1%}")
    elif result.get('manual_input'):
        print("Method: Manual text input (no OCR performed)")
    else:
        print("Method: Unknown")
    
    print(f"\n📝 GENERATED CAPTIONS:")
    print("="*70)
    
    for platform, caption in result['captions'].items():
        print(f"\n🌐 {platform.upper()} VERSION:")
        print("-" * 40)
        print(caption)
        print(f"\nCharacters: {len(caption)}")
    
    print(f"\n🏷️ HASHTAGS:")
    print("-" * 40)
    print(" ".join(result['hashtags']))
    
    print("\n" + "="*70)
    print("📋 READY TO COPY AND PASTE!")
    print("="*70)

# Summary of the entry points defined in this cell
print(
    "🚀 Enhanced Certificate Caption Generator is ready!",
    "\nAvailable functions:",
    "📱 For Google Colab: upload_and_process_colab()",
    "💻 For local files: process_local_file('path/to/file')",
    "🌐 For URLs: process_url('https://example.com/cert.pdf')",
    "📝 For manual input: process_manual_text('certificate text')",
    sep="\n",
)

# Web front-ends are advertised only when their availability flag is set
if STREAMLIT_AVAILABLE:
    print("🖥️ For Streamlit app: create_streamlit_app()")
if GRADIO_AVAILABLE:
    print("🎨 For Gradio app: create_gradio_app()")

In [None]:
# Quick Start Functions and Examples

def quick_start():
    """Print the feature overview and copy-pasteable usage snippets.

    The snippets shown depend on the environment flags: Colab upload usage
    when ``COLAB_ENV`` is set, local/URL usage otherwise, plus the Streamlit
    and Gradio hints when those flags are set.
    """
    rule = "=" * 50

    intro_lines = (
        "🚀 ENHANCED CERTIFICATE CAPTION GENERATOR",
        rule,
        "\n✨ FEATURES:",
        "• Multiple OCR engines for better text extraction",
        "• 3 caption styles: Professional, Enthusiastic, Technical",
        "• 4 platforms: LinkedIn, Twitter, Instagram, Portfolio",
        "• Industry-specific hashtags",
        "• Support for files, URLs, and manual input",
        "• Works in Google Colab, Jupyter, and standalone",
        "\n🎯 QUICK START OPTIONS:",
        rule,
    )
    for line in intro_lines:
        print(line)

    # Environment-specific usage example
    if COLAB_ENV:
        usage_lines = (
            "📱 Google Colab - Upload file:",
            "   result = upload_and_process_colab()",
            "   display_results(result)",
        )
    else:
        usage_lines = (
            "💻 Local Environment:",
            "   # Process local file",
            "   result = process_local_file('path/to/certificate.pdf')",
            "   display_results(result)",
            "",
            "   # Process from URL",
            "   result = process_url('https://example.com/cert.pdf')",
            "   display_results(result)",
        )
    for line in usage_lines:
        print(line)

    common_lines = (
        "\n📝 Manual Input (Fallback):",
        "   result = process_manual_text(",
        "       text='Course completion certificate...',",
        "       title='Data Science Bootcamp',",
        "       organization='Tech Academy'",
        "   )",
        "   display_results(result)",
        "\n🎨 Customization Options:",
        "   style: 'professional', 'enthusiastic', 'technical'",
        "   platform: 'linkedin', 'twitter', 'instagram', 'portfolio'",
        "   include_skills: True/False",
        "   custom_message: 'Your custom opening message'",
    )
    for line in common_lines:
        print(line)

    if STREAMLIT_AVAILABLE:
        for line in (
            "\n🖥️ Web Interface (Streamlit):",
            "   app = create_streamlit_app()",
            "   # Then run: streamlit run your_script.py",
        ):
            print(line)

    if GRADIO_AVAILABLE:
        for line in (
            "\n🎨 Web Interface (Gradio):",
            "   app = create_gradio_app()",
            "   app.launch()",
        ):
            print(line)

def demo_with_sample():
    """Run the manual-text pipeline on a built-in sample certificate and print the result."""
    sample_text = '''
    Certificate of Completion
    
    This certifies that John Doe has successfully completed the
    Data Science and Machine Learning Bootcamp
    
    Topics covered:
    Python Programming, Statistics, Machine Learning, Data Visualization,
    Deep Learning, Natural Language Processing, SQL, Git
    
    Duration: 12 weeks
    Issued by: TechAcademy Institute
    Date: October 2024
    '''

    print("🎯 DEMO: Processing sample certificate...")

    demo_request = dict(
        text=sample_text,
        title="Data Science and Machine Learning Bootcamp",
        organization="TechAcademy Institute",
        style="enthusiastic",
        platform="linkedin",
    )
    outcome = process_manual_text(**demo_request)

    # A falsy result means the pipeline produced nothing to show
    if not outcome:
        print("❌ Demo failed")
        return
    display_results(outcome)

def test_all_styles():
    """Print a LinkedIn caption in every style for a fixed sample analysis."""
    sample_fields = dict(
        title="Advanced Python Programming",
        organization="Code Institute",
        certificate_type="course",
        skills_covered=["Python", "Object-Oriented Programming", "API Development", "Testing"],
        industry="technology",
        confidence_score=0.95,
    )
    sample_analysis = CertificateAnalysis(**sample_fields)

    print("🎨 TESTING ALL CAPTION STYLES:")
    print("="*50)

    for style_name in ("professional", "enthusiastic", "technical"):
        print(f"\n📝 {style_name.upper()} STYLE:")
        print("-" * 30)
        rendered = caption_generator.generate_caption(sample_analysis, style_name, "linkedin")
        print(rendered)
        print(f"\nLength: {len(rendered)} characters")

# Environment-specific startup
def auto_start():
    """Print start-up guidance for the first matching environment.

    Checked in order: Colab, Streamlit, Gradio; anything else is treated as
    a plain local environment.
    """
    if COLAB_ENV:
        print("🔥 AUTO-STARTING GOOGLE COLAB INTERFACE...")
        quick_start()
        print("\n🎬 To begin, run: upload_and_process_colab()")
        return

    if STREAMLIT_AVAILABLE:
        for line in (
            "🔥 STREAMLIT DETECTED!",
            "Run this to start web interface:",
            "create_streamlit_app()",
        ):
            print(line)
        return

    if GRADIO_AVAILABLE:
        for line in (
            "🔥 GRADIO DETECTED!",
            "Run this to start web interface:",
            "app = create_gradio_app()",
            "app.launch()",
        ):
            print(line)
        return

    print("🔥 LOCAL ENVIRONMENT DETECTED")
    quick_start()

# Utility functions
def save_caption_to_file(caption: str, filename: str = ""):
    """Write ``caption`` to a UTF-8 text file.

    Args:
        caption: Text to save.
        filename: Target path; when empty, a timestamped
            ``linkedin_caption_<YYYYmmdd_HHMMSS>.txt`` name is used.

    Failures are reported on stdout rather than raised.
    """
    if filename:
        target = filename
    else:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        target = f"linkedin_caption_{timestamp}.txt"

    try:
        with open(target, mode='w', encoding='utf-8') as out:
            out.write(caption)
        print(f"✅ Caption saved to: {target}")
    except Exception as err:
        print(f"❌ Failed to save caption: {err}")

def analyze_caption_metrics(caption: str, platform: str = "linkedin") -> Dict:
    """Compute simple size metrics for a caption and compare them to platform limits.

    Args:
        caption: Caption text to measure.
        platform: Platform whose limits apply; matched case-insensitively.
            Unknown platforms use the LinkedIn limits.

    Returns:
        Dict with 'character_count', 'word_count', 'hashtag_count',
        'emoji_count' (counted per code point), 'within_limit',
        'optimal_length', 'platform' (as passed in) and 'limits'.
    """
    # Code-point ranges treated as emoji. The original class missed the
    # Misc Symbols/Dingbats block (e.g. U+2705, U+2728) and the supplemental
    # emoji blocks (U+1F900-U+1FAFF), so those were never counted.
    emoji_pattern = (
        "["
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F680-\U0001F6FF"  # transport & map
        "\U0001F900-\U0001FAFF"  # supplemental symbols, extended pictographs
        "\u2600-\u27BF"          # misc symbols, dingbats
        "\u2B50-\u2B55"          # star .. heavy circle
        "]"
    )

    metrics = {
        'character_count': len(caption),
        'word_count': len(caption.split()),
        'hashtag_count': len(re.findall(r'#\w+', caption)),
        'emoji_count': len(re.findall(emoji_pattern, caption))
    }
    
    # Platform-specific limits
    limits = {
        'linkedin': {'max_chars': 3000, 'recommended_chars': 1300},
        'twitter': {'max_chars': 280, 'recommended_chars': 240},
        'instagram': {'max_chars': 2200, 'recommended_chars': 1500},
        'portfolio': {'max_chars': 1000, 'recommended_chars': 500}
    }
    
    # Normalise only for the lookup, so "Twitter" no longer silently gets
    # LinkedIn limits; the reported platform stays exactly as given.
    lookup_key = platform.strip().lower() if isinstance(platform, str) else platform
    platform_limits = limits.get(lookup_key, limits['linkedin'])
    metrics['within_limit'] = metrics['character_count'] <= platform_limits['max_chars']
    metrics['optimal_length'] = metrics['character_count'] <= platform_limits['recommended_chars']
    metrics['platform'] = platform
    metrics['limits'] = platform_limits
    
    return metrics

def print_metrics(caption: str, platform: str = "linkedin"):
    """Print the metrics from ``analyze_caption_metrics`` as a short report."""
    metrics = analyze_caption_metrics(caption, platform)

    limit_mark = '✅' if metrics['within_limit'] else '❌'
    optimal_mark = '✅' if metrics['optimal_length'] else '⚠️'

    report_lines = [
        f"\n📊 CAPTION METRICS ({platform.upper()}):",
        "-" * 30,
        f"Characters: {metrics['character_count']}",
        f"Words: {metrics['word_count']}",
        f"Hashtags: {metrics['hashtag_count']}",
        f"Emojis: {metrics['emoji_count']}",
        f"Within limit: {limit_mark}",
        f"Optimal length: {optimal_mark}",
    ]
    print("\n".join(report_lines))

# Auto-run quick start
print("\n" + "="*60, "🎉 ENHANCED CERTIFICATE CAPTION GENERATOR LOADED!", "="*60, sep="\n")

# Show the usage guide as soon as the cell has run
quick_start()

print(
    "\n🚀 Ready to process your certificates!",
    "Type quick_start() for help, or demo_with_sample() for a demo.",
    sep="\n",
)

In [None]:
# Demo the Enhanced System
print("🎬 DEMONSTRATING THE ENHANCED CERTIFICATE CAPTION GENERATOR", "="*60, sep="\n")

# Inline walkthrough of every caption style using a hand-built analysis
print("🎨 TESTING ALL CAPTION STYLES:", "="*50, sep="\n")

sample_analysis = CertificateAnalysis(
    title="Advanced Python Programming Bootcamp",
    organization="Code Institute",
    industry="technology",
    certificate_type="course",
    completion_status="completed",
    confidence_score=0.95,
    skills_covered=["Python", "Object-Oriented Programming", "API Development", "Testing", "Data Analysis"],
)

styles = ["professional", "enthusiastic", "technical"]

for style in styles:
    print(f"\n📝 {style.upper()} STYLE:", "-" * 40, sep="\n")
    caption = caption_generator.generate_caption(sample_analysis, style, "linkedin", include_skills=True)
    print(caption, f"\n📊 Length: {len(caption)} characters", sep="\n")

    # Hashtags are requested for the analysis and platform; only the first five are shown
    hashtags = caption_generator.generate_hashtags(sample_analysis, "linkedin")
    print(f"🏷️ Hashtags: {' '.join(hashtags[:5])}")

print("\n" + "="*60, "✅ DEMO COMPLETED SUCCESSFULLY!", "="*60, sep="\n")

print(
    "\n🚀 TO USE THE ENHANCED SYSTEM:",
    "• For local files: process_local_file('path/to/certificate.pdf')",
    "• For URLs: process_url('https://example.com/cert.pdf')",
    "• For manual input: process_manual_text('your certificate text', 'title', 'organization')",
    "• For web interface: create_streamlit_app() or create_gradio_app()",
    sep="\n",
)

In [None]:
# Create and Launch Web Interface (Optional)

print("🖥️ WEB INTERFACE OPTIONS:", "="*40, sep="\n")

# Option 1: Gradio (quick demos)
print(
    "1️⃣ GRADIO INTERFACE (Recommended)",
    "   Simple, fast, and works great for demos",
    "   Uncomment the lines below to launch:",
    "",
    "   # app = create_gradio_app()",
    "   # app.launch(share=True)  # share=True creates public link",
    "",
    sep="\n",
)

# Option 2: Streamlit (run as a script)
print(
    "2️⃣ STREAMLIT INTERFACE (Production-ready)",
    "   More features, better for hosting",
    "   Save this notebook as a .py file and run:",
    "   streamlit run your_script.py",
    "",
    sep="\n",
)

# Option 3: direct function call from code
print(
    "3️⃣ COMMAND LINE USAGE EXAMPLE:",
    "   # Process a local certificate file",
    "   # result = process_local_file('certificate.pdf', style='enthusiastic', platform='linkedin')",
    "   # display_results(result)",
    sep="\n",
)

print(
    "\n🎯 FEATURES SUMMARY:",
    "✅ Multi-platform support (Google Colab, Jupyter, VS Code, Standalone)",
    "✅ Multiple OCR engines (PyTesseract + EasyOCR)",
    "✅ 3 caption styles (Professional, Enthusiastic, Technical)",
    "✅ 4 output formats (LinkedIn, Twitter, Instagram, Portfolio)",
    "✅ Industry-specific hashtags",
    "✅ Support for files, URLs, and manual input",
    "✅ Advanced image preprocessing",
    "✅ Web interfaces (Streamlit & Gradio)",
    "✅ Enhanced error handling and fallbacks",
    sep="\n",
)

print(
    "\n🚀 Your enhanced certificate caption generator is ready to use!",
    "   Choose your preferred method and start generating professional captions! 🎉",
    sep="\n",
)