<a href="https://colab.research.google.com/github/rmehdi1/CommunityProject_Mobilize/blob/main/MobilizeNow_Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# ============================================================================
# INTERACTIVE PETITION SUCCESS PREDICTOR - STANDALONE COLAB
# ============================================================================
"""
This standalone Colab file uses your trained model and features to provide
an interactive petition success prediction interface.

SETUP INSTRUCTIONS:
1. Upload these files to your Colab environment:
   - best_model.pkl (your trained model)
   - final_features.pkl (list of feature names)
   - categorical_encoders.pkl (label encoders)
   - processed_petition_data.xlsx (for reference benchmarks)

2. Run all cells in order
3. Use the interactive form at the bottom to analyze petitions
"""

# Install required packages
!pip install ipywidgets textstat nltk scikit-learn pandas numpy matplotlib seaborn -q

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import pickle
import re
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# Text processing imports
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize, sent_tokenize
try:
    from textstat import flesch_reading_ease, flesch_kincaid_grade, gunning_fog, automated_readability_index
except ImportError:
    print("Installing textstat...")
    !pip install textstat
    from textstat import flesch_reading_ease, flesch_kincaid_grade, gunning_fog, automated_readability_index

# Download NLTK data
nltk.download('vader_lexicon', quiet=True)
nltk.download('punkt', quiet=True)
# Recent NLTK releases load the sentence/word tokenizer tables from the
# 'punkt_tab' resource instead of 'punkt'. Without it sent_tokenize() and
# word_tokenize() raise LookupError, which the readability code swallows and
# reports as all-zero metrics. Fetch both so either NLTK generation works.
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)

print("🚀 PETITION SUCCESS PREDICTOR - INTERACTIVE INTERFACE")
print("="*80)

# ============================================================================
# LOAD TRAINED MODEL AND ARTIFACTS
# ============================================================================

def load_model_artifacts():
    """Load the trained model and its companion artifacts from the working directory.

    Required files (all pickles): ``best_model.pkl``, ``final_features.pkl`` and
    ``categorical_encoders.pkl``. ``processed_petition_data.xlsx`` is optional.

    Returns:
        dict | None: A dict with the keys ``'model'``, ``'features'``,
        ``'encoders'`` and ``'reference_data'`` (``None`` when the optional
        workbook is missing or unreadable), or ``None`` when any required file
        is missing.
    """
    artifacts = {}

    try:
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only upload pickles that you produced yourself or fully trust.

        # Load trained model
        with open('best_model.pkl', 'rb') as f:
            artifacts['model'] = pickle.load(f)
        print("✅ Trained model loaded successfully")

        # Load feature names
        with open('final_features.pkl', 'rb') as f:
            artifacts['features'] = pickle.load(f)
        print(f"✅ Feature list loaded ({len(artifacts['features'])} features)")

        # Load categorical encoders
        with open('categorical_encoders.pkl', 'rb') as f:
            artifacts['encoders'] = pickle.load(f)
        print("✅ Categorical encoders loaded")

    except FileNotFoundError as e:
        print(f"❌ Error loading artifacts: {e}")
        print("\nPlease upload the following files to your Colab environment:")
        print("- best_model.pkl")
        print("- final_features.pkl")
        print("- categorical_encoders.pkl")
        print("- processed_petition_data.xlsx (optional)")
        return None

    # The reference workbook is optional, so no failure to read it may discard
    # the model that was just loaded; fall back to None in every failure case.
    try:
        artifacts['reference_data'] = pd.read_excel('processed_petition_data.xlsx')
        print(f"✅ Reference data loaded ({len(artifacts['reference_data'])} records)")
    except FileNotFoundError:
        print("⚠️  Reference data not found - using default benchmarks")
        artifacts['reference_data'] = None
    except Exception as e:
        # e.g. corrupt workbook or missing Excel engine (openpyxl)
        print(f"⚠️  Reference data could not be read ({e}) - using default benchmarks")
        artifacts['reference_data'] = None

    return artifacts

# Load artifacts once when the cell runs; load_model_artifacts() returns None
# when a required file is missing, and the code below checks for that.
model_artifacts = load_model_artifacts()

# ============================================================================
# PETITION PROCESSING PIPELINE
# ============================================================================

class StandalonePetitionPipeline:
    """Standalone petition processing pipeline that replicates your model's feature engineering"""

    def __init__(self, model_artifacts):
        self.model = model_artifacts['model'] if model_artifacts else None
        self.feature_names = model_artifacts['features'] if model_artifacts else []
        self.encoders = model_artifacts['encoders'] if model_artifacts else {}
        self.reference_data = model_artifacts['reference_data'] if model_artifacts else None

        # Initialize sentiment analyzer
        self.sia = SentimentIntensityAnalyzer()

        # Setup keyword categories (from your analysis)
        self.setup_keywords()

    def setup_keywords(self):
        """Define keyword categories for analysis"""
        self.urgency_keywords = [
            'urgent', 'immediate', 'immediately', 'now', 'today', 'emergency', 'crisis',
            'deadline', 'time running out', "before it's too late", 'last chance',
            'act now', 'breaking', 'critical', 'asap', 'quickly', 'rapidly', 'soon'
        ]

        self.action_keywords = [
            'stop', 'save', 'protect', 'demand', 'fight', 'defend', 'prevent',
            'ban', 'end', 'cancel', 'reverse', 'change', 'fix', 'solve',
            'help', 'support', 'join', 'sign', 'act', 'take action', 'make',
            'force', 'require', 'ensure', 'guarantee', 'implement', 'establish'
        ]

        self.power_words = [
            'justice', 'freedom', 'rights', 'equality', 'fair', 'unfair', 'wrong',
            'illegal', 'violation', 'abuse', 'corruption', 'scandal', 'outrage',
            'discrimination', 'injustice', 'betrayal', 'exploitation', 'oppression'
        ]

        self.authority_keywords = [
            'government', 'minister', 'ministry', 'department', 'authority', 'official',
            'court', 'judge', 'police', 'administration', 'commissioner', 'director',
            'secretary', 'chief', 'president', 'prime minister', 'governor', 'congress'
        ]

        self.cta_patterns = [
            r'\bsign\s+this\b', r'\bsign\s+now\b', r'\bjoin\s+us\b', r'\bhelp\s+us\b',
            r'\btake\s+action\b', r'\bact\s+now\b', r'\bmake\s+a\s+difference\b',
            r'\bdemand\s+action\b', r'\bstop\s+this\b', r'\bforce\s+them\b'
        ]

    def clean_html(self, text):
        """Remove HTML tags and clean text"""
        if pd.isna(text) or text is None:
            return ""
        clean = re.sub('<.*?>', '', str(text))
        return ' '.join(clean.split())

    def count_html_tags(self, text):
        """Count HTML tags in text"""
        if pd.isna(text) or text is None:
            return 0
        return len(re.findall('<.*?>', str(text)))

    def count_keywords(self, text, keywords):
        """Count keyword occurrences"""
        if pd.isna(text) or text is None:
            return 0
        clean_text = self.clean_html(text).lower()
        count = 0
        for keyword in keywords:
            count += clean_text.count(keyword.lower())
        return count

    def get_sentiment_scores(self, text):
        """Get sentiment scores"""
        if pd.isna(text) or text is None:
            return {'compound': 0, 'pos': 0, 'neg': 0, 'neu': 0}
        clean_text = self.clean_html(text)
        return self.sia.polarity_scores(clean_text)

    def calculate_readability(self, text):
        """Calculate readability metrics"""
        if pd.isna(text) or len(str(text).strip()) < 10:
            return {
                'flesch_ease': 0, 'flesch_kincaid': 0, 'gunning_fog': 0,
                'automated_readability': 0, 'avg_sentence_length': 0,
                'avg_word_length': 0, 'vocab_diversity': 0, 'caps_ratio': 0
            }

        clean_text = self.clean_html(text)

        try:
            flesch_ease = flesch_reading_ease(clean_text)
            flesch_kincaid = flesch_kincaid_grade(clean_text)
            gunning_fog_score = gunning_fog(clean_text)
            automated_readability = automated_readability_index(clean_text)
        except:
            flesch_ease = flesch_kincaid = gunning_fog_score = automated_readability = 0

        # Additional metrics
        try:
            sentences = sent_tokenize(clean_text)
            words = word_tokenize(clean_text)
            avg_sentence_length = len(words) / len(sentences) if sentences else 0
            avg_word_length = sum(len(word) for word in words) / len(words) if words else 0
            unique_words = set(word.lower() for word in words if word.isalpha())
            vocab_diversity = len(unique_words) / len(words) if words else 0
            caps_words = sum(1 for word in words if word.isupper() and len(word) > 1)
            caps_ratio = caps_words / len(words) if words else 0
        except:
            avg_sentence_length = avg_word_length = vocab_diversity = caps_ratio = 0

        return {
            'flesch_ease': flesch_ease,
            'flesch_kincaid': flesch_kincaid,
            'gunning_fog': gunning_fog_score,
            'automated_readability': automated_readability,
            'avg_sentence_length': avg_sentence_length,
            'avg_word_length': avg_word_length,
            'vocab_diversity': vocab_diversity,
            'caps_ratio': caps_ratio
        }

    def extract_features(self, petition_data):
        """Extract all features from petition data"""
        features = {}

        # Text columns to process
        text_columns = ['title', 'description', 'letter_body', 'targeting_description']

        # Process each text column
        for col in text_columns:
            if col in petition_data:
                text = petition_data[col]

                # Basic text features
                features[f'{col}_length'] = len(str(text)) if pd.notna(text) else 0
                features[f'{col}_clean_length'] = len(self.clean_html(text))
                features[f'{col}_word_count'] = len(self.clean_html(text).split()) if self.clean_html(text) else 0

                # HTML features
                if col == 'description':
                    features[f'{col}_html_tags'] = self.count_html_tags(text)

                # Keyword counts
                features[f'{col}_urgency_count'] = self.count_keywords(text, self.urgency_keywords)
                features[f'{col}_action_count'] = self.count_keywords(text, self.action_keywords)
                features[f'{col}_power_count'] = self.count_keywords(text, self.power_words)
                features[f'{col}_authority_count'] = self.count_keywords(text, self.authority_keywords)

                # Boolean keyword features
                features[f'{col}_has_urgency'] = int(features[f'{col}_urgency_count'] > 0)
                features[f'{col}_has_action'] = int(features[f'{col}_action_count'] > 0)

                # CTA detection
                cta_count = sum(len(re.findall(pattern, str(text).lower())) for pattern in self.cta_patterns) if pd.notna(text) else 0
                features[f'{col}_cta_count'] = cta_count
                features[f'{col}_has_cta'] = int(cta_count > 0)

                # Numbers and statistics
                features[f'{col}_numbers_count'] = len(re.findall(r'\d+', str(text))) if pd.notna(text) else 0
                features[f'{col}_has_statistics'] = int(bool(re.search(r'\d+%|\d+\s*(percent|million|thousand|billion)', str(text), re.IGNORECASE)) if pd.notna(text) else False)

                # Text structure
                features[f'{col}_paragraph_count'] = len([p for p in str(text).split('\n') if p.strip()]) if pd.notna(text) else 0
                features[f'{col}_question_count'] = str(text).count('?') if pd.notna(text) else 0

                # Sentiment features
                sentiment = self.get_sentiment_scores(text)
                features[f'{col}_sentiment_compound'] = sentiment['compound']
                features[f'{col}_sentiment_positive'] = sentiment['pos']
                features[f'{col}_sentiment_negative'] = sentiment['neg']
                features[f'{col}_emotional_intensity'] = sentiment['pos'] + sentiment['neg']

                # Readability features
                readability = self.calculate_readability(text)
                for metric, value in readability.items():
                    features[f'{col}_{metric}'] = value

        # Strategic composite features
        features['content_comprehensiveness_score'] = (
            features.get('title_clean_length', 0) +
            features.get('description_clean_length', 0) +
            features.get('letter_body_clean_length', 0)
        )

        # Professional sophistication score (simplified)
        desc_complexity = features.get('description_flesch_kincaid', 0)
        desc_length = features.get('description_clean_length', 0)
        html_formatting = features.get('description_html_tags', 0)

        title_complexity_norm = min(features.get('title_flesch_kincaid', 0) / 20, 1)
        desc_length_norm = min(desc_length / 2000, 1)
        html_tags_norm = min(html_formatting / 25, 1)

        features['professional_sophistication_score'] = (
            title_complexity_norm * 0.4 + desc_length_norm * 0.3 + html_tags_norm * 0.3
        )

        # Strategic urgency score
        urgency_total = features.get('title_urgency_count', 0) + features.get('description_urgency_count', 0)
        action_total = features.get('title_action_count', 0) + features.get('description_action_count', 0)
        sentiment_score = max(0, features.get('title_sentiment_compound', 0) + 1) / 2

        features['strategic_urgency_score'] = min((urgency_total + action_total) / 10 * 0.7 + sentiment_score * 0.3, 1)

        # Authority targeting score
        features['authority_targeting_score'] = (
            features.get('title_authority_count', 0) +
            features.get('description_authority_count', 0) +
            features.get('targeting_description_word_count', 0) / 10
        )

        # Message coherence score (simplified)
        features['message_coherence_score'] = 0.5  # Default value

        # Categorical features
        features['original_locale_encoded'] = self.encoders.get('original_locale', {}).get(
            petition_data.get('original_locale', 'en-IN'), 0)
        features['has_location_encoded'] = int(petition_data.get('has_location', True))

        return features

    def predict_success(self, petition_data):
        """Predict petition success probability"""
        if not self.model:
            return self._demo_prediction(petition_data)

        # Extract features
        features = self.extract_features(petition_data)

        # Create feature vector
        feature_vector = []
        for feature_name in self.feature_names:
            feature_vector.append(features.get(feature_name, 0))

        # Make prediction
        feature_array = np.array(feature_vector).reshape(1, -1)
        probability = self.model.predict_proba(feature_array)[0, 1]
        prediction = self.model.predict(feature_array)[0]

        return probability, prediction, features

    def _demo_prediction(self, petition_data):
        """Demo prediction when model is not available"""
        features = self.extract_features(petition_data)

        # Simple scoring system
        score = 0.0

        # Content length (40% weight)
        content_score = features.get('content_comprehensiveness_score', 0)
        if content_score >= 2000:
            score += 0.4
        elif content_score >= 1000:
            score += 0.25
        elif content_score >= 500:
            score += 0.15

        # HTML formatting (20% weight)
        html_tags = features.get('description_html_tags', 0)
        score += min(html_tags / 25, 1) * 0.20

        # Strategic language (25% weight)
        urgency_count = features.get('title_urgency_count', 0) + features.get('description_urgency_count', 0)
        action_count = features.get('title_action_count', 0) + features.get('description_action_count', 0)
        strategic_score = min((urgency_count + action_count) / 8, 1)
        score += strategic_score * 0.25

        # Professional sophistication (15% weight)
        prof_score = features.get('professional_sophistication_score', 0)
        score += prof_score * 0.15

        probability = min(score, 0.95)
        prediction = 1 if probability >= 0.5 else 0

        return probability, prediction, features

# Initialize pipeline
# model_artifacts is None when a required file could not be loaded; in that
# case the pipeline is built from an empty dict, which leaves it without a
# model so predict_success() uses the heuristic demo scorer.
if model_artifacts:
    pipeline = StandalonePetitionPipeline(model_artifacts)
    print("✅ Petition processing pipeline initialized with trained model")
else:
    pipeline = StandalonePetitionPipeline({})
    print("⚠️  Running in demo mode - upload model files for full functionality")

# ============================================================================
# FEEDBACK GENERATION SYSTEM
# ============================================================================

def generate_detailed_feedback(petition_data, features, probability, prediction):
    """Build the feedback report shown to the user for one analysed petition.

    Args:
        petition_data: The submitted petition fields (not inspected here).
        features: Feature dict produced by the pipeline; missing entries count as 0.
        probability: Predicted success probability, used for the grade.
        prediction: Predicted label, passed through unchanged.

    Returns:
        dict: ``probability``, ``prediction``, ``grade``, ``overall``,
        ``strengths``, ``improvements``, ``specific_recommendations`` and a
        ``metrics`` dict of pre-formatted display strings.
    """
    # Grade bands, highest floor first: (minimum probability, grade, summary)
    grade_bands = (
        (0.8, "🏆 EXCELLENT (A+)",
         "Your petition has exceptional success potential!"),
        (0.7, "🎯 VERY GOOD (A)",
         "Your petition has strong success potential with minor optimizations."),
        (0.6, "✅ GOOD (B+)",
         "Your petition shows good potential with some improvements needed."),
        (0.5, "📈 MODERATE (B)",
         "Your petition has moderate potential - several improvements recommended."),
        (0.4, "⚠️ NEEDS WORK (C)",
         "Your petition needs significant improvements to succeed."),
    )
    lowest_band = (
        "🔧 MAJOR REVISION NEEDED (D)",
        "Your petition requires major restructuring for success.",
    )
    grade, summary = next(
        ((label, text) for floor, label, text in grade_bands if probability >= floor),
        lowest_band,
    )

    strengths = []
    improvements = []
    recommendations = []
    report = {
        'probability': probability,
        'prediction': prediction,
        'grade': grade,
        'strengths': strengths,
        'improvements': improvements,
        'specific_recommendations': recommendations,
        'metrics': {},
        'overall': summary,
    }

    def feature(name):
        # Features that were not extracted are treated as zero
        return features.get(name, 0)

    total_length = feature('content_comprehensiveness_score')
    tag_count = feature('description_html_tags')
    urgency_hits = feature('title_urgency_count') + feature('description_urgency_count')
    action_hits = feature('title_action_count') + feature('description_action_count')
    sophistication = feature('professional_sophistication_score')

    # Content comprehensiveness
    if total_length < 1000:
        improvements.append("📝 Increase content comprehensiveness")
        recommendations.append(
            f"Expand total content to 2000+ characters (current: {total_length:.0f})"
        )
    else:
        strengths.append(
            "✅ Excellent content comprehensiveness" if total_length >= 2000
            else "✅ Good content length"
        )

    # Formatting of the description
    if tag_count < 15:
        improvements.append("🎨 Improve formatting and structure")
        recommendations.append(
            f"Add HTML formatting: <b>bold</b>, <strong>emphasis</strong>, <h3>headers</h3> (current: {tag_count} tags)"
        )
    else:
        strengths.append("✅ Professional HTML formatting")

    # Urgency wording
    if urgency_hits < 2:
        recommendations.append(
            "Add urgency keywords: 'immediate', 'urgent', 'critical', 'emergency'"
        )
    else:
        strengths.append("✅ Strong urgency language")

    # Action wording
    if action_hits < 3:
        recommendations.append(
            "Include more action words: 'demand', 'stop', 'implement', 'enforce'"
        )
    else:
        strengths.append("✅ Strong action-oriented language")

    # Professional sophistication
    if sophistication < 0.6:
        improvements.append("🎓 Increase professional sophistication")
        recommendations.append(
            "Use more sophisticated language and technical terminology"
        )
    else:
        strengths.append("✅ High professional sophistication")

    # Pre-formatted values for the metric cards
    report['metrics'] = {
        'Content Length': f"{total_length:.0f} characters",
        'HTML Tags': f"{tag_count}",
        'Urgency Words': f"{urgency_hits}",
        'Action Words': f"{action_hits}",
        'Professional Score': f"{sophistication:.2f}",
        'Success Probability': f"{probability:.1%}",
    }

    return report

# ============================================================================
# INTERACTIVE USER INTERFACE
# ============================================================================

def create_petition_form():
    """Create interactive petition analysis form.

    Renders the CSS, header, input widgets and the three action buttons, then
    wires the buttons to handlers that analyse the petition, load a sample
    petition, or clear the text fields. All results are written into a single
    output widget below the buttons. Returns nothing.
    """
    # Imported locally so this function carries its own dependency; used to
    # make exception text safe to embed in the error panel.
    from html import escape

    # Apply custom CSS styling
    display(HTML("""
    <style>
    .petition-form {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 25px;
        border-radius: 15px;
        margin: 20px 0;
        color: white;
    }
    .form-title {
        font-size: 28px;
        font-weight: bold;
        text-align: center;
        margin-bottom: 20px;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
    }
    .form-subtitle {
        font-size: 16px;
        text-align: center;
        margin-bottom: 25px;
        opacity: 0.9;
    }
    .results-container {
        background: rgba(255,255,255,0.95);
        color: #333;
        padding: 25px;
        border-radius: 15px;
        margin: 20px 0;
        box-shadow: 0 10px 30px rgba(0,0,0,0.2);
    }
    .metric-card {
        background: #f8f9fa;
        padding: 15px;
        border-radius: 10px;
        margin: 10px 0;
        border-left: 4px solid #667eea;
    }
    .recommendation {
        background: #e8f5e8;
        padding: 12px;
        margin: 8px 0;
        border-radius: 8px;
        border-left: 3px solid #4CAF50;
    }
    .improvement {
        background: #fff3cd;
        padding: 12px;
        margin: 8px 0;
        border-radius: 8px;
        border-left: 3px solid #ffc107;
    }
    </style>
    """))

    # Form header
    display(HTML("""
    <div class="petition-form">
        <div class="form-title">🎯 AI-Powered Petition Success Predictor</div>
        <div class="form-subtitle">
            Get instant feedback on your petition's success potential using advanced machine learning
        </div>
    </div>
    """))

    # Create form widgets
    title_widget = widgets.Textarea(
        value='',
        placeholder='Enter your petition title (e.g., "Mandatory Installation of Oxygen Plants in All Hospitals to Save Lives")',
        description='Title:',
        layout=widgets.Layout(width='100%', height='80px'),
        style={'description_width': '120px'}
    )

    description_widget = widgets.Textarea(
        value='',
        placeholder='Enter detailed petition description with background, problem statement, and proposed solution. Use HTML formatting for better results.',
        description='Description:',
        layout=widgets.Layout(width='100%', height='200px'),
        style={'description_width': '120px'}
    )

    letter_body_widget = widgets.Textarea(
        value='',
        placeholder='Enter the letter body that will be sent to decision makers. Be specific about your demands.',
        description='Letter Body:',
        layout=widgets.Layout(width='100%', height='150px'),
        style={'description_width': '120px'}
    )

    targeting_widget = widgets.Textarea(
        value='',
        placeholder='Who is this petition targeting? (e.g., "Ministry of Health, State Governments, Hospital Administrators")',
        description='Target:',
        layout=widgets.Layout(width='100%', height='80px'),
        style={'description_width': '120px'}
    )

    # Advanced options
    locale_widget = widgets.Dropdown(
        options=[('India (en-IN)', 'en-IN'), ('United States (en-US)', 'en-US'),
                ('United Kingdom (en-GB)', 'en-GB'), ('Canada (en-CA)', 'en-CA')],
        value='en-IN',
        description='Locale:',
        style={'description_width': '120px'}
    )

    location_widget = widgets.Checkbox(
        value=True,
        description='Has Geographic Location',
        style={'description_width': '200px'}
    )

    # Buttons
    analyze_button = widgets.Button(
        description='🔍 Analyze Petition',
        button_style='primary',
        layout=widgets.Layout(width='200px', height='45px')
    )

    sample_button = widgets.Button(
        description='📝 Load Sample',
        button_style='info',
        layout=widgets.Layout(width='200px', height='45px')
    )

    clear_button = widgets.Button(
        description='🗑️ Clear Form',
        button_style='warning',
        layout=widgets.Layout(width='200px', height='45px')
    )

    # Output area
    output = widgets.Output()

    # Display form elements
    display(title_widget)
    display(description_widget)
    display(letter_body_widget)
    display(targeting_widget)

    # Advanced options in accordion
    advanced_accordion = widgets.Accordion(children=[
        widgets.VBox([locale_widget, location_widget])
    ])
    advanced_accordion.set_title(0, '⚙️ Advanced Options')
    display(advanced_accordion)

    # Button row
    button_box = widgets.HBox([analyze_button, sample_button, clear_button])
    display(button_box)
    display(output)

    # Event handlers
    def on_analyze_clicked(b):
        """Validate the form, run the prediction pipeline and render the results."""
        with output:
            clear_output()

            # Get form values
            petition_data = {
                'title': title_widget.value.strip(),
                'description': description_widget.value.strip(),
                'letter_body': letter_body_widget.value.strip(),
                'targeting_description': targeting_widget.value.strip(),
                'original_locale': locale_widget.value,
                'has_location': location_widget.value
            }

            # Validate inputs
            if not petition_data['title'] or not petition_data['description']:
                display(HTML("""
                <div style="background: #ffebee; border: 1px solid #f44336; border-radius: 5px; padding: 15px; color: #c62828;">
                    <h3>❌ Missing Required Fields</h3>
                    <p>Please fill in at least the title and description fields.</p>
                </div>
                """))
                return

            # Show processing message
            display(HTML("""
            <div style="background: #e3f2fd; border: 1px solid #2196f3; border-radius: 5px; padding: 15px; color: #1565c0; text-align: center;">
                <h3>🔄 Analyzing Your Petition...</h3>
                <p>Please wait while we process your petition through our AI analysis pipeline.</p>
            </div>
            """))

            try:
                # Make prediction
                probability, prediction, features = pipeline.predict_success(petition_data)

                # Generate feedback
                feedback = generate_detailed_feedback(petition_data, features, probability, prediction)

                # Clear processing message and show results
                clear_output()
                display_results(feedback, features)

            except Exception as e:
                # UI boundary: show any failure to the user instead of a traceback.
                # The message is escaped because exception text often contains
                # '<' or '>' (e.g. "'<' not supported between instances of ...").
                clear_output()
                display(HTML(f"""
                <div style="background: #ffebee; border: 1px solid #f44336; border-radius: 5px; padding: 15px; color: #c62828;">
                    <h3>❌ Analysis Error</h3>
                    <p>An error occurred during analysis: {escape(str(e))}</p>
                    <p>Please check your inputs and try again.</p>
                </div>
                """))

    def on_sample_clicked(b):
        """Fill the four text fields with a sample petition and confirm in the output area."""
        # Load sample petition
        title_widget.value = "Mandatory Installation of Oxygen Plants in All Hospitals Above 50 Beds to Save Lives During Medical Emergencies"
        description_widget.value = """<h3><strong>URGENT: Critical Oxygen Crisis in Indian Hospitals</strong></h3>

<p>The <strong>COVID-19 pandemic</strong> has exposed a devastating gap in our healthcare infrastructure: <strong>over 85% of hospitals</strong> lack adequate oxygen generation facilities.</p>

<h3><strong>The Problem:</strong></h3>
<ul>
<li><strong>Oxygen shortage</strong> affects 2,847 hospitals nationwide</li>
<li><strong>Supply chain disruptions</strong> lead to critical delays</li>
<li><strong>Rural hospitals</strong> are disproportionately affected</li>
<li><strong>Emergency patients</strong> face life-threatening delays</li>
</ul>

<h3><strong>Our Solution:</strong></h3>
<p>We demand the <strong>Ministry of Health and Family Welfare</strong> implement immediate regulations requiring:</p>
<ul>
<li><strong>Mandatory oxygen plants</strong> in all hospitals with 50+ beds</li>
<li><strong>24-month implementation timeline</strong> with government support</li>
<li><strong>Regular audits</strong> and compliance monitoring</li>
<li><strong>Financial assistance</strong> for rural and government hospitals</li>
</ul>

<p>This initiative will <strong>save over 50,000 lives annually</strong> and ensure that no patient dies due to oxygen shortage.</p>"""

        letter_body_widget.value = """Dear Honorable Minister of Health and Family Welfare,

We urgently request your immediate intervention to address the critical oxygen shortage crisis in Indian hospitals that has claimed thousands of lives.

As healthcare facilities nationwide struggle with inadequate oxygen infrastructure, patients continue to die from preventable causes. We demand mandatory installation of oxygen generation plants in all hospitals above 50 beds capacity.

This life-saving measure requires immediate policy implementation with a 24-month compliance timeline, government financial support, and regular monitoring.

We trust in your leadership to implement this critical healthcare reform that will save countless lives and strengthen our medical infrastructure.

Sincerely,
Concerned Citizens of India"""

        targeting_widget.value = "Ministry of Health and Family Welfare, Government of India; State Health Ministers; Hospital Administration Boards; Medical Council of India"

        with output:
            clear_output()
            display(HTML("""
            <div style="background: #e8f5e8; border: 1px solid #4caf50; border-radius: 5px; padding: 15px; color: #2e7d32;">
                <h3>✅ Sample Petition Loaded</h3>
                <p>A high-quality sample petition has been loaded. You can now analyze it or modify it as needed.</p>
            </div>
            """))

    def on_clear_clicked(b):
        """Empty the four text fields (locale and location options are left as they are)."""
        title_widget.value = ''
        description_widget.value = ''
        letter_body_widget.value = ''
        targeting_widget.value = ''

        with output:
            clear_output()
            display(HTML("""
            <div style="background: #fff3e0; border: 1px solid #ff9800; border-radius: 5px; padding: 15px; color: #ef6c00;">
                <h3>🗑️ Form Cleared</h3>
                <p>All form fields have been cleared. You can now enter new petition details.</p>
            </div>
            """))

    # Connect event handlers
    analyze_button.on_click(on_analyze_clicked)
    sample_button.on_click(on_sample_clicked)
    clear_button.on_click(on_clear_clicked)

def display_results(feedback, features):
    """Render the analysis report for one petition as a sequence of HTML panels.

    Panels are emitted in this order: grade / probability header with one card
    per entry in ``feedback['metrics']``, then strengths, improvements and
    numbered recommendations (each only when its list is non-empty), a fixed
    "next steps" list, and finally a collapsible block of advanced metrics
    read from ``features``.
    """

    def show_section(opening, item_class, lines):
        # One styled <div> per line, wrapped between the section heading and its closing tag.
        body = ''.join(f'<div class="{item_class}">{line}</div>' for line in lines)
        display(HTML(opening + body + '</div>'))

    def keyword_total(suffix):
        # Title and description counts are reported as a single combined figure.
        return features.get(f'title_{suffix}', 0) + features.get(f'description_{suffix}', 0)

    # Header panel with the metric cards
    metric_cards = ' '.join(
        f'<div class="metric-card"><strong>{label}:</strong><br>{reading}</div>'
        for label, reading in feedback['metrics'].items()
    )
    display(HTML(f"""
    <div class="results-container">
        <div style="text-align: center; margin-bottom: 25px;">
            <h2 style="color: #667eea; margin-bottom: 10px;">{feedback['grade']}</h2>
            <h3 style="color: #333; margin-bottom: 15px;">Success Probability: {feedback['probability']:.1%}</h3>
            <p style="font-size: 16px; color: #666;">{feedback['overall']}</p>
        </div>

        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin: 25px 0;">
            {metric_cards}
        </div>
    </div>
    """))

    # Optional list sections - skipped entirely when there is nothing to show
    if feedback['strengths']:
        show_section(
            '<div style="margin: 20px 0;"><h3 style="color: #4CAF50;">🌟 Strengths</h3>',
            'recommendation',
            feedback['strengths'],
        )

    if feedback['improvements']:
        show_section(
            '<div style="margin: 20px 0;"><h3 style="color: #ff9800;">🔧 Areas for Improvement</h3>',
            'improvement',
            feedback['improvements'],
        )

    if feedback['specific_recommendations']:
        numbered = [
            f'{position}. {text}'
            for position, text in enumerate(feedback['specific_recommendations'], 1)
        ]
        show_section(
            '<div style="margin: 20px 0;"><h3 style="color: #2196f3;">📋 Specific Recommendations</h3>',
            'recommendation',
            numbered,
        )

    # Fixed guidance shown after every analysis
    next_steps_markup = """
    <div style="margin: 25px 0;">
        <h3 style="color: #667eea;">🎯 Next Steps</h3>
        <div class="recommendation">1. Implement the recommendations above</div>
        <div class="recommendation">2. Re-analyze your petition to track improvements</div>
        <div class="recommendation">3. Consider A/B testing different versions</div>
        <div class="recommendation">4. Launch when you achieve 70%+ success probability</div>
    </div>
    """
    display(HTML(next_steps_markup))

    # Advanced metrics, grouped by category and rendered inside a <details> element
    advanced_metrics = {
        'Content Analysis': {
            'Total Content Length': f"{features.get('content_comprehensiveness_score', 0):.0f} characters",
            'Title Length': f"{features.get('title_clean_length', 0)} characters",
            'Description Length': f"{features.get('description_clean_length', 0)} characters",
            'HTML Tags': f"{features.get('description_html_tags', 0)} tags"
        },
        'Language Analysis': {
            'Urgency Keywords': f"{keyword_total('urgency_count')}",
            'Action Keywords': f"{keyword_total('action_count')}",
            'Authority References': f"{features.get('targeting_description_authority_count', 0)}",
            'Professional Score': f"{features.get('professional_sophistication_score', 0):.3f}"
        }
    }

    pieces = ['<details style="margin: 20px 0; padding: 15px; background: #f8f9fa; border-radius: 10px;"><summary style="cursor: pointer; font-weight: bold; color: #667eea;">🔍 Advanced Metrics (Click to expand)</summary><div style="margin-top: 15px;">']
    for category, rows in advanced_metrics.items():
        pieces.append(f'<h4 style="color: #333; margin: 15px 0 10px 0;">{category}</h4>')
        pieces.extend(
            f'<div style="margin: 5px 0; padding: 8px; background: white; border-radius: 5px;"><strong>{label}:</strong> {reading}</div>'
            for label, reading in rows.items()
        )
    pieces.append('</div></details>')
    display(HTML(''.join(pieces)))

# ============================================================================
# USAGE INSTRUCTIONS AND TIPS
# ============================================================================

def show_usage_tips():
    """Show the static "how to use" panel: getting-started steps, tips and benchmarks."""
    # The panel is fixed markup; nothing in it depends on the loaded model or on user input.
    tips_markup = """
    <div style="background: #e3f2fd; border-radius: 15px; padding: 25px; margin: 20px 0;">
        <h3 style="color: #1565c0; margin-top: 0;">📚 How to Use This Tool</h3>

        <h4 style="color: #1976d2;">Getting Started:</h4>
        <ol style="color: #333;">
            <li><strong>Fill in the form:</strong> Enter your petition title, description, letter body, and target audience</li>
            <li><strong>Use HTML formatting:</strong> Include tags like &lt;strong&gt;, &lt;h3&gt;, &lt;ul&gt;&lt;li&gt; in your description</li>
            <li><strong>Be strategic:</strong> Use urgency language, action words, and specific targets</li>
            <li><strong>Click "Analyze":</strong> Get instant AI-powered predictions and recommendations</li>
            <li><strong>Iterate:</strong> Improve based on feedback and re-analyze</li>
        </ol>

        <h4 style="color: #1976d2;">Tips for High Success Probability:</h4>
        <ul style="color: #333;">
            <li><strong>Content Length:</strong> Aim for 2000+ total characters across all fields</li>
            <li><strong>HTML Formatting:</strong> Use professional formatting with 15+ HTML tags</li>
            <li><strong>Strategic Language:</strong> Include urgency words like "immediate", "urgent", "critical"</li>
            <li><strong>Action Words:</strong> Use "demand", "stop", "implement", "enforce"</li>
            <li><strong>Authority Targeting:</strong> Mention specific officials, departments, or institutions</li>
            <li><strong>Statistics:</strong> Include numbers, percentages, and data when possible</li>
        </ul>

        <h4 style="color: #1976d2;">Success Benchmarks:</h4>
        <div style="background: white; padding: 15px; border-radius: 10px; margin: 15px 0;">
            <div style="margin: 8px 0;"><strong>🎯 Target:</strong> 70%+ success probability</div>
            <div style="margin: 8px 0;"><strong>📝 Content:</strong> 2000+ total characters</div>
            <div style="margin: 8px 0;"><strong>🎨 Formatting:</strong> 15+ HTML tags</div>
            <div style="margin: 8px 0;"><strong>⚡ Language:</strong> 5+ urgency/action keywords</div>
            <div style="margin: 8px 0;"><strong>🏛️ Authority:</strong> Specific targets mentioned</div>
        </div>
    </div>
    """
    display(HTML(tips_markup))

# ============================================================================
# ADDITIONAL FEATURES
# ============================================================================

def create_batch_analyzer():
    """Show the "Advanced Features" panel.

    The panel only advertises A/B testing, batch analysis and result export as
    "coming soon"; no batch-analysis functionality is wired up here.
    """
    placeholder_markup = """
    <div style="background: #f3e5f5; border-radius: 15px; padding: 25px; margin: 20px 0;">
        <h3 style="color: #7b1fa2; margin-top: 0;">🔬 Advanced Features</h3>
        <p style="color: #333;">Additional tools for power users and organizations.</p>

        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 15px; margin: 20px 0;">
            <div style="background: white; padding: 20px; border-radius: 10px; border-left: 4px solid #9c27b0;">
                <h4 style="color: #7b1fa2; margin-top: 0;">📊 A/B Testing</h4>
                <p style="color: #666; margin-bottom: 15px;">Compare multiple versions of your petition to find the best approach.</p>
                <em style="color: #999;">Feature coming soon!</em>
            </div>

            <div style="background: white; padding: 20px; border-radius: 10px; border-left: 4px solid #2196f3;">
                <h4 style="color: #1976d2; margin-top: 0;">📋 Batch Analysis</h4>
                <p style="color: #666; margin-bottom: 15px;">Analyze multiple petitions at once from CSV files.</p>
                <em style="color: #999;">Feature coming soon!</em>
            </div>

            <div style="background: white; padding: 20px; border-radius: 10px; border-left: 4px solid #4caf50;">
                <h4 style="color: #388e3c; margin-top: 0;">💾 Export Results</h4>
                <p style="color: #666; margin-bottom: 15px;">Export analysis results to PDF or CSV formats.</p>
                <em style="color: #999;">Feature coming soon!</em>
            </div>
        </div>
    </div>
    """
    display(HTML(placeholder_markup))

# ============================================================================
# MAIN APPLICATION LAUNCH
# ============================================================================

def launch_petition_analyzer():
    """Print the start-up banner, then show the tips, the petition form and the extras panel.

    Reads the module-level ``model_artifacts`` to report whether a trained
    model is loaded. Always returns ``True``.
    """
    rule = "="*60

    print("🚀 LAUNCHING PETITION SUCCESS PREDICTOR")
    print(rule)

    status_label = '✅ Trained Model Loaded' if model_artifacts else '🎯 Demo Mode Active'
    print(f"🤖 Model Status: {status_label}")
    if not model_artifacts:
        print("⚠️  Upload model files for full functionality")
    else:
        print(f"🔧 Features: {len(model_artifacts['features'])} features")
        print(f"📊 Model Type: {type(model_artifacts['model']).__name__}")
    print(rule)

    # Usage tips first, then the main form, then the advanced-features panel
    for show_panel in (show_usage_tips, create_petition_form, create_batch_analyzer):
        show_panel()

    return True

# Announce and start the application as soon as the cell runs
print("\n🎯 PETITION SUCCESS PREDICTOR - READY TO LAUNCH!")
print("="*60)

# Build the UI; the return value signals whether the launch completed
success = launch_petition_analyzer()

if not success:
    print("\n❌ Application launch failed")
    print("Please check that all required files are uploaded")
else:
    print("\n✅ Application launched successfully!")
    print("📝 Fill out the form above to analyze your petition")
    print("🔍 Click 'Load Sample' to see an example")

🚀 PETITION SUCCESS PREDICTOR - INTERACTIVE INTERFACE
✅ Trained model loaded successfully
✅ Feature list loaded (74 features)
✅ Categorical encoders loaded
✅ Reference data loaded (3081 records)
✅ Petition processing pipeline initialized with trained model

🎯 PETITION SUCCESS PREDICTOR - READY TO LAUNCH!
🚀 LAUNCHING PETITION SUCCESS PREDICTOR
🤖 Model Status: ✅ Trained Model Loaded
🔧 Features: 74 features
📊 Model Type: GradientBoostingClassifier


Textarea(value='', description='Title:', layout=Layout(height='80px', width='100%'), placeholder='Enter your p…

Textarea(value='', description='Description:', layout=Layout(height='200px', width='100%'), placeholder='Enter…

Textarea(value='', description='Letter Body:', layout=Layout(height='150px', width='100%'), placeholder='Enter…

Textarea(value='', description='Target:', layout=Layout(height='80px', width='100%'), placeholder='Who is this…

Accordion(children=(VBox(children=(Dropdown(description='Locale:', options=(('India (en-IN)', 'en-IN'), ('Unit…

HBox(children=(Button(button_style='primary', description='🔍 Analyze Petition', layout=Layout(height='45px', w…

Output()


✅ Application launched successfully!
📝 Fill out the form above to analyze your petition
🔍 Click 'Load Sample' to see an example


In [7]:
# ============================================================================
# INTERACTIVE PETITION SUCCESS PREDICTOR - STANDALONE COLAB
# ============================================================================
"""
This standalone Colab file uses your trained model and features to provide
an interactive petition success prediction interface.

SETUP INSTRUCTIONS:
1. Upload these files to your Colab environment:
   - best_model.pkl (your trained model)
   - final_features.pkl (list of feature names)
   - categorical_encoders.pkl (label encoders)
   - processed_petition_data.xlsx (for reference benchmarks)

2. Run all cells in order
3. Use the interactive form at the bottom to analyze petitions
"""

# Install required packages
!pip install ipywidgets textstat nltk scikit-learn pandas numpy matplotlib seaborn -q

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import pickle
import re
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# Text processing imports
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize, sent_tokenize
try:
    from textstat import flesch_reading_ease, flesch_kincaid_grade, gunning_fog, automated_readability_index
except ImportError:
    print("Installing textstat...")
    !pip install textstat
    from textstat import flesch_reading_ease, flesch_kincaid_grade, gunning_fog, automated_readability_index

# Download NLTK data
nltk.download('vader_lexicon', quiet=True)
nltk.download('punkt', quiet=True)
# Recent NLTK releases load the Punkt sentence tokenizer from the 'punkt_tab'
# resource instead of 'punkt'. Without it sent_tokenize/word_tokenize raise
# LookupError, and the readability code below then reports the sentence/word
# statistics as 0. Older releases simply report the package as unknown.
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)

print("🚀 PETITION SUCCESS PREDICTOR - INTERACTIVE INTERFACE")
print("="*80)

# ============================================================================
# LOAD TRAINED MODEL AND ARTIFACTS
# ============================================================================

def load_model_artifacts():
    """Load the trained model and its companion artifacts from the working directory.

    Required files: ``best_model.pkl``, ``final_features.pkl`` and
    ``categorical_encoders.pkl``. ``processed_petition_data.xlsx`` is optional
    and only supplies reference data.

    Returns:
        dict | None: A dict with the keys ``'model'``, ``'features'``,
        ``'encoders'`` and ``'reference_data'`` (``None`` when the workbook is
        unavailable), or ``None`` when a required file is missing or cannot
        be unpickled.
    """
    artifacts = {}

    try:
        # NOTE(security): pickle.load can execute arbitrary code embedded in the
        # file. Only load artifacts you produced yourself or otherwise trust.

        # Load trained model
        with open('best_model.pkl', 'rb') as f:
            artifacts['model'] = pickle.load(f)
        print("✅ Trained model loaded successfully")

        # Load feature names
        with open('final_features.pkl', 'rb') as f:
            artifacts['features'] = pickle.load(f)
        print(f"✅ Feature list loaded ({len(artifacts['features'])} features)")

        # Load categorical encoders
        with open('categorical_encoders.pkl', 'rb') as f:
            artifacts['encoders'] = pickle.load(f)
        print("✅ Categorical encoders loaded")

    except (FileNotFoundError, pickle.UnpicklingError, EOFError) as e:
        # A missing, truncated or corrupt artifact means "no model":
        # callers treat None as the signal to run in demo mode.
        print(f"❌ Error loading artifacts: {e}")
        print("\nPlease upload the following files to your Colab environment:")
        print("- best_model.pkl")
        print("- final_features.pkl")
        print("- categorical_encoders.pkl")
        print("- processed_petition_data.xlsx (optional)")
        return None

    # The reference workbook is optional, so no failure to read it may
    # discard the model that was just loaded successfully.
    try:
        artifacts['reference_data'] = pd.read_excel('processed_petition_data.xlsx')
        print(f"✅ Reference data loaded ({len(artifacts['reference_data'])} records)")
    except FileNotFoundError:
        print("⚠️  Reference data not found - using default benchmarks")
        artifacts['reference_data'] = None
    except (ImportError, ValueError) as e:
        # read_excel raises ImportError when no Excel engine is installed and
        # ValueError when the file format cannot be determined.
        print(f"⚠️  Reference data could not be read ({e}) - using default benchmarks")
        artifacts['reference_data'] = None

    return artifacts

# Load artifacts once when this cell runs. The result is None when a required
# file is missing; the code below then falls back to demo mode.
model_artifacts = load_model_artifacts()

# ============================================================================
# PETITION PROCESSING PIPELINE
# ============================================================================

class StandalonePetitionPipeline:
    """Turn raw petition text into model features and a success prediction.

    For each text field the pipeline computes lengths, keyword counts,
    sentiment and readability statistics, then adds a few composite scores
    and two encoded categorical values. The values named in ``feature_names``
    are passed to the loaded model; when no model is available a
    hand-weighted heuristic score is used instead.
    """

    def __init__(self, model_artifacts):
        """Store the loaded artifacts and set up the text analysers.

        Args:
            model_artifacts: Dict with the keys ``'model'``, ``'features'``,
                ``'encoders'`` and ``'reference_data'``. Any falsy value
                (``None`` or ``{}``) selects demo mode.
        """
        self.model = model_artifacts['model'] if model_artifacts else None
        self.feature_names = model_artifacts['features'] if model_artifacts else []
        self.encoders = model_artifacts['encoders'] if model_artifacts else {}
        self.reference_data = model_artifacts['reference_data'] if model_artifacts else None

        # Initialize sentiment analyzer
        self.sia = SentimentIntensityAnalyzer()

        # Setup keyword categories (from your analysis)
        self.setup_keywords()

    def setup_keywords(self):
        """Define the keyword lists and call-to-action patterns used for counting."""
        self.urgency_keywords = [
            'urgent', 'immediate', 'immediately', 'now', 'today', 'emergency', 'crisis',
            'deadline', 'time running out', "before it's too late", 'last chance',
            'act now', 'breaking', 'critical', 'asap', 'quickly', 'rapidly', 'soon'
        ]

        self.action_keywords = [
            'stop', 'save', 'protect', 'demand', 'fight', 'defend', 'prevent',
            'ban', 'end', 'cancel', 'reverse', 'change', 'fix', 'solve',
            'help', 'support', 'join', 'sign', 'act', 'take action', 'make',
            'force', 'require', 'ensure', 'guarantee', 'implement', 'establish'
        ]

        self.power_words = [
            'justice', 'freedom', 'rights', 'equality', 'fair', 'unfair', 'wrong',
            'illegal', 'violation', 'abuse', 'corruption', 'scandal', 'outrage',
            'discrimination', 'injustice', 'betrayal', 'exploitation', 'oppression'
        ]

        self.authority_keywords = [
            'government', 'minister', 'ministry', 'department', 'authority', 'official',
            'court', 'judge', 'police', 'administration', 'commissioner', 'director',
            'secretary', 'chief', 'president', 'prime minister', 'governor', 'congress'
        ]

        self.cta_patterns = [
            r'\bsign\s+this\b', r'\bsign\s+now\b', r'\bjoin\s+us\b', r'\bhelp\s+us\b',
            r'\btake\s+action\b', r'\bact\s+now\b', r'\bmake\s+a\s+difference\b',
            r'\bdemand\s+action\b', r'\bstop\s+this\b', r'\bforce\s+them\b'
        ]

    def clean_html(self, text):
        """Return ``text`` with ``<...>`` tags removed and whitespace collapsed.

        Missing input (``None`` / NaN) yields an empty string.
        """
        if text is None or pd.isna(text):
            return ""
        stripped = re.sub(r'<.*?>', '', str(text))
        return ' '.join(stripped.split())

    def count_html_tags(self, text):
        """Return the number of ``<...>`` tags in ``text`` (0 for missing input)."""
        if text is None or pd.isna(text):
            return 0
        return len(re.findall(r'<.*?>', str(text)))

    def count_keywords(self, text, keywords):
        """Return the total number of occurrences of ``keywords`` in ``text``.

        Matching is case-insensitive and runs on the tag-stripped text. It is
        substring based, so 'now' also matches inside 'know' and overlapping
        keywords ('act' / 'act now') are each counted.
        NOTE(review): presumably this mirrors how the training features were
        built - confirm before switching to word-boundary matching.
        """
        if text is None or pd.isna(text):
            return 0
        haystack = self.clean_html(text).lower()
        return sum(haystack.count(keyword.lower()) for keyword in keywords)

    def get_sentiment_scores(self, text):
        """Return the sentiment analyser's polarity scores for the tag-stripped text.

        Missing input yields all-zero ``compound`` / ``pos`` / ``neg`` / ``neu`` scores.
        """
        if text is None or pd.isna(text):
            return {'compound': 0, 'pos': 0, 'neg': 0, 'neu': 0}
        return self.sia.polarity_scores(self.clean_html(text))

    def calculate_readability(self, text):
        """Return readability scores and simple token statistics for ``text``.

        Text that is missing or shorter than 10 characters after stripping
        yields all zeros. Scoring is best-effort: if the readability functions
        or the tokenizers raise, the affected group of metrics is reported as 0.
        """
        if pd.isna(text) or len(str(text).strip()) < 10:
            return {
                'flesch_ease': 0, 'flesch_kincaid': 0, 'gunning_fog': 0,
                'automated_readability': 0, 'avg_sentence_length': 0,
                'avg_word_length': 0, 'vocab_diversity': 0, 'caps_ratio': 0
            }

        clean_text = self.clean_html(text)

        try:
            flesch_ease = flesch_reading_ease(clean_text)
            flesch_kincaid = flesch_kincaid_grade(clean_text)
            gunning_fog_score = gunning_fog(clean_text)
            automated_readability = automated_readability_index(clean_text)
        except Exception:
            # Deliberate best-effort fallback; unlike a bare ``except`` this
            # lets KeyboardInterrupt / SystemExit propagate.
            flesch_ease = flesch_kincaid = gunning_fog_score = automated_readability = 0

        # Additional metrics
        try:
            sentences = sent_tokenize(clean_text)
            words = word_tokenize(clean_text)
            word_total = len(words)
            avg_sentence_length = word_total / len(sentences) if sentences else 0
            avg_word_length = sum(len(word) for word in words) / word_total if words else 0
            unique_words = {word.lower() for word in words if word.isalpha()}
            vocab_diversity = len(unique_words) / word_total if words else 0
            caps_words = sum(1 for word in words if word.isupper() and len(word) > 1)
            caps_ratio = caps_words / word_total if words else 0
        except Exception:
            # e.g. a missing tokenizer resource raises LookupError
            avg_sentence_length = avg_word_length = vocab_diversity = caps_ratio = 0

        return {
            'flesch_ease': flesch_ease,
            'flesch_kincaid': flesch_kincaid,
            'gunning_fog': gunning_fog_score,
            'automated_readability': automated_readability,
            'avg_sentence_length': avg_sentence_length,
            'avg_word_length': avg_word_length,
            'vocab_diversity': vocab_diversity,
            'caps_ratio': caps_ratio
        }

    def _text_column_features(self, col, text):
        """Return every per-field feature for one text column, keyed ``{col}_<name>``."""
        # An empty string stands in for missing text, so every raw-text
        # statistic below naturally evaluates to 0.
        raw = str(text) if pd.notna(text) else ""
        lowered = raw.lower()
        cleaned = self.clean_html(text)
        out = {}

        # Basic text features
        out[f'{col}_length'] = len(raw)
        out[f'{col}_clean_length'] = len(cleaned)
        out[f'{col}_word_count'] = len(cleaned.split())

        # HTML features (only tracked for the description)
        if col == 'description':
            out[f'{col}_html_tags'] = self.count_html_tags(text)

        # Keyword counts
        out[f'{col}_urgency_count'] = self.count_keywords(text, self.urgency_keywords)
        out[f'{col}_action_count'] = self.count_keywords(text, self.action_keywords)
        out[f'{col}_power_count'] = self.count_keywords(text, self.power_words)
        out[f'{col}_authority_count'] = self.count_keywords(text, self.authority_keywords)

        # Boolean keyword features
        out[f'{col}_has_urgency'] = int(out[f'{col}_urgency_count'] > 0)
        out[f'{col}_has_action'] = int(out[f'{col}_action_count'] > 0)

        # CTA detection (runs on the raw, lower-cased text)
        cta_count = sum(len(re.findall(pattern, lowered)) for pattern in self.cta_patterns)
        out[f'{col}_cta_count'] = cta_count
        out[f'{col}_has_cta'] = int(cta_count > 0)

        # Numbers and statistics
        out[f'{col}_numbers_count'] = len(re.findall(r'\d+', raw))
        out[f'{col}_has_statistics'] = int(bool(
            re.search(r'\d+%|\d+\s*(percent|million|thousand|billion)', raw, re.IGNORECASE)
        ))

        # Text structure
        out[f'{col}_paragraph_count'] = len([p for p in raw.split('\n') if p.strip()])
        out[f'{col}_question_count'] = raw.count('?')

        # Sentiment features
        sentiment = self.get_sentiment_scores(text)
        out[f'{col}_sentiment_compound'] = sentiment['compound']
        out[f'{col}_sentiment_positive'] = sentiment['pos']
        out[f'{col}_sentiment_negative'] = sentiment['neg']
        out[f'{col}_emotional_intensity'] = sentiment['pos'] + sentiment['neg']

        # Readability features
        for metric, value in self.calculate_readability(text).items():
            out[f'{col}_{metric}'] = value

        return out

    def _encode_locale(self, petition_data):
        """Return the integer code for the petition's locale (0 when unknown)."""
        locale_value = petition_data.get('original_locale', 'en-IN')

        if 'original_locale' not in self.encoders:
            # Default encoding
            locale_map = {'en-IN': 0, 'en-US': 1, 'en-GB': 2, 'en-CA': 3, 'ja-JP': 4, 'de-DE': 5, 'it-IT': 6}
            return locale_map.get(locale_value, 0)

        encoder = self.encoders['original_locale']
        try:
            if hasattr(encoder, 'transform'):
                # Encoder object with a transform() method (e.g. a LabelEncoder)
                return encoder.transform([locale_value])[0]
            # Otherwise treat it as a plain mapping
            return encoder.get(locale_value, 0)
        except Exception:
            # Covers values not seen during training as well as malformed encoders
            return 0

    def extract_features(self, petition_data):
        """Extract all features from petition data.

        Args:
            petition_data: Mapping that may contain ``'title'``,
                ``'description'``, ``'letter_body'``,
                ``'targeting_description'``, ``'original_locale'`` and
                ``'has_location'``.

        Returns:
            dict: Feature name -> value. Fields absent from ``petition_data``
            contribute no per-field features; the composite scores treat them as 0.
        """
        features = {}

        # Process each text column that was supplied
        for col in ('title', 'description', 'letter_body', 'targeting_description'):
            if col in petition_data:
                features.update(self._text_column_features(col, petition_data[col]))

        # Strategic composite features
        features['content_comprehensiveness_score'] = (
            features.get('title_clean_length', 0) +
            features.get('description_clean_length', 0) +
            features.get('letter_body_clean_length', 0)
        )

        # Professional sophistication score (simplified)
        # NOTE(review): the complexity term uses the *title* grade level, while
        # the length and formatting terms come from the description. The value
        # is kept as is in case training used the same formula - confirm.
        title_complexity_norm = min(features.get('title_flesch_kincaid', 0) / 20, 1)
        desc_length_norm = min(features.get('description_clean_length', 0) / 2000, 1)
        html_tags_norm = min(features.get('description_html_tags', 0) / 25, 1)

        features['professional_sophistication_score'] = (
            title_complexity_norm * 0.4 + desc_length_norm * 0.3 + html_tags_norm * 0.3
        )

        # Strategic urgency score
        urgency_total = features.get('title_urgency_count', 0) + features.get('description_urgency_count', 0)
        action_total = features.get('title_action_count', 0) + features.get('description_action_count', 0)
        # Map the compound sentiment from [-1, 1] onto [0, 1]
        sentiment_score = max(0, features.get('title_sentiment_compound', 0) + 1) / 2

        features['strategic_urgency_score'] = min((urgency_total + action_total) / 10 * 0.7 + sentiment_score * 0.3, 1)

        # Authority targeting score
        features['authority_targeting_score'] = (
            features.get('title_authority_count', 0) +
            features.get('description_authority_count', 0) +
            features.get('targeting_description_word_count', 0) / 10
        )

        # Message coherence score (simplified)
        features['message_coherence_score'] = 0.5  # Default value

        # Categorical features
        features['original_locale_encoded'] = self._encode_locale(petition_data)
        features['has_location_encoded'] = int(petition_data.get('has_location', True))

        return features

    def predict_success(self, petition_data):
        """Predict petition success probability.

        Returns:
            tuple: ``(probability, prediction, features)``. The heuristic demo
            score is used when no model is loaded or the model call fails.
        """
        # Compare against None explicitly: the truth value of an estimator can
        # go through __len__ (ensembles, pipelines) and is not a reliable
        # "is a model loaded" check.
        if self.model is None:
            return self._demo_prediction(petition_data)

        try:
            # Extract features
            features = self.extract_features(petition_data)

            # Build the vector in the model's feature order; unknown features default to 0
            missing_features = [name for name in self.feature_names if name not in features]
            feature_vector = [features.get(name, 0) for name in self.feature_names]

            if missing_features:
                print(f"⚠️ Warning: {len(missing_features)} features set to default values")

            # Make prediction
            feature_array = np.array(feature_vector).reshape(1, -1)

            # Ensure feature array has correct shape
            if feature_array.shape[1] != len(self.feature_names):
                raise ValueError(f"Feature array shape mismatch: {feature_array.shape[1]} vs {len(self.feature_names)}")

            probability = self.model.predict_proba(feature_array)[0, 1]
            prediction = self.model.predict(feature_array)[0]

            return probability, prediction, features

        except Exception as e:
            # Deliberate best-effort: report the failure and fall back to the heuristic
            print(f"Model prediction failed: {e}")
            print("Falling back to demo prediction...")
            return self._demo_prediction(petition_data)

    def _demo_prediction(self, petition_data):
        """Heuristic prediction used when the trained model is unavailable.

        Returns:
            tuple: ``(probability, prediction, features)`` with the probability
            capped at 0.95 and ``prediction`` set to 1 from 0.5 upwards.
        """
        features = self.extract_features(petition_data)

        # Simple scoring system
        score = 0.0

        # Content length (40% weight)
        content_score = features.get('content_comprehensiveness_score', 0)
        if content_score >= 2000:
            score += 0.4
        elif content_score >= 1000:
            score += 0.25
        elif content_score >= 500:
            score += 0.15

        # HTML formatting (20% weight)
        html_tags = features.get('description_html_tags', 0)
        score += min(html_tags / 25, 1) * 0.20

        # Strategic language (25% weight)
        urgency_count = features.get('title_urgency_count', 0) + features.get('description_urgency_count', 0)
        action_count = features.get('title_action_count', 0) + features.get('description_action_count', 0)
        strategic_score = min((urgency_count + action_count) / 8, 1)
        score += strategic_score * 0.25

        # Professional sophistication (15% weight)
        prof_score = features.get('professional_sophistication_score', 0)
        score += prof_score * 0.15

        probability = min(score, 0.95)
        prediction = 1 if probability >= 0.5 else 0

        return probability, prediction, features

# Build the pipeline; an empty dict puts it in demo mode when no artifacts were loaded
pipeline = StandalonePetitionPipeline(model_artifacts if model_artifacts else {})
if model_artifacts:
    print("✅ Petition processing pipeline initialized with trained model")
else:
    print("⚠️  Running in demo mode - upload model files for full functionality")

# ============================================================================
# FEEDBACK GENERATION SYSTEM
# ============================================================================

def generate_detailed_feedback(petition_data, features, probability, prediction):
    """Turn a prediction and its feature values into a feedback report.

    Args:
        petition_data: Raw petition fields; not consulted by this function.
        features: Mapping of feature name to value; missing entries count as 0.
        probability: Predicted success probability, compared against the
            0.4 / 0.5 / 0.6 / 0.7 / 0.8 grade cutoffs.
        prediction: Predicted class label, passed through unchanged.

    Returns:
        dict with the keys 'probability', 'prediction', 'grade', 'strengths',
        'improvements', 'specific_recommendations', 'metrics' and 'overall'.
    """
    strengths, improvements, recommendations = [], [], []
    report = {
        'probability': probability,
        'prediction': prediction,
        'grade': '',
        'strengths': strengths,
        'improvements': improvements,
        'specific_recommendations': recommendations,
        'metrics': {}
    }

    # Grade bands, highest cutoff first; the first band reached wins and
    # anything below the last cutoff falls through to the D grade.
    grade_bands = (
        (0.8, "🏆 EXCELLENT (A+)",
         "Your petition has exceptional success potential!"),
        (0.7, "🎯 VERY GOOD (A)",
         "Your petition has strong success potential with minor optimizations."),
        (0.6, "✅ GOOD (B+)",
         "Your petition shows good potential with some improvements needed."),
        (0.5, "📈 MODERATE (B)",
         "Your petition has moderate potential - several improvements recommended."),
        (0.4, "⚠️ NEEDS WORK (C)",
         "Your petition needs significant improvements to succeed."),
    )
    grade = "🔧 MAJOR REVISION NEEDED (D)"
    overall = "Your petition requires major restructuring for success."
    for cutoff, label, summary in grade_bands:
        if probability >= cutoff:
            grade, overall = label, summary
            break
    report['grade'] = grade
    report['overall'] = overall

    def combined(kind):
        # Keyword counts are tracked separately for title and description
        return features.get(f'title_{kind}_count', 0) + features.get(f'description_{kind}_count', 0)

    total_length = features.get('content_comprehensiveness_score', 0)
    tag_count = features.get('description_html_tags', 0)
    urgency_hits = combined('urgency')
    action_hits = combined('action')
    sophistication = features.get('professional_sophistication_score', 0)

    # Content length
    if total_length >= 1000:
        strengths.append(
            "✅ Excellent content comprehensiveness"
            if total_length >= 2000
            else "✅ Good content length"
        )
    else:
        improvements.append("📝 Increase content comprehensiveness")
        recommendations.append(
            f"Expand total content to 2000+ characters (current: {total_length:.0f})"
        )

    # HTML formatting
    if tag_count >= 15:
        strengths.append("✅ Professional HTML formatting")
    else:
        improvements.append("🎨 Improve formatting and structure")
        recommendations.append(
            f"Add HTML formatting: <b>bold</b>, <strong>emphasis</strong>, <h3>headers</h3> (current: {tag_count} tags)"
        )

    # Strategic language: a shortfall adds a recommendation only
    if urgency_hits >= 2:
        strengths.append("✅ Strong urgency language")
    else:
        recommendations.append(
            "Add urgency keywords: 'immediate', 'urgent', 'critical', 'emergency'"
        )

    if action_hits >= 3:
        strengths.append("✅ Strong action-oriented language")
    else:
        recommendations.append(
            "Include more action words: 'demand', 'stop', 'implement', 'enforce'"
        )

    # Professional sophistication
    if sophistication >= 0.6:
        strengths.append("✅ High professional sophistication")
    else:
        improvements.append("🎓 Increase professional sophistication")
        recommendations.append(
            "Use more sophisticated language and technical terminology"
        )

    # Pre-formatted values for the metric cards
    report['metrics'] = {
        'Content Length': f"{total_length:.0f} characters",
        'HTML Tags': f"{tag_count}",
        'Urgency Words': f"{urgency_hits}",
        'Action Words': f"{action_hits}",
        'Professional Score': f"{sophistication:.2f}",
        'Success Probability': f"{probability:.1%}"
    }

    return report

# ============================================================================
# INTERACTIVE USER INTERFACE
# ============================================================================

def create_petition_form():
    """Build and display the interactive petition analysis form.

    Renders the CSS block, the header banner, four text areas (title,
    description, letter body, target), an "Advanced Options" accordion
    (locale and location flag), three buttons and an output area, then
    wires the buttons to their handlers:

    - Analyze: validates that title and description are non-empty, runs
      ``pipeline.predict_success`` and ``generate_detailed_feedback`` and
      shows the result through ``display_results``.
    - Load Sample: fills the four text areas with a sample petition.
    - Clear Form: empties the four text areas (the advanced options keep
      their current values).

    Returns:
        None. Everything is rendered through ``display``.
    """
    # Function-scope import so the exception text can be shown safely as HTML
    from html import escape

    # Apply custom CSS styling
    display(HTML("""
    <style>
    .petition-form {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 25px;
        border-radius: 15px;
        margin: 20px 0;
        color: white;
    }
    .form-title {
        font-size: 28px;
        font-weight: bold;
        text-align: center;
        margin-bottom: 20px;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
    }
    .form-subtitle {
        font-size: 16px;
        text-align: center;
        margin-bottom: 25px;
        opacity: 0.9;
    }
    .results-container {
        background: rgba(255,255,255,0.95);
        color: #333;
        padding: 25px;
        border-radius: 15px;
        margin: 20px 0;
        box-shadow: 0 10px 30px rgba(0,0,0,0.2);
    }
    .metric-card {
        background: #f8f9fa;
        padding: 15px;
        border-radius: 10px;
        margin: 10px 0;
        border-left: 4px solid #667eea;
    }
    .recommendation {
        background: #e8f5e8;
        padding: 12px;
        margin: 8px 0;
        border-radius: 8px;
        border-left: 3px solid #4CAF50;
    }
    .improvement {
        background: #fff3cd;
        padding: 12px;
        margin: 8px 0;
        border-radius: 8px;
        border-left: 3px solid #ffc107;
    }
    </style>
    """))

    # Form header
    display(HTML("""
    <div class="petition-form">
        <div class="form-title">🎯 AI-Powered Petition Success Predictor</div>
        <div class="form-subtitle">
            Get instant feedback on your petition's success potential using advanced machine learning
        </div>
    </div>
    """))

    # Create form widgets
    title_widget = widgets.Textarea(
        value='',
        placeholder='Enter your petition title (e.g., "Mandatory Installation of Oxygen Plants in All Hospitals to Save Lives")',
        description='Title:',
        layout=widgets.Layout(width='100%', height='80px'),
        style={'description_width': '120px'}
    )

    description_widget = widgets.Textarea(
        value='',
        placeholder='Enter detailed petition description with background, problem statement, and proposed solution. Use HTML formatting for better results.',
        description='Description:',
        layout=widgets.Layout(width='100%', height='200px'),
        style={'description_width': '120px'}
    )

    letter_body_widget = widgets.Textarea(
        value='',
        placeholder='Enter the letter body that will be sent to decision makers. Be specific about your demands.',
        description='Letter Body:',
        layout=widgets.Layout(width='100%', height='150px'),
        style={'description_width': '120px'}
    )

    targeting_widget = widgets.Textarea(
        value='',
        placeholder='Who is this petition targeting? (e.g., "Ministry of Health, State Governments, Hospital Administrators")',
        description='Target:',
        layout=widgets.Layout(width='100%', height='80px'),
        style={'description_width': '120px'}
    )

    # Advanced options
    locale_widget = widgets.Dropdown(
        options=[('India (en-IN)', 'en-IN'), ('United States (en-US)', 'en-US'),
                ('United Kingdom (en-GB)', 'en-GB'), ('Canada (en-CA)', 'en-CA')],
        value='en-IN',
        description='Locale:',
        style={'description_width': '120px'}
    )

    location_widget = widgets.Checkbox(
        value=True,
        description='Has Geographic Location',
        style={'description_width': '200px'}
    )

    # Buttons
    analyze_button = widgets.Button(
        description='🔍 Analyze Petition',
        button_style='primary',
        layout=widgets.Layout(width='200px', height='45px')
    )

    sample_button = widgets.Button(
        description='📝 Load Sample',
        button_style='info',
        layout=widgets.Layout(width='200px', height='45px')
    )

    clear_button = widgets.Button(
        description='🗑️ Clear Form',
        button_style='warning',
        layout=widgets.Layout(width='200px', height='45px')
    )

    # Output area: every handler writes its messages/results here
    output = widgets.Output()

    # Display form elements
    display(title_widget)
    display(description_widget)
    display(letter_body_widget)
    display(targeting_widget)

    # Advanced options in accordion
    advanced_accordion = widgets.Accordion(children=[
        widgets.VBox([locale_widget, location_widget])
    ])
    advanced_accordion.set_title(0, '⚙️ Advanced Options')
    display(advanced_accordion)

    # Button row
    button_box = widgets.HBox([analyze_button, sample_button, clear_button])
    display(button_box)
    display(output)

    # Event handlers
    def on_analyze_clicked(b):
        """Validate the form, run the prediction and show the results."""
        with output:
            clear_output()

            # Get form values
            petition_data = {
                'title': title_widget.value.strip(),
                'description': description_widget.value.strip(),
                'letter_body': letter_body_widget.value.strip(),
                'targeting_description': targeting_widget.value.strip(),
                'original_locale': locale_widget.value,
                'has_location': location_widget.value
            }

            # Validate inputs: title and description are the only required fields
            if not petition_data['title'] or not petition_data['description']:
                display(HTML("""
                <div style="background: #ffebee; border: 1px solid #f44336; border-radius: 5px; padding: 15px; color: #c62828;">
                    <h3>❌ Missing Required Fields</h3>
                    <p>Please fill in at least the title and description fields.</p>
                </div>
                """))
                return

            # Show processing message
            display(HTML("""
            <div style="background: #e3f2fd; border: 1px solid #2196f3; border-radius: 5px; padding: 15px; color: #1565c0; text-align: center;">
                <h3>🔄 Analyzing Your Petition...</h3>
                <p>Please wait while we process your petition through our AI analysis pipeline.</p>
            </div>
            """))

            try:
                # Make prediction
                probability, prediction, features = pipeline.predict_success(petition_data)

                # Generate feedback
                feedback = generate_detailed_feedback(petition_data, features, probability, prediction)

                # Clear processing message and show results
                clear_output()
                display_results(feedback, features)

            except Exception as e:
                # UI boundary: report the failure in the output area instead of
                # letting it escape the widget callback. The message is escaped
                # because exception text may contain '<', '>' or '&'.
                clear_output()
                display(HTML(f"""
                <div style="background: #ffebee; border: 1px solid #f44336; border-radius: 5px; padding: 15px; color: #c62828;">
                    <h3>❌ Analysis Error</h3>
                    <p>An error occurred during analysis: {escape(str(e))}</p>
                    <p>Please check your inputs and try again.</p>
                </div>
                """))

    def on_sample_clicked(b):
        """Fill the four text areas with a sample petition."""
        # Load sample petition
        title_widget.value = "Mandatory Installation of Oxygen Plants in All Hospitals Above 50 Beds to Save Lives During Medical Emergencies"
        description_widget.value = """<h3><strong>URGENT: Critical Oxygen Crisis in Indian Hospitals</strong></h3>

<p>The <strong>COVID-19 pandemic</strong> has exposed a devastating gap in our healthcare infrastructure: <strong>over 85% of hospitals</strong> lack adequate oxygen generation facilities.</p>

<h3><strong>The Problem:</strong></h3>
<ul>
<li><strong>Oxygen shortage</strong> affects 2,847 hospitals nationwide</li>
<li><strong>Supply chain disruptions</strong> lead to critical delays</li>
<li><strong>Rural hospitals</strong> are disproportionately affected</li>
<li><strong>Emergency patients</strong> face life-threatening delays</li>
</ul>

<h3><strong>Our Solution:</strong></h3>
<p>We demand the <strong>Ministry of Health and Family Welfare</strong> implement immediate regulations requiring:</p>
<ul>
<li><strong>Mandatory oxygen plants</strong> in all hospitals with 50+ beds</li>
<li><strong>24-month implementation timeline</strong> with government support</li>
<li><strong>Regular audits</strong> and compliance monitoring</li>
<li><strong>Financial assistance</strong> for rural and government hospitals</li>
</ul>

<p>This initiative will <strong>save over 50,000 lives annually</strong> and ensure that no patient dies due to oxygen shortage.</p>"""

        letter_body_widget.value = """Dear Honorable Minister of Health and Family Welfare,

We urgently request your immediate intervention to address the critical oxygen shortage crisis in Indian hospitals that has claimed thousands of lives.

As healthcare facilities nationwide struggle with inadequate oxygen infrastructure, patients continue to die from preventable causes. We demand mandatory installation of oxygen generation plants in all hospitals above 50 beds capacity.

This life-saving measure requires immediate policy implementation with a 24-month compliance timeline, government financial support, and regular monitoring.

We trust in your leadership to implement this critical healthcare reform that will save countless lives and strengthen our medical infrastructure.

Sincerely,
Concerned Citizens of India"""

        targeting_widget.value = "Ministry of Health and Family Welfare, Government of India; State Health Ministers; Hospital Administration Boards; Medical Council of India"

        with output:
            clear_output()
            display(HTML("""
            <div style="background: #e8f5e8; border: 1px solid #4caf50; border-radius: 5px; padding: 15px; color: #2e7d32;">
                <h3>✅ Sample Petition Loaded</h3>
                <p>A high-quality sample petition has been loaded. You can now analyze it or modify it as needed.</p>
            </div>
            """))

    def on_clear_clicked(b):
        """Empty the four text areas and confirm in the output area."""
        title_widget.value = ''
        description_widget.value = ''
        letter_body_widget.value = ''
        targeting_widget.value = ''

        with output:
            clear_output()
            display(HTML("""
            <div style="background: #fff3e0; border: 1px solid #ff9800; border-radius: 5px; padding: 15px; color: #ef6c00;">
                <h3>🗑️ Form Cleared</h3>
                <p>All form fields have been cleared. You can now enter new petition details.</p>
            </div>
            """))

    # Connect event handlers
    analyze_button.on_click(on_analyze_clicked)
    sample_button.on_click(on_sample_clicked)
    clear_button.on_click(on_clear_clicked)

def display_results(feedback, features):
    """Render the analysis results as HTML in the current output area.

    Args:
        feedback: Report dict; this function reads its 'grade',
            'probability', 'overall', 'metrics', 'strengths',
            'improvements' and 'specific_recommendations' entries.
        features: Mapping of feature name to value, used for the
            collapsible "Advanced Metrics" section; missing entries
            are shown as 0.

    All feedback text is HTML-escaped before being inserted into the page.
    Without that, a recommendation that mentions literal tags (for example
    "<b>bold</b>" or "<h3>headers</h3>") is interpreted as markup rather
    than displayed as text.
    """
    # Function-scope import keeps this block independent of the file header
    from html import escape

    def render_section(heading_color, heading, css_class, lines):
        # One titled block containing a styled <div> per (already escaped) line
        rows = ''.join(f'<div class="{css_class}">{line}</div>' for line in lines)
        return (
            f'<div style="margin: 20px 0;"><h3 style="color: {heading_color};">{heading}</h3>'
            f'{rows}</div>'
        )

    # Main results display
    grade_text = escape(str(feedback['grade']))
    overall_text = escape(str(feedback['overall']))
    metric_cards = ' '.join(
        f'<div class="metric-card"><strong>{escape(str(metric))}:</strong><br>{escape(str(value))}</div>'
        for metric, value in feedback['metrics'].items()
    )
    display(HTML(f"""
    <div class="results-container">
        <div style="text-align: center; margin-bottom: 25px;">
            <h2 style="color: #667eea; margin-bottom: 10px;">{grade_text}</h2>
            <h3 style="color: #333; margin-bottom: 15px;">Success Probability: {feedback['probability']:.1%}</h3>
            <p style="font-size: 16px; color: #666;">{overall_text}</p>
        </div>

        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin: 25px 0;">
            {metric_cards}
        </div>
    </div>
    """))

    # Strengths section
    if feedback['strengths']:
        display(HTML(render_section(
            '#4CAF50', '🌟 Strengths', 'recommendation',
            [escape(str(strength)) for strength in feedback['strengths']],
        )))

    # Improvements section
    if feedback['improvements']:
        display(HTML(render_section(
            '#ff9800', '🔧 Areas for Improvement', 'improvement',
            [escape(str(improvement)) for improvement in feedback['improvements']],
        )))

    # Specific recommendations (numbered from 1)
    if feedback['specific_recommendations']:
        display(HTML(render_section(
            '#2196f3', '📋 Specific Recommendations', 'recommendation',
            [
                f'{i}. {escape(str(rec))}'
                for i, rec in enumerate(feedback['specific_recommendations'], 1)
            ],
        )))

    # Next steps
    display(HTML("""
    <div style="margin: 25px 0;">
        <h3 style="color: #667eea;">🎯 Next Steps</h3>
        <div class="recommendation">1. Implement the recommendations above</div>
        <div class="recommendation">2. Re-analyze your petition to track improvements</div>
        <div class="recommendation">3. Consider A/B testing different versions</div>
        <div class="recommendation">4. Launch when you achieve 70%+ success probability</div>
    </div>
    """))

    # Advanced metrics (collapsible)
    advanced_metrics = {
        'Content Analysis': {
            'Total Content Length': f"{features.get('content_comprehensiveness_score', 0):.0f} characters",
            'Title Length': f"{features.get('title_clean_length', 0)} characters",
            'Description Length': f"{features.get('description_clean_length', 0)} characters",
            'HTML Tags': f"{features.get('description_html_tags', 0)} tags"
        },
        'Language Analysis': {
            'Urgency Keywords': f"{features.get('title_urgency_count', 0) + features.get('description_urgency_count', 0)}",
            'Action Keywords': f"{features.get('title_action_count', 0) + features.get('description_action_count', 0)}",
            'Authority References': f"{features.get('targeting_description_authority_count', 0)}",
            'Professional Score': f"{features.get('professional_sophistication_score', 0):.3f}"
        }
    }

    # Create collapsible advanced metrics section
    parts = ['<details style="margin: 20px 0; padding: 15px; background: #f8f9fa; border-radius: 10px;"><summary style="cursor: pointer; font-weight: bold; color: #667eea;">🔍 Advanced Metrics (Click to expand)</summary><div style="margin-top: 15px;">']
    for category, metrics in advanced_metrics.items():
        parts.append(f'<h4 style="color: #333; margin: 15px 0 10px 0;">{category}</h4>')
        parts.extend(
            f'<div style="margin: 5px 0; padding: 8px; background: white; border-radius: 5px;"><strong>{metric}:</strong> {escape(value)}</div>'
            for metric, value in metrics.items()
        )
    parts.append('</div></details>')
    display(HTML(''.join(parts)))

# ============================================================================
# USAGE INSTRUCTIONS AND TIPS
# ============================================================================

def show_usage_tips():
    """Render the static "How to Use This Tool" panel.

    The panel holds three parts: getting-started steps, tips for a high
    success probability, and the success benchmarks. Nothing is returned.
    """
    tips_markup = """
    <div style="background: #e3f2fd; border-radius: 15px; padding: 25px; margin: 20px 0;">
        <h3 style="color: #1565c0; margin-top: 0;">📚 How to Use This Tool</h3>

        <h4 style="color: #1976d2;">Getting Started:</h4>
        <ol style="color: #333;">
            <li><strong>Fill in the form:</strong> Enter your petition title, description, letter body, and target audience</li>
            <li><strong>Use HTML formatting:</strong> Include tags like &lt;strong&gt;, &lt;h3&gt;, &lt;ul&gt;&lt;li&gt; in your description</li>
            <li><strong>Be strategic:</strong> Use urgency language, action words, and specific targets</li>
            <li><strong>Click "Analyze":</strong> Get instant AI-powered predictions and recommendations</li>
            <li><strong>Iterate:</strong> Improve based on feedback and re-analyze</li>
        </ol>

        <h4 style="color: #1976d2;">Tips for High Success Probability:</h4>
        <ul style="color: #333;">
            <li><strong>Content Length:</strong> Aim for 2000+ total characters across all fields</li>
            <li><strong>HTML Formatting:</strong> Use professional formatting with 15+ HTML tags</li>
            <li><strong>Strategic Language:</strong> Include urgency words like "immediate", "urgent", "critical"</li>
            <li><strong>Action Words:</strong> Use "demand", "stop", "implement", "enforce"</li>
            <li><strong>Authority Targeting:</strong> Mention specific officials, departments, or institutions</li>
            <li><strong>Statistics:</strong> Include numbers, percentages, and data when possible</li>
        </ul>

        <h4 style="color: #1976d2;">Success Benchmarks:</h4>
        <div style="background: white; padding: 15px; border-radius: 10px; margin: 15px 0;">
            <div style="margin: 8px 0;"><strong>🎯 Target:</strong> 70%+ success probability</div>
            <div style="margin: 8px 0;"><strong>📝 Content:</strong> 2000+ total characters</div>
            <div style="margin: 8px 0;"><strong>🎨 Formatting:</strong> 15+ HTML tags</div>
            <div style="margin: 8px 0;"><strong>⚡ Language:</strong> 5+ urgency/action keywords</div>
            <div style="margin: 8px 0;"><strong>🏛️ Authority:</strong> Specific targets mentioned</div>
        </div>
    </div>
    """
    panel = HTML(tips_markup)
    display(panel)

# ============================================================================
# ADDITIONAL FEATURES
# ============================================================================

def create_batch_analyzer():
    """Render the static "Advanced Features" panel.

    The panel shows three cards (A/B Testing, Batch Analysis, Export
    Results), each marked "Feature coming soon!". No analysis is performed
    and nothing is returned.
    """
    features_markup = """
    <div style="background: #f3e5f5; border-radius: 15px; padding: 25px; margin: 20px 0;">
        <h3 style="color: #7b1fa2; margin-top: 0;">🔬 Advanced Features</h3>
        <p style="color: #333;">Additional tools for power users and organizations.</p>

        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 15px; margin: 20px 0;">
            <div style="background: white; padding: 20px; border-radius: 10px; border-left: 4px solid #9c27b0;">
                <h4 style="color: #7b1fa2; margin-top: 0;">📊 A/B Testing</h4>
                <p style="color: #666; margin-bottom: 15px;">Compare multiple versions of your petition to find the best approach.</p>
                <em style="color: #999;">Feature coming soon!</em>
            </div>

            <div style="background: white; padding: 20px; border-radius: 10px; border-left: 4px solid #2196f3;">
                <h4 style="color: #1976d2; margin-top: 0;">📋 Batch Analysis</h4>
                <p style="color: #666; margin-bottom: 15px;">Analyze multiple petitions at once from CSV files.</p>
                <em style="color: #999;">Feature coming soon!</em>
            </div>

            <div style="background: white; padding: 20px; border-radius: 10px; border-left: 4px solid #4caf50;">
                <h4 style="color: #388e3c; margin-top: 0;">💾 Export Results</h4>
                <p style="color: #666; margin-bottom: 15px;">Export analysis results to PDF or CSV formats.</p>
                <em style="color: #999;">Feature coming soon!</em>
            </div>
        </div>
    </div>
    """
    panel = HTML(features_markup)
    display(panel)

# ============================================================================
# MAIN APPLICATION LAUNCH
# ============================================================================

def launch_petition_analyzer():
    """Print the startup banner, then render the tips, the form and the feature cards.

    The banner reports whether model artifacts are loaded; when they are,
    it also prints the feature count and the model's class name.

    Returns:
        bool: always True.
    """
    divider = "="*60
    model_loaded = bool(model_artifacts)

    print("🚀 LAUNCHING PETITION SUCCESS PREDICTOR")
    print(divider)
    status = '✅ Trained Model Loaded' if model_loaded else '🎯 Demo Mode Active'
    print(f"🤖 Model Status: {status}")
    if not model_loaded:
        print("⚠️  Upload model files for full functionality")
    else:
        print(f"🔧 Features: {len(model_artifacts['features'])} features")
        print(f"📊 Model Type: {type(model_artifacts['model']).__name__}")
    print(divider)

    # Usage tips first, then the main form, then the advanced-features panel
    for render in (show_usage_tips, create_petition_form, create_batch_analyzer):
        render()

    return True

# Auto-launch the application: banner first, then the UI itself
print("\n🎯 PETITION SUCCESS PREDICTOR - READY TO LAUNCH!", "="*60, sep="\n")

# Launch the analyzer
success = launch_petition_analyzer()

# Report the outcome of the launch
if not success:
    print(
        "\n❌ Application launch failed",
        "Please check that all required files are uploaded",
        sep="\n",
    )
else:
    print(
        "\n✅ Application launched successfully!",
        "📝 Fill out the form above to analyze your petition",
        "🔍 Click 'Load Sample' to see an example",
        sep="\n",
    )

🚀 PETITION SUCCESS PREDICTOR - INTERACTIVE INTERFACE
✅ Trained model loaded successfully
✅ Feature list loaded (74 features)
✅ Categorical encoders loaded
✅ Reference data loaded (3081 records)
✅ Petition processing pipeline initialized with trained model

🎯 PETITION SUCCESS PREDICTOR - READY TO LAUNCH!
🚀 LAUNCHING PETITION SUCCESS PREDICTOR
🤖 Model Status: ✅ Trained Model Loaded
🔧 Features: 74 features
📊 Model Type: GradientBoostingClassifier


Textarea(value='', description='Title:', layout=Layout(height='80px', width='100%'), placeholder='Enter your p…

Textarea(value='', description='Description:', layout=Layout(height='200px', width='100%'), placeholder='Enter…

Textarea(value='', description='Letter Body:', layout=Layout(height='150px', width='100%'), placeholder='Enter…

Textarea(value='', description='Target:', layout=Layout(height='80px', width='100%'), placeholder='Who is this…

Accordion(children=(VBox(children=(Dropdown(description='Locale:', options=(('India (en-IN)', 'en-IN'), ('Unit…

HBox(children=(Button(button_style='primary', description='🔍 Analyze Petition', layout=Layout(height='45px', w…

Output()


✅ Application launched successfully!
📝 Fill out the form above to analyze your petition
🔍 Click 'Load Sample' to see an example


In [8]:
# Run this in your Colab to clean the notebook before downloading/uploading to GitHub

import json
import nbformat
from google.colab import files

def clean_notebook_for_github(notebook_path):
    """
    Write a copy of a Jupyter notebook with widget state and widget outputs removed.

    The notebook is read as nbformat version 4. From every cell the
    'widgets' and 'application/vnd.jupyter.widget-state+json' metadata
    entries are dropped; from every code cell, outputs whose mime bundle
    contains 'application/vnd.jupyter.widget-view+json' are dropped (all
    other outputs are kept). The notebook-level 'widgets' metadata entry
    is removed as well.

    Args:
        notebook_path: Path of the notebook to clean. The file itself is
            not modified.

    Returns:
        The path of the cleaned copy ("<name>_cleaned<ext>" next to the
        original), or None when anything fails; the error is printed.
    """
    import os  # function-scope import: only needed to split the extension

    widget_view_mime = 'application/vnd.jupyter.widget-view+json'

    try:
        # Read the notebook
        with open(notebook_path, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, as_version=4)

        # Clean each cell
        for cell in nb.cells:
            # Remove widget-related metadata
            if 'metadata' in cell:
                cell.metadata.pop('application/vnd.jupyter.widget-state+json', None)
                cell.metadata.pop('widgets', None)

            # Keep text/standard outputs, drop any output that carries a
            # widget view. Stream and error outputs have no 'data' entry, so
            # they are always kept; display_data and execute_result are both
            # checked because either can hold a widget view.
            if cell.cell_type == 'code' and 'outputs' in cell:
                cell.outputs = [
                    output for output in cell.outputs
                    if widget_view_mime not in output.get('data', {})
                ]

        # Clean notebook-level metadata
        if 'metadata' in nb:
            nb.metadata.pop('widgets', None)

        # Insert "_cleaned" before the extension only. A plain str.replace on
        # '.ipynb' would also rewrite directory names such as
        # '.ipynb_checkpoints', and would leave the path unchanged (so the
        # original gets overwritten) when the name has no '.ipynb' in it.
        root, ext = os.path.splitext(notebook_path)
        clean_path = f"{root}_cleaned{ext}"

        # Write the cleaned notebook
        with open(clean_path, 'w', encoding='utf-8') as f:
            nbformat.write(nb, f)

        print(f"✅ Cleaned notebook saved as: {clean_path}")
        return clean_path

    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None
        print(f"❌ Error cleaning notebook: {e}")
        return None

# Alternative: Create a fresh notebook without widgets
def create_github_compatible_version(output_path='petition_predictor_github.ipynb'):
    """
    Create a GitHub-compatible version of the petition predictor

    Builds a small, widget-free notebook (nbformat 4) as a plain dictionary
    and serialises it to JSON. The notebook contains an introduction, a
    package-install cell, a model-loading cell, a simplified rule-based
    ``analyze_petition_simple`` function, a worked example, and a closing note
    pointing readers to Colab for the full widget interface.

    Args:
        output_path: Where to write the notebook. Defaults to
            ``petition_predictor_github.ipynb`` in the current directory.

    Returns:
        The path the notebook was written to.
    """

    notebook_content = {
        "cells": [
            # Introduction and setup instructions
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": [
                    "# 🎯 AI-Powered Petition Success Predictor\n",
                    "\n",
                    "This notebook provides an interactive interface to predict petition success using machine learning.\n",
                    "\n",
                    "## Setup Instructions\n",
                    "\n",
                    "1. Upload your model files:\n",
                    "   - `best_model.pkl`\n",
                    "   - `final_features.pkl`\n",
                    "   - `categorical_encoders.pkl`\n",
                    "   - `processed_petition_data.xlsx` (optional)\n",
                    "\n",
                    "2. Run all cells in order\n",
                    "\n",
                    "3. Use the form interface to analyze petitions\n",
                    "\n",
                    "## Features\n",
                    "\n",
                    "- ✅ Real-time petition success prediction\n",
                    "- ✅ Comprehensive feedback and recommendations\n",
                    "- ✅ Strategic content analysis\n",
                    "- ✅ Professional formatting suggestions\n",
                    "- ✅ Sample petition examples\n"
                ]
            },
            # Package installation and imports
            {
                "cell_type": "code",
                "metadata": {},
                "source": [
                    "# Install and import required packages\n",
                    "!pip install ipywidgets textstat nltk scikit-learn pandas numpy matplotlib seaborn -q\n",
                    "\n",
                    "import warnings\n",
                    "warnings.filterwarnings('ignore')\n",
                    "\n",
                    "import pandas as pd\n",
                    "import numpy as np\n",
                    "import pickle\n",
                    "import re\n",
                    "from pathlib import Path\n",
                    "\n",
                    "print(\"📦 Packages installed successfully!\")"
                ],
                "execution_count": None,
                "outputs": []
            },
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": [
                    "## Model Loading\n",
                    "\n",
                    "Upload your model files and run the cell below to load them."
                ]
            },
            # Model artifact loading
            {
                "cell_type": "code",
                "metadata": {},
                "source": [
                    "# Load model artifacts\n",
                    "# Note: This cell will show instructions to upload files if they're not found\n",
                    "\n",
                    "def load_model_artifacts():\n",
                    "    \"\"\"Load the trained model and required artifacts\"\"\"\n",
                    "    artifacts = {}\n",
                    "    \n",
                    "    try:\n",
                    "        # Load trained model\n",
                    "        with open('best_model.pkl', 'rb') as f:\n",
                    "            artifacts['model'] = pickle.load(f)\n",
                    "        print(\"✅ Trained model loaded successfully\")\n",
                    "        \n",
                    "        # Load feature names\n",
                    "        with open('final_features.pkl', 'rb') as f:\n",
                    "            artifacts['features'] = pickle.load(f)\n",
                    "        print(f\"✅ Feature list loaded ({len(artifacts['features'])} features)\")\n",
                    "        \n",
                    "        # Load categorical encoders\n",
                    "        with open('categorical_encoders.pkl', 'rb') as f:\n",
                    "            artifacts['encoders'] = pickle.load(f)\n",
                    "        print(\"✅ Categorical encoders loaded\")\n",
                    "        \n",
                    "        return artifacts\n",
                    "        \n",
                    "    except FileNotFoundError as e:\n",
                    "        print(f\"📁 Please upload the following files:\")\n",
                    "        print(\"   - best_model.pkl\")\n",
                    "        print(\"   - final_features.pkl\") \n",
                    "        print(\"   - categorical_encoders.pkl\")\n",
                    "        print(\"\\n   Then re-run this cell.\")\n",
                    "        return None\n",
                    "\n",
                    "# Load artifacts\n",
                    "model_artifacts = load_model_artifacts()"
                ],
                "execution_count": None,
                "outputs": []
            },
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": [
                    "## Interactive Petition Analyzer\n",
                    "\n",
                    "Run the cell below to launch the interactive petition analysis interface."
                ]
            },
            # Simplified, rule-based analyzer (no model or widgets required)
            {
                "cell_type": "code",
                "metadata": {},
                "source": [
                    "# Note: Full petition analysis code would go here\n",
                    "# This is a simplified version for GitHub compatibility\n",
                    "\n",
                    "def analyze_petition_simple(title, description, letter_body=\"\", target=\"\"):\n",
                    "    \"\"\"\n",
                    "    Simplified petition analysis function for GitHub display\n",
                    "    \"\"\"\n",
                    "    \n",
                    "    # Basic scoring logic\n",
                    "    score = 0.0\n",
                    "    feedback = []\n",
                    "    \n",
                    "    # Content length analysis\n",
                    "    total_length = len(title) + len(description) + len(letter_body)\n",
                    "    if total_length >= 2000:\n",
                    "        score += 0.4\n",
                    "        feedback.append(\"✅ Excellent content length\")\n",
                    "    elif total_length >= 1000:\n",
                    "        score += 0.25\n",
                    "        feedback.append(\"✅ Good content length\")\n",
                    "    else:\n",
                    "        feedback.append(f\"📝 Increase content length (current: {total_length} chars)\")\n",
                    "    \n",
                    "    # HTML formatting check\n",
                    "    html_tags = len(re.findall('<.*?>', description))\n",
                    "    if html_tags >= 10:\n",
                    "        score += 0.3\n",
                    "        feedback.append(\"✅ Good HTML formatting\")\n",
                    "    else:\n",
                    "        feedback.append(f\"🎨 Add more HTML formatting (current: {html_tags} tags)\")\n",
                    "    \n",
                    "    # Urgency keywords\n",
                    "    urgency_words = ['urgent', 'immediate', 'critical', 'emergency', 'now']\n",
                    "    urgency_count = sum(1 for word in urgency_words if word in (title + description).lower())\n",
                    "    if urgency_count >= 2:\n",
                    "        score += 0.2\n",
                    "        feedback.append(\"✅ Strong urgency language\")\n",
                    "    else:\n",
                    "        feedback.append(\"⚡ Add more urgency keywords\")\n",
                    "    \n",
                    "    # Final probability\n",
                    "    probability = min(score, 0.95)\n",
                    "    \n",
                    "    # Grade\n",
                    "    if probability >= 0.8:\n",
                    "        grade = \"🏆 EXCELLENT (A+)\"\n",
                    "    elif probability >= 0.7:\n",
                    "        grade = \"🎯 VERY GOOD (A)\"\n",
                    "    elif probability >= 0.6:\n",
                    "        grade = \"✅ GOOD (B+)\"\n",
                    "    elif probability >= 0.5:\n",
                    "        grade = \"📈 MODERATE (B)\"\n",
                    "    else:\n",
                    "        grade = \"⚠️ NEEDS WORK (C)\"\n",
                    "    \n",
                    "    return {\n",
                    "        'grade': grade,\n",
                    "        'probability': probability,\n",
                    "        'feedback': feedback\n",
                    "    }\n",
                    "\n",
                    "# Example usage\n",
                    "print(\"🚀 Petition Analyzer Ready!\")\n",
                    "print(\"Use the analyze_petition_simple() function to test petitions.\")\n",
                    "print(\"\\nExample:\")\n",
                    "print('result = analyze_petition_simple(\"Save Our Parks\", \"<p>We need to <strong>immediately</strong> protect our parks...</p>\")')"
                ],
                "execution_count": None,
                "outputs": []
            },
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": [
                    "## Example Analysis\n",
                    "\n",
                    "Try analyzing a sample petition:"
                ]
            },
            # Worked example using the simplified analyzer
            {
                "cell_type": "code",
                "metadata": {},
                "source": [
                    "# Sample petition analysis\n",
                    "sample_title = \"Mandatory Installation of Oxygen Plants in All Hospitals\"\n",
                    "sample_description = \"\"\"<h3><strong>URGENT: Critical Oxygen Crisis</strong></h3>\n",
                    "<p>The <strong>COVID-19 pandemic</strong> has exposed a devastating gap in our healthcare infrastructure.</p>\n",
                    "<ul>\n",
                    "<li><strong>Immediate action</strong> required</li>\n",
                    "<li><strong>Save thousands of lives</strong></li>\n",
                    "</ul>\"\"\"\n",
                    "\n",
                    "# Analyze the sample\n",
                    "result = analyze_petition_simple(sample_title, sample_description)\n",
                    "\n",
                    "print(f\"Grade: {result['grade']}\")\n",
                    "print(f\"Success Probability: {result['probability']:.1%}\")\n",
                    "print(\"\\nFeedback:\")\n",
                    "for item in result['feedback']:\n",
                    "    print(f\"  {item}\")"
                ],
                "execution_count": None,
                "outputs": []
            },
            # Closing note pointing to the full Colab interface
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": [
                    "## Full Interactive Interface\n",
                    "\n",
                    "For the complete interactive interface with forms and real-time feedback, run this notebook in Google Colab where widgets are fully supported.\n",
                    "\n",
                    "The full version includes:\n",
                    "- Interactive forms for petition input\n",
                    "- Real-time analysis with your trained model\n",
                    "- Comprehensive feedback and recommendations\n",
                    "- Professional UI with custom styling\n",
                    "- Sample petition loader\n",
                    "- Advanced metrics and insights\n",
                    "\n",
                    # The original text ended with a stray, unmatched double quote
                    # that showed up in the rendered markdown; it is removed here.
                    "**GitHub Note:** This simplified version is designed for GitHub compatibility. The full interactive version with widgets should be run in Google Colab.\n"
                ]
            }
        ],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "name": "python",
                "version": "3.8.0"
            }
        },
        "nbformat": 4,
        "nbformat_minor": 4
    }

    # Save the GitHub-compatible notebook
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(notebook_content, f, indent=2)

    print(f"✅ Created GitHub-compatible notebook: {output_path}")
    return output_path

# Run the cleaner: first try to clean a notebook that already exists in the
# working directory, then always generate (and offer for download) a fresh
# widget-free notebook as a fallback.
print("🧹 NOTEBOOK CLEANER FOR GITHUB")
print("="*50)

# Option 1: Try to clean existing notebook
try:
    import os

    # List candidate notebooks. Files produced by earlier runs of this cell
    # are skipped so a re-run does not re-clean its own output, and the list
    # is sorted because os.listdir returns entries in arbitrary order.
    notebooks = sorted(
        f for f in os.listdir('.')
        if f.endswith('.ipynb')
        and not f.endswith('_cleaned.ipynb')
        and f != 'petition_predictor_github.ipynb'
    )

    if notebooks:
        print(f"Found notebooks: {notebooks}")

        # Clean the first notebook found
        notebook_to_clean = notebooks[0]
        cleaned_path = clean_notebook_for_github(notebook_to_clean)

        # clean_notebook_for_github returns None on failure (it has already
        # printed the error), so only offer a download on success.
        if cleaned_path:
            print(f"\n📥 Download the cleaned notebook: {cleaned_path}")
            files.download(cleaned_path)
    else:
        print("No notebooks found in current directory")

except Exception as e:
    # Best effort: Option 2 below must still run if cleaning fails.
    print(f"Could not clean existing notebook: {e}")

# Option 2: Create a fresh GitHub-compatible version
print("\n🆕 Creating fresh GitHub-compatible version...")
github_notebook = create_github_compatible_version()

print("\n📥 Download the GitHub-compatible notebook:")
files.download(github_notebook)

print("\n✅ SOLUTION:")
print("1. Download the cleaned/GitHub-compatible notebook")
print("2. Upload it to GitHub instead of the original")
print("3. The simplified version will display properly on GitHub")
print("4. Users can still run the full interactive version in Colab")

🧹 NOTEBOOK CLEANER FOR GITHUB
No notebooks found in current directory

🆕 Creating fresh GitHub-compatible version...
✅ Created GitHub-compatible notebook: petition_predictor_github.ipynb

📥 Download the GitHub-compatible notebook:


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ SOLUTION:
1. Download the cleaned/GitHub-compatible notebook
2. Upload it to GitHub instead of the original
3. The simplified version will display properly on GitHub
4. Users can still run the full interactive version in Colab
