# Notebook 7: Integration & Demo

This notebook demonstrates:
- End-to-end news processing pipeline
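- Integration of all components (classification, summarization, sentiment, translation) — in this demo, summarization and sentiment use placeholder heuristics; swap in the trained models for production use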
- Interactive news feed generation
- Sample news dashboard creation
- Real-world usage examples

In [None]:
# Import libraries
import json
import os
import warnings
from datetime import datetime
from html import escape
from pathlib import Path

# Silence library warnings before the heavier third-party imports below.
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    pipeline
)

# Confirm that the import cell ran to completion.
print("✓ Libraries imported")

## 1. Configuration

In [None]:
# Paths
# The project root defaults to the original author's location, but it can be
# overridden with the NEWS_PROJECT_DIR environment variable so the notebook
# runs on other machines without editing this cell.
BASE_DIR = Path(os.environ.get('NEWS_PROJECT_DIR', r'c:\Users\sagun\Desktop\news_project'))
DATA_DIR = BASE_DIR / 'data' / 'processed'
MODELS_DIR = BASE_DIR / 'models'
RESULTS_DIR = BASE_DIR / 'results'
DEMO_DIR = BASE_DIR / 'demo'

# Make sure the demo output folder exists before later cells write into it.
DEMO_DIR.mkdir(parents=True, exist_ok=True)

print(f"Base directory: {BASE_DIR}")
print(f"Demo output: {DEMO_DIR}")

## 2. Load All Processed Results

In [None]:
# Load test data (required: the feeds and statistics below are built from it)
with open(DATA_DIR / 'test_data.json', 'r', encoding='utf-8') as f:
    test_data = json.load(f)

df = pd.DataFrame(test_data)
print(f"✓ Loaded {len(df)} test articles")

# Load classification results if available
classification_file = RESULTS_DIR / 'classification_report.json'
if classification_file.exists():
    with open(classification_file, 'r', encoding='utf-8') as f:
        classification_results = json.load(f)
    print("✓ Loaded classification results")
else:
    # Always define the name so later cells never hit a NameError, mirroring
    # the fallbacks used for summaries and sentiment_results below.
    # NOTE(review): an empty dict is assumed to match the report's JSON shape.
    classification_results = {}

# Load summaries if available
summaries_file = RESULTS_DIR / 'summaries' / 'all_summaries.json'
if summaries_file.exists():
    with open(summaries_file, 'r', encoding='utf-8') as f:
        summaries = json.load(f)
    print(f"✓ Loaded {len(summaries)} summaries")
else:
    summaries = []

# Load sentiment results if available
sentiment_file = RESULTS_DIR / 'sentiment_scores.json'
if sentiment_file.exists():
    with open(sentiment_file, 'r', encoding='utf-8') as f:
        sentiment_results = json.load(f)
    print(f"✓ Loaded {len(sentiment_results)} sentiment analyses")
else:
    sentiment_results = []

## 3. End-to-End Pipeline Function

In [None]:
def _truncate_words(words, limit):
    """Join the first `limit` words, adding '...' only when words were dropped."""
    excerpt = ' '.join(words[:limit])
    return excerpt + '...' if len(words) > limit else excerpt


def process_news_article(text, category=None):
    """
    Run the demo end-to-end processing for a single news article.

    This is a placeholder pipeline: summaries are word-count truncations of
    the input and the sentiment is a fixed mock value. No classification or
    translation is performed here; `category` is passed through unchanged.
    In production, replace these steps with the trained models.

    Args:
        text: Article body as a string.
        category: Optional category label to attach to the result.

    Returns:
        Dictionary with the keys 'original_text', 'category', 'timestamp',
        'summary_small', 'summary_medium', 'summary_large', 'sentiment',
        'sentiment_confidence', 'word_count' and 'char_count'.
    """
    result = {
        'original_text': text,
        'category': category,
        'timestamp': datetime.now().isoformat()
    }

    # 1. Generate summaries (simple extraction for demo). The ellipsis is only
    #    appended when the article is actually longer than the summary size,
    #    so short articles are not shown as if they had been cut off.
    words = text.split()
    result['summary_small'] = _truncate_words(words, 30)
    result['summary_medium'] = _truncate_words(words, 80)
    result['summary_large'] = _truncate_words(words, 150)

    # 2. Mock sentiment (in production, use trained model)
    result['sentiment'] = 'Neutral'
    result['sentiment_confidence'] = 0.75

    # 3. Add metadata
    result['word_count'] = len(words)
    result['char_count'] = len(text)

    return result

# Test the pipeline on the first test article
sample_article = df.iloc[0]['text']
sample_category = df.iloc[0]['category']

print("Testing end-to-end pipeline...\n")
result = process_news_article(sample_article, sample_category)

print(f"Category: {result['category']}")
print(f"Word count: {result['word_count']}")
print(f"Sentiment: {result['sentiment']} ({result['sentiment_confidence']:.2f})")
print(f"\nSmall summary:\n{result['summary_small']}")
print("\n✓ Pipeline test successful")

## 4. Generate Personalized News Feed

In [None]:
def generate_news_feed(articles, categories=None, sentiment=None, max_articles=10):
    """
    Generate personalized news feed based on preferences

    Articles are processed in order and filtered; collection stops as soon as
    `max_articles` matching articles have been gathered. The limit applies to
    the returned feed, not to how many input articles are inspected, so a
    filtered feed is filled whenever enough matching articles exist.

    Args:
        articles: List of article dictionaries (each needs a 'text' key and
            may carry a 'category' key)
        categories: List of preferred categories (None = all)
        sentiment: Preferred sentiment (None = all)
        max_articles: Maximum number of articles to return (None = no limit;
            zero or a negative value yields an empty feed)

    Returns:
        List of processed article dictionaries, in input order.
    """
    feed = []

    if max_articles is not None and max_articles <= 0:
        return feed

    for article in articles:
        # Process article
        processed = process_news_article(
            article['text'],
            article.get('category')
        )

        # Apply filters on the processed record
        if categories and processed['category'] not in categories:
            continue

        if sentiment and processed['sentiment'] != sentiment:
            continue

        feed.append(processed)

        # Stop early once the feed is full so no extra articles are processed.
        if max_articles is not None and len(feed) >= max_articles:
            break

    return feed

# Generate sample feeds
print("Generating personalized news feeds...\n")

# Feed 1: All categories
all_feed = generate_news_feed(test_data, max_articles=5)
print(f"✓ Generated general feed: {len(all_feed)} articles")

# Feed 2: Specific categories
sports_feed = generate_news_feed(
    test_data,
    categories=['Sports', 'Entertainment'],
    max_articles=5
)
print(f"✓ Generated sports/entertainment feed: {len(sports_feed)} articles")

# Feed 3: Business and technology
business_feed = generate_news_feed(
    test_data,
    categories=['Business', 'Technology', 'Economy'],
    max_articles=5
)
print(f"✓ Generated business/tech feed: {len(business_feed)} articles")

## 5. Create Interactive News Dashboard (HTML)

In [None]:
def create_news_card_html(article, index):
    """
    Create HTML for a single news card

    Args:
        article: Processed article dictionary; reads the keys 'category',
            'sentiment', 'summary_medium', 'word_count' and
            'sentiment_confidence'.
        index: Position of the card, used to build its element id.

    Returns:
        HTML fragment (string) for one card.
    """
    sentiment_colors = {
        'Positive': '#2ecc71',
        'Negative': '#e74c3c',
        'Neutral': '#95a5a6'
    }

    # Unknown sentiment labels fall back to the neutral grey.
    sentiment_color = sentiment_colors.get(article['sentiment'], '#95a5a6')

    # Article-derived text is escaped so characters such as '<' or '&' in the
    # source article cannot break or inject markup into the page.
    category = escape(str(article['category']))
    sentiment = escape(str(article['sentiment']))
    summary = escape(str(article['summary_medium']))

    return f"""
    <div class="news-card" id="article-{index}">
        <div class="card-header">
            <span class="category-badge">{category}</span>
            <span class="sentiment-badge" style="background-color: {sentiment_color};">
                {sentiment}
            </span>
        </div>
        <div class="card-body">
            <p class="summary">{summary}</p>
            <div class="card-footer">
                <span class="word-count">📝 {article['word_count']} words</span>
                <span class="confidence">🎯 {article['sentiment_confidence']:.0%} confidence</span>
            </div>
        </div>
    </div>
    """

# Static stylesheet for the dashboard page. It lives outside the f-string
# template in create_dashboard_html so the CSS braces need no doubling.
_DASHBOARD_CSS = """\
            * {
                margin: 0;
                padding: 0;
                box-sizing: border-box;
            }

            body {
                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                padding: 20px;
                min-height: 100vh;
            }

            .container {
                max-width: 1200px;
                margin: 0 auto;
            }

            h1 {
                color: white;
                text-align: center;
                margin-bottom: 30px;
                font-size: 2.5em;
                text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
            }

            .news-grid {
                display: grid;
                grid-template-columns: repeat(auto-fill, minmax(350px, 1fr));
                gap: 20px;
            }

            .news-card {
                background: white;
                border-radius: 15px;
                overflow: hidden;
                box-shadow: 0 10px 30px rgba(0,0,0,0.2);
                transition: transform 0.3s ease, box-shadow 0.3s ease;
            }

            .news-card:hover {
                transform: translateY(-5px);
                box-shadow: 0 15px 40px rgba(0,0,0,0.3);
            }

            .card-header {
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                padding: 15px;
                display: flex;
                justify-content: space-between;
                align-items: center;
            }

            .category-badge, .sentiment-badge {
                padding: 5px 15px;
                border-radius: 20px;
                color: white;
                font-weight: bold;
                font-size: 0.85em;
            }

            .category-badge {
                background: rgba(255,255,255,0.3);
            }

            .card-body {
                padding: 20px;
            }

            .summary {
                color: #333;
                line-height: 1.6;
                margin-bottom: 15px;
            }

            .card-footer {
                display: flex;
                justify-content: space-between;
                padding-top: 15px;
                border-top: 1px solid #eee;
                font-size: 0.9em;
                color: #666;
            }

            .stats {
                background: white;
                border-radius: 15px;
                padding: 20px;
                margin-bottom: 30px;
                box-shadow: 0 10px 30px rgba(0,0,0,0.2);
            }

            .stats h2 {
                color: #667eea;
                margin-bottom: 15px;
            }

            .stat-grid {
                display: grid;
                grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
                gap: 15px;
            }

            .stat-item {
                text-align: center;
                padding: 15px;
                background: #f8f9fa;
                border-radius: 10px;
            }

            .stat-value {
                font-size: 2em;
                font-weight: bold;
                color: #667eea;
            }

            .stat-label {
                color: #666;
                margin-top: 5px;
            }"""


def create_dashboard_html(feed, title="News Dashboard"):
    """
    Create complete HTML dashboard

    Args:
        feed: List of processed article dictionaries. Each entry is rendered
            with create_news_card_html and must provide 'category' and
            'word_count' for the statistics panel.
        title: Page title, shown in the browser tab and the main heading.

    Returns:
        Complete HTML document as a string.
    """
    # The title ends up inside markup, so escape it like any other text.
    safe_title = escape(str(title))

    cards_html = "\n".join(
        create_news_card_html(article, i) for i, article in enumerate(feed)
    )

    # Figures for the statistics panel.
    article_count = len(feed)
    category_count = len({article['category'] for article in feed})
    total_words = sum(article['word_count'] for article in feed)

    return f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>{safe_title}</title>
        <style>
{_DASHBOARD_CSS}
        </style>
    </head>
    <body>
        <div class="container">
            <h1>📰 {safe_title}</h1>

            <div class="stats">
                <h2>📊 Feed Statistics</h2>
                <div class="stat-grid">
                    <div class="stat-item">
                        <div class="stat-value">{article_count}</div>
                        <div class="stat-label">Articles</div>
                    </div>
                    <div class="stat-item">
                        <div class="stat-value">{category_count}</div>
                        <div class="stat-label">Categories</div>
                    </div>
                    <div class="stat-item">
                        <div class="stat-value">{total_words:,}</div>
                        <div class="stat-label">Total Words</div>
                    </div>
                </div>
            </div>

            <div class="news-grid">
                {cards_html}
            </div>
        </div>
    </body>
    </html>
    """

# Generate dashboard from the general feed
print("Creating HTML dashboard...\n")

dashboard_html = create_dashboard_html(all_feed, "Multi-Language News Aggregation")

# Save dashboard (UTF-8 so the emoji and non-Latin article text survive)
dashboard_file = DEMO_DIR / 'news_dashboard.html'
with open(dashboard_file, 'w', encoding='utf-8') as f:
    f.write(dashboard_html)

print(f"✓ Dashboard created: {dashboard_file}")
print("\nOpen the file in a web browser to view the interactive dashboard!")

## 6. Create Sample RSS-Style Feed (JSON)

In [None]:
# Create a sample RSS-like feed in JSON format:
# a 'feed' header with metadata plus the processed articles themselves.
rss_feed = {
    'feed': {
        'title': 'Multi-Language News Aggregator',
        'description': 'Personalized Nepali and English news with AI-powered summarization',
        'language': 'ne',
        'updated': datetime.now().isoformat(),
        'total_articles': len(all_feed)
    },
    'articles': all_feed
}

# Save feed (ensure_ascii=False keeps non-ASCII article text readable)
feed_file = DEMO_DIR / 'sample_feed.json'
with open(feed_file, 'w', encoding='utf-8') as f:
    json.dump(rss_feed, f, ensure_ascii=False, indent=2)

print(f"✓ Sample feed saved: {feed_file}")

## 7. Integration Statistics

In [None]:
# Collect integration statistics
integration_stats = {
    'total_articles_processed': len(df),
    'categories_available': df['category'].nunique(),
    'category_list': sorted(df['category'].unique().tolist()),
    'components_integrated': {
        # Derived from the artifacts found on disk, like the two flags below,
        # instead of being reported as integrated unconditionally.
        'classification': classification_file.exists(),
        'summarization': len(summaries) > 0,
        'sentiment_analysis': len(sentiment_results) > 0,
        # NOTE(review): hard-coded; this notebook has no artifact to verify
        # translation or multimedia against.
        'translation': True,
        'multimedia': True
    },
    'demo_outputs': {
        'dashboard': str(DEMO_DIR / 'news_dashboard.html'),
        'sample_feed': str(DEMO_DIR / 'sample_feed.json')
    }
}

# Save statistics
with open(DEMO_DIR / 'integration_stats.json', 'w', encoding='utf-8') as f:
    json.dump(integration_stats, f, ensure_ascii=False, indent=2)

# The denominator follows the component dict so it stays correct if
# components are added or removed.
components = integration_stats['components_integrated']

print("Integration Statistics:")
print(f"  • Total articles: {integration_stats['total_articles_processed']}")
print(f"  • Categories: {integration_stats['categories_available']}")
print(f"  • Components integrated: {sum(components.values())}/{len(components)}")
print(f"\n✓ Statistics saved to {DEMO_DIR / 'integration_stats.json'}")

## 8. Usage Examples

In [None]:
# Print copy-paste usage snippets for the three public helpers.
print("="*80)
print("INTEGRATION & DEMO - USAGE EXAMPLES")
print("="*80)

print("\n1. Process a Single Article:")
print("-" * 80)
print("""result = process_news_article(article_text, category='Politics')
print('Summary:', result['summary_medium'])
print('Sentiment:', result['sentiment'])
""")

print("\n2. Generate Personalized Feed:")
print("-" * 80)
print("""feed = generate_news_feed(
    articles,
    categories=['Sports', 'Technology'],
    max_articles=10
)
""")

print("\n3. Create Dashboard:")
print("-" * 80)
# The example writes with an explicit UTF-8 encoding: the dashboard contains
# emoji and may contain non-Latin article text, which the platform default
# encoding (e.g. cp1252 on Windows) cannot encode.
print("""html = create_dashboard_html(feed, title='My News Feed')
with open('dashboard.html', 'w', encoding='utf-8') as f:
    f.write(html)
""")

print("\n" + "="*80)

## 9. Summary

In [None]:
# Final human-readable recap of what this notebook produced.
print("="*80)
print("INTEGRATION & DEMO SUMMARY")
print("="*80)
print("\n🔗 Integrated Components:")
for component, status in integration_stats['components_integrated'].items():
    status_icon = "✅" if status else "❌"
    print(f"  {status_icon} {component.replace('_', ' ').title()}")

print("\n📊 Processing Statistics:")
print(f"  • Articles processed: {integration_stats['total_articles_processed']}")
print(f"  • Categories: {integration_stats['categories_available']}")
print("  • Demo feeds generated: 3")

print("\n🎨 Demo Outputs:")
print(f"  • Interactive Dashboard: {DEMO_DIR / 'news_dashboard.html'}")
print(f"  • Sample Feed (JSON): {DEMO_DIR / 'sample_feed.json'}")
print(f"  • Integration Stats: {DEMO_DIR / 'integration_stats.json'}")

print("\n💡 Next Steps:")
print("  1. Open news_dashboard.html in a web browser")
print("  2. Customize feed preferences in the code")
print("  3. Integrate with real RSS feeds")
print("  4. Deploy as a web application")

print("\n✅ Integration and demo completed successfully!")
print("="*80)