# News Data API Testing Notebook

Interactive testing environment for the News Data Cloudflare Worker API.

## Setup

Install requirements:
```bash
pip install requests jupyter pandas
```

In [None]:
import requests
import json
from datetime import datetime
from typing import Dict, List, Optional
import pandas as pd

# Configuration
# Root URL of the API under test; make_request() prepends it to every
# endpoint path, so it must not end with a trailing slash.
BASE_URL = "https://news-data.omc345.workers.dev"
# Or use local dev (presumably a locally running dev server - confirm the port)
# BASE_URL = "http://localhost:8787"

def pretty_print(data):
    """Write *data* to stdout as JSON, indented by two spaces."""
    formatted = json.dumps(data, indent=2)
    print(formatted)

def make_request(
    method: str,
    endpoint: str,
    data: Optional[Dict] = None,
    params: Optional[Dict] = None,
    timeout: Optional[float] = None,
):
    """Send a GET or POST request to the API and return the decoded body.

    The request line and the HTTP status code are printed as a side effect.

    Args:
        method: HTTP verb; only "GET" and "POST" are supported.
        endpoint: Path appended verbatim to ``BASE_URL`` (e.g. "/health").
        data: JSON-serialisable body; sent with POST requests only.
        params: Query-string parameters; sent with GET requests only.
        timeout: Seconds to wait for the server, passed straight through to
            ``requests``. ``None`` (the default) waits indefinitely.

    Returns:
        The parsed JSON body, or the raw response text when the body is not
        valid JSON. Callers expecting a dict should check the type first.

    Raises:
        ValueError: If ``method`` is neither "GET" nor "POST".
    """
    # Reject unsupported verbs up front, before building the URL or touching
    # the network.
    if method not in ("GET", "POST"):
        raise ValueError(f"Unsupported method: {method}")

    url = f"{BASE_URL}{endpoint}"

    if method == "GET":
        response = requests.get(url, params=params, timeout=timeout)
    else:
        response = requests.post(url, json=data, timeout=timeout)

    print(f"{method} {url}")
    print(f"Status: {response.status_code}")

    try:
        return response.json()
    except ValueError:
        # JSON decode errors raised by requests subclass ValueError. Catching
        # only that keeps the text fallback without also swallowing
        # KeyboardInterrupt/SystemExit the way a bare `except:` would.
        return response.text

# Confirm the cell ran and show which deployment the helpers will target.
print("âœ“ Setup complete", f"Base URL: {BASE_URL}", sep="\n")

## 1. Health Check

In [None]:
# Ping the health endpoint and dump whatever the API reports back.
result = make_request(method="GET", endpoint="/health")
pretty_print(result)

## 2. Crawl Articles (Raw Only)

Test crawling without AI transformation to save costs.

In [None]:
# Crawl HackerNews - raw only (no AI transformation)
crawl_result = make_request(
    "POST",
    "/api/admin/crawl",
    data={
        "sources": ["hackernews"],
        "count": 2
    }
)

pretty_print(crawl_result)

# Save article IDs for later tests. Start from an empty list so the later
# cells, which all test `if article_ids:`, skip cleanly after a failed crawl.
article_ids = []

# make_request() returns plain text when the body is not JSON, so make sure
# we actually have a dict before calling .get() on it.
if isinstance(crawl_result, dict) and crawl_result.get("success"):
    article_ids = [article["id"] for article in crawl_result.get("articles", [])]
    print(f"\nâœ“ Crawled {len(article_ids)} articles")
    print(f"Article IDs: {article_ids}")

## 3. View Raw Articles

In [None]:
# Get raw article (no transformation)
if article_ids:
    article_id = article_ids[0]
    raw_article = make_request("GET", f"/api/articles/{article_id}")

    if isinstance(raw_article, dict):
        print(f"\nTitle: {raw_article.get('originalTitle')}")
        # `or ''` also covers a field that is present but null.
        print(f"Content length: {len(raw_article.get('originalContent') or '')} chars")
        print(f"Source: {raw_article.get('source')}")
        print(f"URL: {raw_article.get('originalUrl')}")
    else:
        # make_request() returns plain text for non-JSON bodies; show it
        # instead of failing on .get().
        pretty_print(raw_article)

## 4. Transform Article with Different Variants

Generate multiple transformation styles for A/B testing.

In [None]:
# Transform article into multiple variants
if article_ids:
    article_id = article_ids[0]

    transform_result = make_request(
        "POST",
        "/api/admin/transform",
        data={
            "articleId": article_id,
            "variants": ["default", "technical", "casual", "brief"]
        }
    )

    # Display each variant. The isinstance check matters because
    # make_request() returns plain text for non-JSON bodies, and a str has
    # no .get(); such responses fall through to pretty_print below.
    if isinstance(transform_result, dict) and transform_result.get("success"):
        print(f"\nâœ“ Generated {len(transform_result['variants'])} variants\n")

        for variant_data in transform_result["variants"]:
            print(f"{'='*60}")
            print(f"Variant: {variant_data['variant'].upper()}")
            print(f"{'='*60}")
            print(f"Title: {variant_data['title']}")
            # Only a 200-character preview of the body is shown.
            print(f"Content: {variant_data['content'][:200]}...")
            print(f"Model: {variant_data['metadata']['model']}")
            print()
    else:
        pretty_print(transform_result)

## 5. Fetch Specific Variants

Request articles in specific transformation styles.

In [None]:
# Test getting different variants
if article_ids:
    article_id = article_ids[0]
    variants_to_test = ["raw", "default", "technical", "casual", "brief"]

    results = []

    for variant in variants_to_test:
        article = make_request(
            "GET",
            f"/api/articles/{article_id}",
            params={"variant": variant}
        )

        # make_request() returns plain text for non-JSON bodies; skip those
        # rows instead of failing on .get().
        if not isinstance(article, dict):
            print(f"Skipping variant {variant!r}: response was not JSON")
            continue

        # The raw variant is read from the original* fields; every other
        # variant is read from title/content.
        if variant == "raw":
            title_key, content_key = "originalTitle", "originalContent"
        else:
            title_key, content_key = "title", "content"

        # `or ""` also covers fields that are present but null.
        title = article.get(title_key) or ""
        content_length = len(article.get(content_key) or "")

        results.append({
            "Variant": variant,
            "Title": (title[:50] + "...") if len(title) > 50 else title,
            "Content Length": content_length
        })

    # Display as table
    df = pd.DataFrame(results)
    print(df.to_string(index=False))

## 6. List Available Variants

In [None]:
# Check which variants have been generated
if article_ids:
    article_id = article_ids[0]

    variants_list = make_request(
        "GET",
        f"/api/articles/{article_id}/variants"
    )

    if isinstance(variants_list, dict):
        print(f"\nAvailable variants for {article_id}:")
        for variant in variants_list.get("variants", []):
            print(f"  â€¢ {variant}")
    else:
        # make_request() returns plain text for non-JSON bodies; show it
        # instead of failing on .get().
        pretty_print(variants_list)

## 7. Batch Transform by Source

In [None]:
# Batch transform latest HackerNews articles
batch_result = make_request(
    "POST",
    "/api/admin/transform",
    data={
        "source": "hackernews",
        "limit": 3,
        "variant": "default"
    }
)

# The isinstance check matters because make_request() returns plain text for
# non-JSON bodies, and a str has no .get(); such responses are simply dumped.
if isinstance(batch_result, dict) and batch_result.get("success"):
    print(f"\nâœ“ Transformed {batch_result['count']} articles")

    for result in batch_result.get("results", [])[:3]:  # Show first 3
        print(f"\nArticle: {result['articleId']}")
        for variant in result.get("variants", []):
            print(f"  Variant: {variant['variant']}")
            print(f"  Title: {variant['title'][:60]}...")
else:
    pretty_print(batch_result)

## 8. Compare Variants Side-by-Side

In [None]:
# Compare different transformation styles
if article_ids:
    article_id = article_ids[0]

    # One row per variant:
    # (variant name, heading, title key, content key, preview length).
    # A preview length of None prints the full content with no ellipsis.
    comparison = [
        ("raw", "ðŸ“„ RAW (Original)", "originalTitle", "originalContent", 150),
        ("default", "âœ¨ DEFAULT (General Audience)", "title", "content", 150),
        ("technical", "ðŸ”§ TECHNICAL (Developer Focus)", "title", "content", 150),
        ("casual", "ðŸ’¬ CASUAL (Conversational)", "title", "content", 150),
        ("brief", "âš¡ BRIEF (Ultra-concise)", "title", "content", None),
    ]

    # Fetch all variants first so the request log is not interleaved with
    # the comparison output.
    fetched = {}
    for variant_name, *_ in comparison:
        response = make_request(
            "GET",
            f"/api/articles/{article_id}",
            params={"variant": variant_name}
        )
        if not isinstance(response, dict):
            # make_request() returns plain text for non-JSON bodies; fall
            # back to an empty dict so the printing below still works.
            print(f"Warning: non-JSON response for variant {variant_name!r}")
            response = {}
        fetched[variant_name] = response

    # Keep the per-variant module-level names available to later cells.
    raw, default, technical, casual, brief = (fetched[row[0]] for row in comparison)

    print(f"\n{'='*80}")
    print(f"VARIANT COMPARISON FOR: {article_id}")
    print(f"{'='*80}\n")

    for variant_name, heading, title_key, content_key, preview_len in comparison:
        body = fetched[variant_name]
        # `or ''` also covers a content field that is present but null.
        content = body.get(content_key) or ''

        print(heading)
        print(f"Title: {body.get(title_key)}")
        if preview_len is None:
            print(f"Content: {content}\n")
        else:
            print(f"Content: {content[:preview_len]}...\n")

## 9. Test Multiple Sources

In [None]:
# Crawl from multiple sources
multi_source_result = make_request(
    "POST",
    "/api/admin/crawl",
    data={
        "sources": ["hackernews", "t24"],
        "count": 1
    }
)

# The isinstance check matters because make_request() returns plain text for
# non-JSON bodies, and a str has no .get(); such responses are simply dumped.
if isinstance(multi_source_result, dict) and multi_source_result.get("success"):
    print(f"\nâœ“ Crawled from multiple sources")

    # Group titles by source; setdefault creates each list on first use.
    by_source = {}
    for article in multi_source_result.get("articles", []):
        by_source.setdefault(article["source"], []).append(article["originalTitle"])

    for source, titles in by_source.items():
        print(f"\n{source.upper()}:")
        for title in titles:
            print(f"  â€¢ {title}")
else:
    pretty_print(multi_source_result)

## 10. List All Articles

In [None]:
# List all articles
articles_result = make_request(
    "GET",
    "/api/articles",
    params={"limit": 10}
)

# Require a dict: make_request() may return plain text, and `in` on a str is
# a substring test that would pass for any body containing "articles" and
# then fail when indexed.
if isinstance(articles_result, dict) and "articles" in articles_result:
    articles_df = pd.DataFrame([
        {
            "ID": a["id"],
            "Source": a["source"],
            # Titles longer than 50 characters are cut and marked with "...".
            "Title": (a["originalTitle"][:50] + "...") if len(a["originalTitle"]) > 50 else a["originalTitle"],
            "Language": a["language"]
        }
        for a in articles_result["articles"]
    ])

    print(articles_df.to_string(index=False))
    print(f"\nTotal: {articles_result['count']} articles")
else:
    pretty_print(articles_result)

## 11. Performance Test: Variant Caching

In [None]:
import time

if article_ids:
    article_id = article_ids[0]

    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # intervals; time.time() can jump if the system clock is adjusted.

    # First request (generates variant)
    start = time.perf_counter()
    first = make_request("GET", f"/api/articles/{article_id}", params={"variant": "default"})
    first_time = time.perf_counter() - start

    # Second request (should be cached)
    start = time.perf_counter()
    second = make_request("GET", f"/api/articles/{article_id}", params={"variant": "default"})
    second_time = time.perf_counter() - start

    print("\nPerformance Comparison:")
    print(f"First request (generation): {first_time:.2f}s")
    print(f"Second request (cached): {second_time:.2f}s")
    # Guard the ratio so a zero-length measurement cannot raise
    # ZeroDivisionError.
    if second_time > 0:
        print(f"Speedup: {first_time/second_time:.1f}x faster")
    else:
        print("Speedup: n/a (second request too fast to measure)")

## 12. Clean Up (Optional)

Clear KV indexes for testing.

In [None]:
# Uncomment to clean storage
# clean_result = make_request("POST", "/api/admin/clean")
# pretty_print(clean_result)

## Summary

This notebook demonstrated:

1. ✅ Raw article crawling (no AI costs)
2. ✅ On-demand transformation with multiple variants
3. ✅ Fetching articles in specific styles
4. ✅ Batch transformation by source
5. ✅ Variant caching for performance
6. ✅ Multi-source crawling

### Next Steps

- Integrate with your frontend
- A/B test different variants
- Set up automated crawling with `wrangler cron`
- Monitor AI costs and optimize variant usage

### Documentation

- [Variant System Guide](../docs/VARIANT_SYSTEM.md)
- [API Documentation](../docs/API.md)
- [Architecture Overview](../docs/ARCHITECTURE.md)