# Text Summarizer Demo

This notebook demonstrates the text summarization functionality using the OpenAI API.
It uses the same sample texts that are used for testing the Chrome extension.

In [None]:
# Install required packages (if not already installed)
# %pip install openai python-dotenv

In [None]:
import json
import os
from openai import OpenAI
from pathlib import Path

## Setup

Set your OpenAI API key. You can either:
1. Set it as an environment variable: `export OPENAI_API_KEY='your-key-here'`
2. Uncomment the assignment in the code below and set it directly (not recommended: never commit or share a notebook that contains a real key)

In [None]:
# Resolve the OpenAI API key and build the client that later cells use.
# Option 1: Use environment variable (recommended)
api_key = os.getenv('OPENAI_API_KEY')

# Option 2: Set directly (uncomment and add your key)
# api_key = 'sk-your-api-key-here'

# Bind `client` unconditionally so that later cells can rely on the name
# existing even when no key is configured (it stays None in that case).
client = None

if not api_key:
    print("⚠️ Warning: OPENAI_API_KEY not set. Please set your API key.")
else:
    # Build the client first so the success message is only shown once the
    # constructor has actually returned.
    client = OpenAI(api_key=api_key)
    print("✅ API key loaded successfully")

## Load Sample Texts

In [None]:
# Load sample texts from JSON file
sample_data_path = Path('../data/sample_texts.json')

# Decode explicitly as UTF-8 rather than relying on the platform default
# encoding, which differs between operating systems.
with open(sample_data_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# The file is expected to hold a top-level "samples" list whose entries are
# read below via their 'id', 'title' and 'text' keys.
samples = data['samples']
print(f"Loaded {len(samples)} sample texts")

# Display available samples
for sample in samples:
    print(f"\n{sample['id']}. {sample['title']}")
    print(f"   Length: {len(sample['text'])} characters")

## Summarization Function

In [None]:
def summarize_text(text, model='gpt-3.5-turbo', max_tokens=150):
    """
    Summarize text using the OpenAI chat completions API.

    Problems are reported in-band: instead of raising on an API failure the
    function returns a string that starts with "Error:", so the result can
    always be printed directly by the calling cell.

    Args:
        text: Text to summarize
        model: OpenAI model to use
        max_tokens: Maximum tokens in response

    Returns:
        The summary with surrounding whitespace stripped, or an
        "Error: ..." message when the input is empty, the module-level
        `client` is unavailable, the response has no content, or the API
        call raises.
    """
    # Nothing to summarize: skip the network round-trip entirely.
    if text is None or not str(text).strip():
        return "Error: no text provided to summarize"

    # `client` is a module-level name that may be unbound or None when no
    # API key was configured; look it up defensively so the caller gets a
    # readable message rather than a NameError/AttributeError text.
    api_client = globals().get('client')
    if api_client is None:
        return ("Error: OpenAI client is not initialized. "
                "Set OPENAI_API_KEY and re-run the setup cell.")

    try:
        response = api_client.chat.completions.create(
            model=model,
            messages=[
                {
                    'role': 'system',
                    'content': 'You are a helpful assistant that creates concise summaries of text.'
                },
                {
                    'role': 'user',
                    'content': f'Please provide a concise summary of the following text in 2-3 sentences:\n\n{text}'
                }
            ],
            max_tokens=max_tokens,
            temperature=0.7
        )

        content = response.choices[0].message.content
        # The message content can be None; report that explicitly instead of
        # failing on `.strip()`.
        if content is None:
            return "Error: model returned an empty response"
        return content.strip()

    except Exception as e:
        # Deliberate best-effort: surface any API/response error as text.
        return f"Error: {str(e)}"

## Test Summarization on Sample Texts

Let's test the summarization on each sample text and compare with expected results.

In [None]:
# Test on first sample (short article)
# Prints the original text, then the generated summary next to the
# sample's expected summary (only when an API key is configured).
sample = samples[0]

print(f"📄 {sample['title']}")
print(f"\n{'='*80}")
print("\n📝 Original Text:")
print(sample['text'])
print(f"\n{'='*80}")
print("\n✨ Generated Summary:")

if api_key:
    summary = summarize_text(sample['text'])
    print(summary)
    print(f"\n{'='*80}")
    print("\n📋 Expected Summary:")
    print(sample['expected_summary'])
else:
    print("⚠️ Cannot generate summary: API key not set")

In [None]:
# Test on medium article (AI)
# Prints the original text, then the generated summary next to the
# sample's expected summary (only when an API key is configured).
sample = samples[1]

print(f"📄 {sample['title']}")
print(f"\n{'='*80}")
print("\n📝 Original Text:")
print(sample['text'])
print(f"\n{'='*80}")
print("\n✨ Generated Summary:")

if api_key:
    summary = summarize_text(sample['text'])
    print(summary)
    print(f"\n{'='*80}")
    print("\n📋 Expected Summary:")
    print(sample['expected_summary'])
else:
    print("⚠️ Cannot generate summary: API key not set")

In [None]:
# Test on long article (Space Exploration)
# Only the first 500 characters of the text are displayed; the full text is
# still what gets sent to summarize_text below.
sample = samples[2]

print(f"📄 {sample['title']}")
print(f"\n{'='*80}")
print("\n📝 Original Text (truncated for display):")
print(sample['text'][:500] + "...")
print(f"\nFull length: {len(sample['text'])} characters")
print(f"\n{'='*80}")
print("\n✨ Generated Summary:")

if api_key:
    summary = summarize_text(sample['text'])
    print(summary)
    print(f"\n{'='*80}")
    print("\n📋 Expected Summary:")
    print(sample['expected_summary'])
else:
    print("⚠️ Cannot generate summary: API key not set")

## Batch Processing

Process all samples and report the original length, summary length, and compression ratio for each.

In [None]:
# Summarize every sample and print per-sample length/compression statistics.
# NOTE: summarize_text reports failures as "Error: ..." strings; such strings
# are measured here exactly like a real summary.
if api_key:
    results = []

    for sample in samples:
        print(f"\nProcessing: {sample['title']}...")
        summary = summarize_text(sample['text'])

        original_length = len(sample['text'])
        summary_length = len(summary)

        # An empty source text would make the ratio a division by zero.
        if original_length:
            compression_ratio = f"{(1 - summary_length / original_length) * 100:.1f}%"
        else:
            compression_ratio = "n/a"

        results.append({
            'id': sample['id'],
            'title': sample['title'],
            'original_length': original_length,
            'summary_length': summary_length,
            'compression_ratio': compression_ratio,
            'summary': summary
        })

    print("\n" + "="*80)
    print("📊 Summary Statistics")
    print("="*80)

    for result in results:
        # Show at most 100 characters, and mark truncation only when it
        # actually happened.
        preview = result['summary'][:100]
        if len(result['summary']) > 100:
            preview += "..."

        print(f"\n{result['id']}. {result['title']}")
        print(f"   Original: {result['original_length']} chars")
        print(f"   Summary: {result['summary_length']} chars")
        print(f"   Compression: {result['compression_ratio']}")
        print(f"   Summary: {preview}")
else:
    print("⚠️ Cannot process samples: API key not set")

## Custom Text Summarization

Try summarizing your own text!

In [None]:
# Enter your custom text here
custom_text = """
Enter your text here to generate a summary.
This can be any text you want to summarize.
"""

# Only call the API when a key is configured and the text is not blank.
if api_key and custom_text.strip():
    print("📝 Your Text:")
    print(custom_text)
    print("\n" + "="*80)
    print("\n✨ Generated Summary:")
    summary = summarize_text(custom_text)
    print(summary)
else:
    print("⚠️ Please set your API key and enter custom text above")

## Conclusion

This notebook demonstrates:
- Loading sample texts from JSON
- Using OpenAI API for text summarization
- Comparing generated summaries with expected results
- Analyzing compression ratios
- Testing with custom text

The same logic is used in the Chrome extension's background script to generate summaries of highlighted text.