# In [4]:
# summarization-lab.ipynb (Claude + prompt variants + hashtags + retry + autosave)

import json
import os
import time
import random
import boto3
from datetime import datetime
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment so the os.getenv() lookups below can see them.
load_dotenv()

aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
# Temporary credentials (STS / SSO) are only valid together with their session
# token; passing key + secret alone would make every request fail to
# authenticate. None is fine here for long-lived keys.
aws_session_token = os.getenv("AWS_SESSION_TOKEN")
aws_region = os.getenv("AWS_REGION", "us-east-1")
# Bedrock model IDs include a version suffix ("-v1:0"); without it the default
# is not a valid model identifier.
model_id = os.getenv("BEDROCK_MODEL_ID", "anthropic.claude-3-sonnet-20240229-v1:0")
print(model_id)

# Runtime client used by summarize_with_claude() to invoke the model.
bedrock = boto3.client(
    service_name="bedrock-runtime",
    region_name=aws_region,
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_key,
    aws_session_token=aws_session_token,
)

# NOTE: "__file__" is deliberately a plain string here (this code comes from a
# notebook cell). os.path.dirname("__file__") is "", so PROJECT_ROOT resolves
# to the parent of the current working directory.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname("__file__"), '..'))
DATA_PATH = os.path.join(PROJECT_ROOT, 'test_output.json')
SAVE_PATH = os.path.join(PROJECT_ROOT, 'summarized_output.json')

# Load articles; fall back to an empty list so the rest of the script still
# runs (and simply processes nothing) when the input file is missing.
try:
    with open(DATA_PATH, "r", encoding="utf-8") as f:
        articles = json.load(f)
except FileNotFoundError:
    print(f"Could not find test_output.json at: {DATA_PATH}")
    articles = []

# Prompt builders

def build_prompt_v1(article):
    """Build the social-media-style summarization prompt for one article.

    Args:
        article: Mapping with a 'title', an 'authors' iterable of strings,
            and a 'snippet' (used as the abstract).

    Returns:
        A four-line prompt: the instruction, then title, authors
        (comma-separated) and abstract.
    """
    title = article["title"]
    author_list = ", ".join(article["authors"])
    abstract = article["snippet"]
    prompt_lines = [
        "Summarize the following AI research article in a fun and engaging way appropriate for social media using 3-4 sentences.",
        f"Title: {title}",
        f"Authors: {author_list}",
        f"Abstract: {abstract}",
    ]
    return "\n".join(prompt_lines)

def build_prompt_v2(article):
    """Build the technical-writer summarization prompt for one article.

    Only the article's 'snippet' is used; it is appended as the abstract
    after two fixed instruction lines and a blank line.
    """
    instructions = (
        "You are an expert technical writer. Provide a concise, informative summary of this research paper.\n"
        "Focus on any novel methods, key findings, or real-world relevance.\n\n"
    )
    abstract = article["snippet"]
    return instructions + f"Abstract: {abstract}"

def build_hashtag_prompt(article):
    """Build the prompt asking for 3-5 hashtags for one article.

    Uses the article's 'title' and its 'snippet' (as the abstract).
    """
    title = article["title"]
    abstract = article["snippet"]
    return "\n".join(
        (
            "Suggest 3-5 short and relevant hashtags for this AI paper.",
            f"Title: {title}",
            f"Abstract: {abstract}",
        )
    )

# Claude summary function

def summarize_with_claude(prompt, max_tokens=300, temperature=0.7):
    """Send *prompt* to the configured Bedrock model and return its reply text.

    Despite the name, this is a generic single-turn call: the script uses it
    for both summaries and hashtag suggestions.

    Args:
        prompt: Text sent as the only user message.
        max_tokens: Upper bound on generated tokens (default 300, the value
            previously hard-coded).
        temperature: Sampling temperature (default 0.7, the value previously
            hard-coded).

    Returns:
        The text of the first content block of the response, stripped of
        surrounding whitespace.

    Raises:
        Whatever ``bedrock.invoke_model`` raises (callers wrap this in
        ``retry_until_timeout``), plus KeyError/IndexError if the response
        body does not contain ``content[0]["text"]``.
    """
    payload = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "temperature": temperature,
        "messages": [
            {"role": "user", "content": prompt}
        ]
    }
    response = bedrock.invoke_model(
        modelId=model_id,
        contentType="application/json",
        accept="application/json",
        body=json.dumps(payload)
    )
    # The response body is a stream; read it fully before decoding the JSON.
    result = json.loads(response["body"].read())
    return result["content"][0]["text"].strip()

# Retry logic with timeout

def retry_until_timeout(func, max_seconds=600, base_delay=3):
    """Call *func* and retry throttled failures with exponential back-off.

    Args:
        func: Zero-argument callable to invoke.
        max_seconds: Total time budget for retries, in seconds.
        base_delay: Initial back-off in seconds; doubled on each throttled
            attempt, capped at 60 seconds, plus 1-3 seconds of random jitter.

    Returns:
        ``func()``'s result on success;
        ``"[Summary unavailable]"`` if *func* raises an exception whose text
        does not contain ``"ThrottlingException"``;
        ``"[Summary unavailable after max retry time]"`` if the call is still
        throttled once the time budget is used up.

    Notes:
        * *func* is always attempted at least once, even if ``max_seconds``
          is zero or negative.
        * A back-off sleep never extends past the deadline: it is clamped to
          the remaining budget and followed by one final attempt.
        * Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt``
          still aborts the wait.
    """
    # Local import: the file-level import only brings in `datetime` itself.
    from datetime import timezone

    # Monotonic clock so wall-clock adjustments cannot stretch or shrink the budget.
    deadline = time.monotonic() + max_seconds
    attempt = 0
    while True:
        try:
            return func()
        except Exception as e:
            stamp = datetime.now(timezone.utc).isoformat()
            if "ThrottlingException" not in str(e):
                print(f"[{stamp}] Non-throttling error:", e)
                return "[Summary unavailable]"

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break

            backoff = min(base_delay * (2 ** attempt), 60) + random.uniform(1, 3)
            # Never sleep beyond the deadline.
            delay = min(backoff, remaining)
            print(f"[{stamp}] Throttled. Retrying in {delay:.2f}s...")
            time.sleep(delay)
            attempt += 1
    return "[Summary unavailable after max retry time]"

# Summarize and generate hashtags for each article

# Accumulates one enriched record per processed article.
summarized = []
# Only the first two articles are processed.
for article in articles[:2]:
    p1 = build_prompt_v1(article)
    p2 = build_prompt_v2(article)
    htag_prompt = build_hashtag_prompt(article)

    # Each lambda is invoked immediately inside retry_until_timeout, so it
    # always sees this iteration's prompt.
    summary_v1 = retry_until_timeout(lambda: summarize_with_claude(p1))
    summary_v2 = retry_until_timeout(lambda: summarize_with_claude(p2))
    hashtags = retry_until_timeout(lambda: summarize_with_claude(htag_prompt))

    # Original article fields plus the three generated texts.
    result = {
        **article,
        "v1_summary": summary_v1,
        "v2_summary": summary_v2,
        "hashtags": hashtags
    }
    summarized.append(result)

    # Autosave after every article. Write to a temporary file and swap it in
    # with os.replace so an interruption mid-write cannot leave SAVE_PATH
    # truncated and wipe out results saved by earlier iterations.
    tmp_path = f"{SAVE_PATH}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as out:
        json.dump(summarized, out, indent=2, ensure_ascii=False)
    os.replace(tmp_path, SAVE_PATH)

# Preview result
for i, entry in enumerate(summarized):
    print(f"--- Article {i+1} ---")
    print("Prompt v1 Summary:\n", entry['v1_summary'])
    print("\nPrompt v2 Summary:\n", entry['v2_summary'])
    print("\nSuggested Hashtags:\n", entry['hashtags'])
    print("\n")







# --- Captured cell output (the run was interrupted) ---
# [2025-04-10T21:01:32.279101] Throttled. Retrying in 4.18s...
#
# KeyboardInterrupt: