# 📄 AWS Bedrock + LangChain PDF Summarizer Demo
This notebook demonstrates how to summarize a PDF file using Amazon Bedrock with Claude and LangChain.

In [None]:
import boto3
from langchain.llms.bedrock import Bedrock
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
import PyPDF2

In [None]:
# --- CONFIGURATION ---
# AWS region passed to the Bedrock runtime client below.
BEDROCK_REGION = "us-east-1"
# Bedrock model identifier handed to the LangChain LLM wrapper.
MODEL_ID = "anthropic.claude-v2"

# --- INIT BEDROCK CLIENT ---
# NOTE(review): no credentials are passed here, so boto3 presumably resolves
# them from its default chain (env vars, shared config, role) — confirm.
boto3_bedrock = boto3.client(
    service_name="bedrock-runtime",
    region_name=BEDROCK_REGION,
)
# LangChain LLM wrapper that routes calls through the client created above.
llm = Bedrock(client=boto3_bedrock, model_id=MODEL_ID)

In [None]:
# --- FUNCTION: Load PDF and Extract Text ---
def extract_text_from_pdf(pdf_path, max_chars=4000):
    """Return the text of a PDF, truncated to at most ``max_chars`` characters.

    Pages are read in order and their extracted text is concatenated with no
    separator. Pages that yield no text (for example image-only pages) are
    treated as empty instead of raising ``TypeError``.

    Args:
        pdf_path: Path of the PDF file to read.
        max_chars: Maximum number of characters to return. This is a rough
            guard on prompt size: it counts characters, not model tokens.
            Pass ``None`` to return the complete text.

    Returns:
        The concatenated page text, cut to ``max_chars`` characters.

    Raises:
        OSError: If ``pdf_path`` cannot be opened.
    """
    page_texts = []
    collected = 0
    with open(pdf_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        for page in reader.pages:
            # extract_text() may return None/"" when a page has no text layer.
            page_text = page.extract_text() or ""
            page_texts.append(page_text)
            collected += len(page_text)
            # Stop early: later pages could not survive the truncation anyway.
            if max_chars is not None and collected >= max_chars:
                break
    text = "".join(page_texts)
    return text if max_chars is None else text[:max_chars]

In [None]:
# --- LOAD PDF ---
# Path of the PDF to summarize.
pdf_path: str = "sample.pdf"
# extract_text_from_pdf returns only the leading slice of the PDF's text, so
# raw_text may not contain the whole document.
raw_text: str = extract_text_from_pdf(pdf_path)
# Wrap the plain text in a LangChain Document for the summarize chain.
document: Document = Document(page_content=raw_text)

In [None]:
# --- PROMPT TEMPLATE ---
# The template declares a single input variable, {content}, which receives the
# text of the documents being summarized.
prompt_template = """Summarize the following content professionally:

{content}

Summary:
"""
prompt = PromptTemplate.from_template(prompt_template)
# The "stuff" chain injects the combined document text under the variable name
# "text" by default. The prompt uses {content} instead, so the name must be
# passed explicitly or chain construction fails validation with
# "document_variable_name text was not found in llm_chain input_variables".
chain = load_summarize_chain(
    llm,
    chain_type="stuff",
    prompt=prompt,
    document_variable_name="content",
)

In [None]:
# --- RUN SUMMARY ---
# The chain takes a list of documents; this notebook supplies exactly one.
summary = chain.run([document])
# One print call: header, then the summary, each followed by a newline.
print("\n📄 Document Summary:\n", summary, sep="\n")