In [1]:
!pip install reportlab
!pip install langchain_openai
!pip install langgraph
!pip install trafilatura

Collecting reportlab
  Downloading reportlab-4.4.1-py3-none-any.whl.metadata (1.8 kB)
Downloading reportlab-4.4.1-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reportlab
Successfully installed reportlab-4.4.1
Collecting langchain_openai
  Downloading langchain_openai-0.3.21-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-core<1.0.0,>=0.3.64 (from langchain_openai)
  Downloading langchain_core-0.3.64-py3-none-any.whl.metadata (5.8 kB)
Collecting langsmith<0.4,>=0.3.45 (from langchain-core<1.0.0,>=0.3.64->langchain_openai)
  Downloading langsmith-0.3.45-py3-none-any.whl.metadata (15 kB)
Downloading langchain_openai-0.3.21-py3-none-any.whl (65 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.2/65.2 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-0.3.64-py3-none-any.whl (438 kB)
[2K   [90m━━━━━━━━━

# Set up OpenRouter and LangChain: initialize the LLM and import the libraries required for web content extraction and PDF generation.


In [None]:
import getpass
import os
import re
import tempfile
from typing import TypedDict

import trafilatura
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas

# 🔐 OpenRouter setup
# Single source of truth for the endpoint used by both the environment
# variable and the ChatOpenAI client below.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

os.environ["OPENAI_API_BASE"] = OPENROUTER_BASE_URL
# getpass hides the key while it is typed, so the secret is not echoed into
# the notebook's saved cell output; strip() drops stray whitespace from pasting.
os.environ["OPENAI_API_KEY"] = getpass.getpass("🔐 Enter your OpenRouter API key: ").strip()

# Chat model shared by the graph nodes defined later in the notebook.
llm = ChatOpenAI(
    model="openai/gpt-3.5-turbo",
    base_url=OPENROUTER_BASE_URL
)


In [3]:
# Mount Google Drive at /content/drive so the generated PDF can be written there.
from google.colab import drive

drive.mount("/content/drive")


Mounted at /content/drive


# Defines the structure of state data used in the LangGraph, including URL, extracted text, summary, and generated PDF path.


In [4]:
class GraphState(TypedDict, total=False):
    """State passed between the LangGraph nodes.

    Declared with ``total=False`` because the state is filled in step by step:
    the graph is invoked with only ``url`` and each node returns a partial
    update, so no single dict is guaranteed to carry all four keys.
    """

    # Address of the page to summarize (supplied when the graph is invoked).
    url: str
    # Text extracted from the page by the fetch node.
    article_text: str
    # Summary produced by the summarize node and tidied by the cross-check node.
    summary: str
    # Location of the PDF written by the PDF node.
    pdf_path: str


# Defines the core LangGraph nodes: fetch web content, summarize it, validate format, and generate a branded PDF.


In [5]:
# 🧩 Node 1: Fetch article
def fetch_url_node(state):
    """Download the page at ``state["url"]`` and extract its text.

    Args:
        state: Graph state; must contain the key ``"url"``.

    Returns:
        Partial state update ``{"url": ..., "article_text": ...}``.

    Raises:
        ValueError: If nothing could be downloaded or no text could be
            extracted. Without this check a ``None`` would be passed on and
            end up in the summarization prompt as the literal text "None".
    """
    url = state["url"]

    downloaded = trafilatura.fetch_url(url)
    if not downloaded:
        raise ValueError(f"Could not download any content from URL: {url}")

    content = trafilatura.extract(downloaded)
    if not content:
        raise ValueError(f"Could not extract any article text from URL: {url}")

    return {"url": url, "article_text": content}

# 🧩 Node 2: Summarize – the system and user prompt text below can be tailored as needed.
def summarize_node(state):
    """Ask the LLM to summarize ``state["article_text"]``.

    The prompt requests at most five sentences. Returns a partial state update
    holding the model's reply under ``"summary"`` plus the unchanged ``"url"``.
    """
    article = state["article_text"]
    messages = [
        SystemMessage(content="You are a summarization expert."),
        HumanMessage(content=f"Summarize this article in no more than 5 sentences:\n\n{article}"),
    ]
    reply = llm.invoke(messages)
    return {"summary": reply.content, "url": state["url"]}

# 🧩 Node 3: Cross-check summary format
def cross_check_summary_node(state):
    """Normalize the summary and cap it at five sentences.

    Sentences are split only where ``.``, ``!`` or ``?`` is followed by
    whitespace, so decimals and version numbers such as "3.5" stay intact and
    question/exclamation marks are kept as sentence endings.

    Args:
        state: Graph state; must contain ``"summary"`` and ``"url"``.

    Returns:
        Partial state update with the cleaned ``"summary"`` and the unchanged
        ``"url"``. A blank summary is returned as an empty string.
    """
    # Collapse newlines and repeated whitespace into single spaces.
    normalized = " ".join(state["summary"].split())
    if not normalized:
        return {"summary": "", "url": state["url"]}

    # The lookbehind keeps the terminating punctuation attached to its sentence.
    sentences = re.split(r"(?<=[.!?])\s+", normalized)
    checked = " ".join(sentences[:5])
    if not checked.endswith((".", "!", "?")):
        checked += "."
    return {"summary": checked, "url": state["url"]}

# 🧩 Node 4: Generate PDF with branding (line-wrapping helper first, node function below)
def _wrap_to_width(pdf, text, font_name, font_size, max_width):
    """Greedily break ``text`` into lines that are at most ``max_width`` points wide."""
    lines = []
    current = ""
    for word in text.split():
        candidate = f"{current} {word}" if current else word
        if pdf.stringWidth(candidate, font_name, font_size) <= max_width:
            current = candidate
            continue
        if current:
            lines.append(current)
        # A single token wider than the line (e.g. a long URL) is hard-broken
        # by characters so that it cannot run off the page either.
        while len(word) > 1 and pdf.stringWidth(word, font_name, font_size) > max_width:
            cut = len(word) - 1
            while cut > 1 and pdf.stringWidth(word[:cut], font_name, font_size) > max_width:
                cut -= 1
            lines.append(word[:cut])
            word = word[cut:]
        current = word
    if current:
        lines.append(current)
    return lines


def generate_pdf_node(state):
    """Write a two-page PDF: a branding page followed by the summary page.

    Args:
        state: Graph state; must contain ``"summary"`` and ``"url"``.

    Returns:
        Partial state update with ``"pdf_path"`` (where the file was written)
        plus the unchanged ``"summary"`` and ``"url"``.
    """
    summary = state["summary"]
    url = state["url"]

    # Fixed location inside the mounted Google Drive; the file is overwritten
    # on every run.
    output_path = "/content/drive/MyDrive/summary.pdf"
    c = canvas.Canvas(output_path, pagesize=A4)

    left_margin = 100
    right_margin = 50
    line_height = 15
    body_font, body_size = "Helvetica", 12
    # Horizontal space available for body text on the page.
    max_width = A4[0] - left_margin - right_margin

    # Page 1: Branding
    c.setFont("Helvetica-Bold", 24)
    c.drawString(left_margin, 780, "Wikipedia")
    c.setFont("Helvetica", 14)
    c.drawString(left_margin, 750, "Website: https://www.wikipedia.org/")
    c.setFont("Helvetica-Oblique", 12)
    c.drawString(left_margin, 730, "The free encyclopedia")
    c.showPage()

    # Page 2: Summary
    c.setFont("Helvetica-Bold", 18)
    c.drawString(left_margin, 780, "URL Summary")
    c.setFont(body_font, body_size)

    # The source line is wrapped too, since long URLs would otherwise overflow.
    y = 750
    for source_line in _wrap_to_width(c, f"Source: {url}", body_font, body_size, max_width):
        c.drawString(left_margin, y, source_line)
        y -= line_height

    # Leave one blank line between the source and the summary text.
    text = c.beginText(left_margin, y - line_height)
    sentences = summary.split('. ')
    last_index = len(sentences) - 1
    for index, sentence in enumerate(sentences):
        sentence = sentence.strip()
        # split('. ') swallows the period of every sentence but the last one.
        if sentence and index < last_index:
            sentence += '.'
        for wrapped in _wrap_to_width(c, sentence, body_font, body_size, max_width):
            text.textLine(wrapped)
    c.drawText(text)
    c.showPage()
    c.save()

    print("✅ PDF saved to Google Drive:", output_path)
    return {"pdf_path": output_path, "summary": summary, "url": url}



# Builds and compiles the LangGraph by defining nodes, setting execution flow, and linking them sequentially.


In [6]:
# Ordered pipeline stages; each stage is wired to the one listed after it.
pipeline_stages = [
    ("fetch_url", fetch_url_node),
    ("summarize", summarize_node),
    ("cross_check", cross_check_summary_node),
    ("generate_pdf", generate_pdf_node),
]

graph = StateGraph(GraphState)
for stage_name, stage_fn in pipeline_stages:
    graph.add_node(stage_name, stage_fn)

stage_order = [stage_name for stage_name, _ in pipeline_stages]
graph.set_entry_point(stage_order[0])

# Chain the stages linearly and terminate the graph after the last one.
for source, target in zip(stage_order, stage_order[1:] + [END]):
    graph.add_edge(source, target)

app = graph.compile()


# Invokes the compiled graph with a target URL and prints the final summary and PDF file location.


In [7]:
# Only the URL is supplied up front; the nodes fill in the rest of the state.
initial_state = {"url": "https://en.wikipedia.org/wiki/LangChain"}
result = app.invoke(initial_state)

final_summary = result["summary"]
pdf_location = result["pdf_path"]
print("✅ Final Summary:\n", final_summary)
print("📄 PDF Location:\n", pdf_location)


✅ PDF saved on Desktop: /content/drive/MyDrive/summary.pdf
✅ Final Summary:
 LangChain is a software framework developed by Harrison Chase, released in October 2022, for integrating large language models into applications. It has gained popularity and funding, with features such as the LangChain Expression Language and LangServe tool for deploying APIs. LangChain's capabilities include chatbots, document summarization, synthetic data generation, and integration with various systems like Amazon, Google, and Microsoft Azure. The framework offers a wide range of tools and APIs for different tasks, such as financial data analysis, web scraping, text-to-speech, and more.
📄 PDF Location:
 /content/drive/MyDrive/summary.pdf
