In [1]:
!pip install gradio
!pip install langchain_google_genai
!pip install tiktoken openai langchain --quiet
!pip install langchain-community langchain-google-vertexai beautifulsoup4 langgraph pypdf gradio --quiet
!pip install matplotlib.pyplot as plt

Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.3 (from gradio)
  Downloading gradio_client-1.4.3-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [2]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langgraph.constants import Send
from langgraph.graph import END, START, StateGraph
from langchain.document_loaders import PyPDFLoader
import textwrap
from typing import List, Literal, TypedDict

In [3]:

import getpass

# Never commit an API key to the notebook: reuse the key if the environment
# already provides one, otherwise prompt for it without echoing the input.
# `.strip()` guards against stray whitespace (the previously hardcoded value
# started with a space), which would be sent as part of the key.
if not os.environ.get("GOOGLE_API_KEY", "").strip():
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ").strip()

# Gemini chat model shared by the summarization cells below.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2
)


In [4]:

# Three tiny in-memory documents used to exercise the summarization chains.
# Each entry is (title stored in the metadata, text stored as the page content).
documents = [
    Document(page_content=text, metadata={"title": title})
    for title, text in (
        ("apple_book", "Apples are red"),
        ("blueberry_book", "Blueberries are blue"),
        ("banana_book", "Bananas are yellow"),
    )
]


In [5]:

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split documents into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents handed to the splitter's ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever ``split_documents`` returns for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)


# Chunk the sample documents with the default settings.
docs = split_docs(documents)

SIMPLE MAP REDUCE SUMMARIZATION

In [6]:

# Build LangChain's map-reduce summarization chain on top of the shared `llm`.
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=False)

# `Chain.run` is deprecated in favour of `invoke`. `invoke` returns a dict, so
# the summary text is read from its "output_text" entry.
output_summary = chain.invoke({"input_documents": docs})["output_text"]

# Wrap long summaries at 100 columns so they stay readable in the cell output.
wrapped_text = textwrap.fill(output_summary, width=100)
print("Map-Reduce Summary:\n", wrapped_text)

  output_summary = chain.run(docs)


Map-Reduce Summary:
 Fruits have various colors.


ITERATIVE REFINEMENT

In [7]:

class State(TypedDict):
    """State dictionary passed between the nodes of the refinement graph."""

    # Texts to summarize; the driver cell fills this with each document's page_content.
    contents: List[str]
    # Position in `contents` of the next text to fold into the summary
    # (set to 1 after the initial summary, then incremented by each refine step).
    index: int
    # Summary produced so far.
    summary: str

# Prompt + chain for the first pass: summarize a single piece of text supplied as {context}.
summarize_prompt = ChatPromptTemplate([("human", "Write a concise summary of the following: {context}")])
# Pipe the prompt into the model and parse the model output into a plain string.
initial_summary_chain = summarize_prompt | llm | StrOutputParser()

# Prompt for every later pass: it receives the summary so far ({existing_answer})
# together with the next piece of text ({context}).
refine_template = """
Produce a final summary.
Existing summary up to this point:
{existing_answer}
New context:
------------
{context}
------------
Given the new context, refine the original summary.
"""
refine_prompt = ChatPromptTemplate([("human", refine_template)])
# Same prompt -> model -> string pipeline as above, built on the refine prompt.
refine_summary_chain = refine_prompt | llm | StrOutputParser()

async def generate_initial_summary(state: State, config):
    """Graph node: summarize the first entry of ``state["contents"]``.

    Args:
        state: Current graph state; only ``contents[0]`` is read.
        config: Passed through unchanged to ``initial_summary_chain.ainvoke``.

    Returns:
        A state update holding the new ``summary`` and ``index`` set to 1.
    """
    first_text = state["contents"][0]
    first_summary = await initial_summary_chain.ainvoke(first_text, config)
    return {"summary": first_summary, "index": 1}

async def refine_summary(state: State, config):
    """Graph node: fold the text at ``state["index"]`` into the running summary.

    Args:
        state: Current graph state; reads ``contents``, ``index`` and ``summary``.
        config: Passed through unchanged to ``refine_summary_chain.ainvoke``.

    Returns:
        A state update holding the refined ``summary`` and ``index`` advanced by one.
    """
    next_text = state["contents"][state["index"]]
    refine_inputs = {"existing_answer": state["summary"], "context": next_text}
    refined = await refine_summary_chain.ainvoke(refine_inputs, config)
    return {"summary": refined, "index": state["index"] + 1}

def should_refine(state: State) -> Literal["refine_summary", END]:
    """Routing function: pick the next node from the refinement progress.

    Returns ``END`` once ``state["index"]`` has reached the number of entries
    in ``state["contents"]``; otherwise returns ``"refine_summary"``.
    """
    if state["index"] >= len(state["contents"]):
        return END
    return "refine_summary"

# Assemble the refinement workflow as a graph over `State`.
graph = StateGraph(State)
# Register the two nodes: one for the initial summary, one for each refinement step.
graph.add_node("generate_initial_summary", generate_initial_summary)
graph.add_node("refine_summary", refine_summary)
# Execution starts at the initial-summary node.
graph.add_edge(START, "generate_initial_summary")
# After either node, `should_refine` chooses between another refine step and END.
graph.add_conditional_edges("generate_initial_summary", should_refine)
graph.add_conditional_edges("refine_summary", should_refine)
# Compile the graph into the runnable `app` used by the streaming loop below.
app = graph.compile()

async for step in app.astream({"contents": [doc.page_content for doc in documents]}, stream_mode="values"):
    if summary := step.get("summary"):
        print("Iterative Refinement Summary:\n", summary)



Iterative Refinement Summary:
 Apples can be red.

Iterative Refinement Summary:
 Apples can be red, and blueberries are blue.

Iterative Refinement Summary:
 Apples are red, blueberries are blue, and bananas are yellow.



PDF ACCESSING

In [33]:
from google.colab import userdata
import os

# Read the key from Colab's secret store instead of hardcoding it in the notebook.
# Requires a Colab secret named GOOGLE_API_KEY with notebook access enabled.
# `.strip()` guards against stray whitespace (the previously hardcoded value
# started with a space), which would be sent as part of the key.
os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY').strip()



In [34]:
from langchain.document_loaders import PyPDFLoader
from langchain.chains.summarize import load_summarize_chain
from langchain_google_genai import ChatGoogleGenerativeAI



In [35]:
# Gemini chat model used by `summarize_pdf` below.
# Same settings as the model created earlier in this notebook.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)



In [36]:
def summarize_pdf(pdf_file_path):
    """Summarize a PDF with LangChain's "refine" summarization chain.

    Args:
        pdf_file_path: Path handed to ``PyPDFLoader``.

    Returns:
        The result of invoking the chain on the loaded documents; the caller
        in this notebook reads the summary text from its ``'output_text'`` entry.
    """
    pdf_chunks = PyPDFLoader(pdf_file_path).load_and_split()
    refine_chain = load_summarize_chain(llm, chain_type="refine")
    return refine_chain.invoke(pdf_chunks)



In [37]:
# Summarize the sample PDF from the runtime's /content directory,
# then print only the summary text from the chain result.
summary = summarize_pdf("/content/sample.pdf")
print(summary["output_text"])

Dhanush is a highly successful Indian actor, filmmaker, lyricist, and singer primarily known for his work in Tamil cinema.  With over 50 films and numerous awards including four National Film Awards, he's one of India's highest-paid actors, achieving critical and commercial success through films like *Aadukalam*, *Velaiilla Pattadhari*, and *Asuran*, with *Raayan* being his highest-grossing film to date.

