<a href="https://colab.research.google.com/github/sudarshan-koirala/youtube-stuffs/blob/main/PDFSummarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PDF Summarizer in a few lines of code using Gradio, OpenAI and LangChain

## Install necessary packages

[LangChain documentation](https://docs.langchain.com/docs/)

In [None]:
# Install the packages this notebook imports (gradio, openai, pypdf, tiktoken, langchain).
# The -q flag asks pip for quieter output.
!pip install -q gradio openai pypdf tiktoken langchain

In [None]:
import os
# Placeholder value: substitute your own OpenAI API key before running, and
# avoid committing a real key to version control.
# NOTE(review): presumably the OpenAI client created later reads this variable
# from the environment -- confirm against the client's documentation.
os.environ.update({"OPENAI_API_KEY": "sk-xxxxxxxxxxxxxxxxxxxxx"})

In [None]:

import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens produced when ``string`` is encoded with tiktoken.

    Args:
        string: The text to tokenize.
        encoding_name: Name of the tiktoken encoding to look up
            (for example ``"cl100k_base"``).

    Returns:
        The length of the token sequence returned by the encoding.
    """
    tokens = tiktoken.get_encoding(encoding_name).encode(string)
    return len(tokens)

# Quick check of the helper: as the last expression in the cell, the token
# count is shown as the cell's output.
num_tokens_from_string(string="tiktoken is great!", encoding_name="cl100k_base")

In [None]:
import gradio as gr
from langchain import OpenAI, PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader

# Module-level LLM handle; `summarize_pdf` passes it to the summarization chain.
# NOTE(review): temperature=0 presumably requests the least random sampling so
# summaries are as repeatable as possible -- confirm against the OpenAI docs.
llm = OpenAI(temperature=0)

## LangChain part
#### Function that takes a PDF file path as input and returns a summary of that PDF
- LangChain's `PyPDFLoader` loads the PDF
- The loaded document is then split into smaller chunks
- Finally, `load_summarize_chain` creates a summarization chain that is run over those chunks

In [None]:
def summarize_pdf(pdf_file_path):
    """Summarize the PDF found at ``pdf_file_path``.

    The PDF is loaded and split into documents with ``PyPDFLoader``, and a
    ``map_reduce`` summarization chain built on the module-level ``llm`` is
    run over those documents.

    Args:
        pdf_file_path: Location of the PDF, passed straight to ``PyPDFLoader``.

    Returns:
        Whatever the summarization chain's ``run`` call returns
        (presumably the summary text -- it is shown in a text box by the UI).
    """
    chunks = PyPDFLoader(pdf_file_path).load_and_split()
    summarizer = load_summarize_chain(llm, chain_type="map_reduce")
    return summarizer.run(chunks)

In [None]:
# Example run against a PDF under /content (the file must already exist at
# this path -- presumably uploaded to the Colab session beforehand).
summarize = summarize_pdf("/content/OA_Paper_2023_04_15.pdf")
# Bare expression so the notebook shows the summary as the cell's output.
summarize

## Create a simple Gradio UI (if you prefer a UI)

In [None]:

# UI components: the user types the path of a PDF, and the summary comes back
# in a second text box.
input_pdf_path = gr.components.Textbox(label="Provide the PDF file path")
output_summary = gr.components.Textbox(label="Summary")

# Build the UI first and keep the Interface object itself bound to `interface`.
# Chaining `.launch()` onto the constructor would bind launch()'s return value
# instead, leaving no handle for closing or relaunching the app later.
interface = gr.Interface(
    fn=summarize_pdf,
    inputs=input_pdf_path,
    outputs=output_summary,
    title="PDF Summarizer",
    description="Provide PDF file path to get the summary.",
)

# Start the app; share=True additionally requests a public share link.
interface.launch(share=True)