# Summary with Refine Chain

In [None]:
import os
import openai
from IPython.display import display, HTML, Markdown
from pprint import pprint

from dotenv import load_dotenv, find_dotenv
# Load the local .env file; the result is bound to `_` because it is not used.
_ = load_dotenv(find_dotenv()) # read local .env file
# Hand the key to the openai module. Indexing os.environ (rather than .get)
# raises KeyError when OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
from langchain.callbacks import OpenAICallbackHandler

# One callback handler instance for the whole notebook: it is passed to the
# chain run and printed again in the last cell.
# NOTE(review): presumably it accumulates token usage/cost across calls —
# confirm against the LangChain documentation.
totals_cb = OpenAICallbackHandler()

# Print the handler before the chain has been run, as a baseline.
print(totals_cb)

---

## Mind the costs!!!

This notebook sends many requests to OpenAI LLM endpoints and consumes a very large number of tokens.

Running it with the latest and greatest models may cost several dollars per summarized PDF!!!

---

In [None]:
from langchain.document_loaders import PyPDFLoader

# Candidate arXiv PDFs. The trailing comment on each entry records the
# approximate total token usage noted for that paper.
papers=[
    "https://arxiv.org/pdf/2304.00612.pdf", # total tokens used - ca. 30k
    "https://arxiv.org/pdf/2305.04091.pdf", # total tokens used - ca. 42k
    "https://arxiv.org/pdf/2307.10195.pdf", # total tokens used - ca. 24k
    "https://arxiv.org/pdf/2210.03629.pdf", # total tokens used - ca. 86k
    "https://arxiv.org/pdf/2307.10169.pdf", # total tokens used - ca. 200k
]

# Only the first entry is processed; change the index to summarize another
# paper (mind the token counts listed above).
loader = PyPDFLoader(papers[0])

# `docs` is the input later handed to the summarization chain.
docs = loader.load()

# The message below treats each loaded item as one page of the PDF.
print(f"Document has {len(docs)} pages\n")
# Inspect the first loaded item.
pprint(docs[0])

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain

# Build a summarization chain of type "refine" on a chat model with
# temperature 0.0.
# NOTE(review): return_intermediate_steps=True is presumably what provides the
# "intermediate_steps" entry read later in the notebook — confirm in the docs.
summary_chain_refine = load_summarize_chain(ChatOpenAI(temperature=0.0), chain_type="refine", return_intermediate_steps=True)

In [None]:
# Initial chain prompt template.
# The bare expression is the last statement of the cell, so the notebook shows
# its value as the cell output.

summary_chain_refine.initial_llm_chain.prompt.template

In [None]:
# Refine chain prompt template.
# The bare expression is the last statement of the cell, so the notebook shows
# its value as the cell output.

summary_chain_refine.refine_llm_chain.prompt.template

In [None]:
# Run the refine chain on the loaded documents, with totals_cb attached as a
# callback for this call. The result is indexed by "intermediate_steps" and
# "output_text" in the following cells.
# NOTE(review): this is presumably the step that issues the (billable) LLM
# requests — see the cost warning above.
summary_refine = summary_chain_refine(docs, callbacks=[totals_cb])

In [None]:
# Pretty-print the "intermediate_steps" entry of the chain result.
pprint(summary_refine["intermediate_steps"])

In [None]:
# Render the "output_text" entry of the chain result as Markdown.
display(Markdown(summary_refine["output_text"]))

In [None]:
# Show the callback handler again after the chain run, for comparison with the
# printout made right after it was created.
pprint(totals_cb)