# **Summarizing Documents**

In [None]:
!pip install langchain
!pip install openai
!pip install tiktoken

In [2]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document

In [3]:
import os
os.environ["OPENAI_API_KEY"] = 'YOUR_API_KEY'

In [4]:
llm = OpenAI(temperature=0.9)

In [5]:
with open('sample.txt') as f:
  data = f.read()

In [6]:
#Split text
text_splitter = CharacterTextSplitter()
texts = text_splitter.split_text(data)

In [7]:
#Documentize
docs = [Document(page_content=t) for t in texts]

In [8]:
docs

[Document(page_content="Kenya, officially the Republic of Kenya (Swahili: Jamhuri ya Kenya), is a country in East Africa. A member of the Commonwealth with a population of more than 47.6 million in the 2019 census,[12] Kenya is the 28th most populous country in the world[7] and 7th most populous in Africa. Kenya's capital and largest city is Nairobi, while its oldest and second largest city, which until 1907 was also Kenya's first capital city, is the major port city of Mombasa which includes Mombasa Island in the Indian Ocean and the surrounding mainland. Other important cities include Kisumu and Nakuru. Kenya is bordered by South Sudan to the northwest, Ethiopia to the north, Somalia to the east, Uganda to the west, Tanzania to the south, and the Indian Ocean to the southeast. Kenya's geography, climate and population vary widely, ranging from cold snow-capped mountaintops (Batian, Nelion and Point Lenana on Mount Kenya) with vast surrounding forests, wildlife and fertile agricultura

In [10]:
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=True)
chain.run(docs)



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Kenya, officially the Republic of Kenya (Swahili: Jamhuri ya Kenya), is a country in East Africa. A member of the Commonwealth with a population of more than 47.6 million in the 2019 census,[12] Kenya is the 28th most populous country in the world[7] and 7th most populous in Africa. Kenya's capital and largest city is Nairobi, while its oldest and second largest city, which until 1907 was also Kenya's first capital city, is the major port city of Mombasa which includes Mombasa Island in the Indian Ocean and the surrounding mainland. Other important cities include Kisumu and Nakuru. Kenya is bordered by South Sudan to the northwest, Ethiopia to the north, Somalia to the east, Uganda to the west, Tanzania to the south, and the Indian Ocean to the southeast. Kenya's geography, climate and population vary wid

" \n\nKenya is a country in East Africa with a population of over 47.6 million. It has a diverse geography and climate, and a rich history dating back to the Pleistocene epoch. It gained independence from European colonization in 1963 and is now a member of several international organizations. The country has a lower-middle-income economy, with agriculture and tourism as major sectors. It is a presidential representative democratic republic with a multi-party system, but struggles with corruption and human rights issues. Kenya's economy has seen steady growth in sectors such as finance and tourism, but faces challenges such as food insecurity and poverty. The country has a diverse population, with Christianity as the main religion, and a strong presence in sports, music and literature. Kenyan cuisine is made up of staples such as ugali and regional variations."

# **HTTP Requests**
**LLM Requests Chain**

In [11]:
from langchain.chains import LLMRequestsChain, LLMChain

In [13]:
template = """ Extract the answer to the question 'query' or say 'not found' if the information is not available
{requests_result}
"""

prompt = PromptTemplate(
    input_variables=['query', 'requests_result'],
    template=template
)

In [14]:
llm = OpenAI()

In [15]:
chain = LLMRequestsChain(llm_chain=LLMChain(llm=llm, prompt=prompt))

In [18]:
question = "What is the capital of Kenya?"
inputs = {
    "query": question,
    "url": "https://www.google.com/search?q=" + question.replace(" ", "+"),
}

In [19]:
chain(inputs)

{'query': 'What is the capital of Kenya?',
 'url': 'https://www.google.com/search?q=What+is+the+capital+of+Kenya?',
 'output': '\nThe capital of Kenya is Nairobi.'}

In [20]:
import inspect
print(inspect.getsource(chain._call))

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        from bs4 import BeautifulSoup

        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        # Other keys are assumed to be needed for LLM prediction
        other_keys = {k: v for k, v in inputs.items() if k != self.input_key}
        url = inputs[self.input_key]
        res = self.requests_wrapper.get(url)
        # extract the text from the html
        soup = BeautifulSoup(res, "html.parser")
        other_keys[self.requests_key] = soup.get_text()[: self.text_length]
        result = self.llm_chain.predict(
            callbacks=_run_manager.get_child(), **other_keys
        )
        return {self.output_key: result}

