## Chat Models - Summarizing Chains


In [6]:
%pip install langchain_community bs4

Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting beautifulsoup4 (from bs4)
  Downloading beautifulsoup4-4.12.3-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4->bs4)
  Downloading soupsieve-2.5-py3-none-any.whl.metadata (4.7 kB)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Downloading beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)
   ---------------------------------------- 0.0/147.9 kB ? eta -:--:--
   -------- ------------------------------- 30.7/147.9 kB 1.4 MB/s eta 0:00:01
   ---------------------------------------- 147.9/147.9 kB 1.8 MB/s eta 0:00:00
Downloading soupsieve-2.5-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4, bs4
Successfully installed beautifulsoup4-4.12.3 bs4-0.0.2 soupsieve-2.5
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
os.environ['OPENAI_API_KEY'] = 'API_KEY_HERE'

In [9]:
from langchain_openai.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain_community.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()

llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
chain = load_summarize_chain(llm, chain_type="stuff")

chain.invoke(docs)

{'input_documents': [Document(page_content="\n\n\n\n\n\n\nNowgoal Mobi - Live Football Scores & Live Streaming\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nFootball\n\n\n\n\nFootball\n\n\n\n\n\n\nBasketball\n\n\n\n\n\n\nTennis\n\n\n\n\n\n\nBaseball\n\n\n\n\n\n\n\n\n\nScores\n\n\r\n                                        Community\r\n                                        \n\n\nLeagues\n\n\n\n\n\n\n\n\n\nAll\nHot\nFavorites\nFinished\nFixtures\n\n\n\n\n\n\r\n        Hidden 0 ,\r\n        \r\n            show all\r\n        \n\n\n\n\nLoading...Nowgoal Livescore\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nx\nNotifications\n\nDear Users,\nNowgoal.ltd has been updated into the new domain. Please bookmark and get started with our new site：https://www.nowgoal24.com\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nNowgoal24.com\n\n\nAdvertise\nDisclaimer\nFeedback\nSitemap\n\n\n\n\n Desktop View\r\n                \n\n\n\n App\r

In [10]:
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain, StuffDocumentsChain

llm = ChatOpenAI(temperature=0)

# Map
map_template = """The following is a set of documents
{docs}
Based on this list of docs, please identify the main themes
Helpful Answer:"""
map_prompt = PromptTemplate.from_template(map_template)

# map_chain:
map_chain = LLMChain(llm=llm, prompt=map_prompt)

# Reduce
reduce_template = """The following is set of summaries:
{doc_summaries}
Take these and distill it into a final, consolidated summary of the main themes.
Helpful Answer:"""
reduce_prompt = PromptTemplate.from_template(reduce_template)

In [11]:
# Run chain
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

# Takes a list of documents, combines them into a single string, and passes this to an LLMChain
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="doc_summaries"
)

# Combines and iteravely reduces the mapped documents
reduce_documents_chain = ReduceDocumentsChain(
    # This is final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # If documents exceed context for `StuffDocumentsChain`
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=4000,
)

In [12]:
# Combining documents by mapping a chain over them, then combining results
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    document_variable_name="docs",
    # Return the results of the map steps in the output
    return_intermediate_steps=False,
)

text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000, chunk_overlap=0
)
split_docs = text_splitter.split_documents(docs)

In [13]:
print(map_reduce_chain.invoke(split_docs))

{'input_documents': [Document(page_content="Nowgoal Mobi - Live Football Scores & Live Streaming\n\n\nFootball\n\n\nFootball\n\n\nBasketball\n\n\nTennis\n\n\nBaseball\n\nScores\n\n\r\n                                        Community\r\n                                        \n\n\nLeagues\n\nAll\nHot\nFavorites\nFinished\nFixtures\n\n\r\n        Hidden 0 ,\r\n        \r\n            show all\r\n        \n\n\nLoading...Nowgoal Livescore\n\n\nx\nNotifications\n\nDear Users,\nNowgoal.ltd has been updated into the new domain. Please bookmark and get started with our new site：https://www.nowgoal24.com\n\n\nNowgoal24.com\n\n\nAdvertise\nDisclaimer\nFeedback\nSitemap\n\n\n Desktop View\r\n                \n\n App\r\n                \n\n Football\r\n                \n\n Basketball\r\n                \n\nFacebook\nTwitter\nTelegram\n\n\nCopyright © 2024 NowGoal All Rights Reserved.\n\n\n\r\n                Nowgoal's Mobi version can offer you the fast soccer(football) and basketball livescores

In [15]:
prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

refine_template = (
    "Your job is to produce a final summary\n"
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    "We have the opportunity to refine the existing summary"
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "Given the new context, refine the original summary"
    "If the context isn't useful, return the original summary."
)
refine_prompt = PromptTemplate.from_template(refine_template)
chain = load_summarize_chain(
    llm=llm,
    chain_type="refine",
    question_prompt=prompt,
    refine_prompt=refine_prompt,
    return_intermediate_steps=True,
    input_key="input_documents",
    output_key="output_text",
)
result = chain({"input_documents": split_docs}, return_only_outputs=True)

# Page 1 --> Page 2 (Refine) --> Page 3 (Refine)