In [41]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.document_loaders import TextLoader


In [42]:
import os

from dotenv import load_dotenv

# Pull settings from a local .env file into the process environment.
load_dotenv()

# The key is read from OPENAI_KEY first. OPENAI_API_KEY is accepted as a
# fallback: it is the variable ChatOpenAI consults on its own when no key is
# passed, so setups that only define that one keep working.
api_key = os.getenv("OPENAI_KEY") or os.getenv("OPENAI_API_KEY")
if not api_key:
    # Fail here with an actionable message instead of a later validation
    # error that never mentions OPENAI_KEY.
    raise RuntimeError(
        "No OpenAI API key found: set OPENAI_KEY (or OPENAI_API_KEY) in the "
        "environment or in the .env file."
    )

# Local text file to summarise.
# To summarise a web page instead, swap the loader for e.g.
#   WebBaseLoader("https://www.gadventures.com/terms-conditions/booking-terms/")
SOURCE_PATH = 'Cambodia_to_Vietnam_Night_Markets_Noodle-Making.txt'
loader = TextLoader(SOURCE_PATH)
docs = loader.load()

# temperature=0 is used for both the map and the reduce chains built on this model.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k", openai_api_key=api_key)

In [43]:
# Map step: prompt + chain run on the documents; the text is injected
# through the {docs} placeholder.
map_template = (
    "The following is a set of documents\n"
    "{docs}\n"
    "Based on this list of docs, please identify the main themes and include key sites and attractions. \n"
    "Helpful Answer:"
)
map_prompt = PromptTemplate.from_template(map_template)
map_chain = LLMChain(prompt=map_prompt, llm=llm)

In [50]:
# Reduce step: prompt that consolidates the summaries supplied through the
# {doc_summaries} placeholder.
reduce_template = (
    "The following is set of summaries:\n"
    "{doc_summaries}\n"
    "Take these and distill it into a final, consolidated summary of the main themes. \n"
    "Helpful Answer:"
)
reduce_prompt = PromptTemplate.from_template(reduce_template)

In [51]:
# Build the reduce chain from the reduce prompt (constructed only; nothing is run here)
reduce_chain = LLMChain(prompt=reduce_prompt, llm=llm)

In [52]:
# Stuff chain: joins a list of documents into a single string and hands it to
# reduce_chain via the prompt's "doc_summaries" variable.
combine_documents_chain = StuffDocumentsChain(
    document_variable_name="doc_summaries",
    llm_chain=reduce_chain,
)


In [53]:
# Combines and iteratively reduces the mapped documents.
# The same stuff chain is used for both roles: the final combine call and the
# collapse step taken when the documents exceed the context for
# `StuffDocumentsChain`.
reduce_documents_chain = ReduceDocumentsChain(
    # The maximum number of tokens to group documents into.
    token_max=4000,
    # Used when the documents are too large to stuff in one go.
    collapse_documents_chain=combine_documents_chain,
    # Final chain that is called.
    combine_documents_chain=combine_documents_chain,
)

In [None]:
# Map-reduce pipeline: map_chain is applied over the documents, then
# reduce_documents_chain combines the mapped results.
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    document_variable_name="docs",
    # Do not return the results of the map steps in the output
    return_intermediate_steps=False,
)

# Split the loaded file into token-bounded chunks so the map step has several
# documents to work on.
# NOTE(review): chunk_size/chunk_overlap follow the LangChain summarization
# tutorial; tune them for this corpus if needed.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000, chunk_overlap=0
)
split_docs = text_splitter.split_documents(docs)

# Run the pipeline so the summary shown below this cell is reproducible from a
# fresh kernel (previously the chain was only constructed, never invoked).
print(map_reduce_chain.run(split_docs))


The main themes of this set of documents revolve around cultural exploration and immersion, adventure and outdoor activities, nightlife and socializing, and historical and educational sites. Travelers can expect to experience various cultural activities, try local cuisine, visit historical sites, and participate in outdoor adventures such as snorkeling, hiking, and kayaking. The documents also mention opportunities for socializing and enjoying the nightlife in different cities. Key sites and attractions include Angkor Wat, Tuol Sleng Prison, the Silver Pagoda, and the ancient ruins of My Son.
