In [23]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.document_loaders import TextLoader


In [24]:
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment.
load_dotenv()

# OpenAI credential; os.getenv returns None when OPENAI_KEY is not set.
api_key = os.getenv("OPENAI_KEY")

# Alternative source (a web page instead of local files):
# loader = WebBaseLoader("https://www.gadventures.com/terms-conditions/booking-terms/")

# TODO(review): the original cell left both paths unfinished (`raw_data/` and "").
# The file names below are placeholders -- point them at the real itinerary text files.
DATA_DIR = "raw_data"
itinerary1 = os.path.join(DATA_DIR, "itinerary1.txt")
itinerary2 = os.path.join(DATA_DIR, "itinerary2.txt")
itinerary_paths = [itinerary1, itinerary2]

# Fail early with a readable message instead of a loader error further down.
missing_paths = [path for path in itinerary_paths if not os.path.isfile(path)]
if missing_paths:
    raise FileNotFoundError(f"Itinerary file(s) not found: {missing_paths}")

# TextLoader reads a single file, so use one loader per itinerary and pool the results.
docs = []
for itinerary_path in itinerary_paths:
    loader = TextLoader(itinerary_path)
    docs.extend(loader.load())

llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k", openai_api_key=api_key)

In [32]:
# Map step: prompt whose `{docs}` slot receives the document text
# (the map-reduce chain is configured with document_variable_name="docs").
map_template = "\n".join(
    [
        "The following is a set of documents",
        "{docs}",
        "Based on this list of docs, please identify the main themes and include key sites and attractions. ",
        "Helpful Answer:",
    ]
)
map_prompt = PromptTemplate.from_template(map_template)
map_chain = LLMChain(prompt=map_prompt, llm=llm)

In [33]:
# Reduce step: prompt whose `{doc_summaries}` slot receives the summaries to consolidate.
reduce_template = "\n".join(
    [
        "The following is set of summaries:",
        "{doc_summaries}",
        "Take these and distill it into a final, consolidated summary of the main themes. ",
        "Helpful Answer:",
    ]
)
reduce_prompt = PromptTemplate.from_template(reduce_template)

In [34]:
# Build (not run) the LLM chain that applies the reduce prompt.
reduce_chain = LLMChain(
    prompt=reduce_prompt,
    llm=llm,
)

In [35]:
# Joins a list of documents into one string and feeds it to `reduce_chain`
# through that prompt's `doc_summaries` variable.
combine_documents_chain = StuffDocumentsChain(
    document_variable_name="doc_summaries",
    llm_chain=reduce_chain,
)


In [36]:
# Combines and iteratively reduces the mapped documents.
reduce_documents_chain = ReduceDocumentsChain(
    # Upper bound on the number of tokens grouped together in one pass.
    token_max=4000,
    # Used when the documents exceed the context available to `StuffDocumentsChain`.
    collapse_documents_chain=combine_documents_chain,
    # The chain invoked last to produce the final result.
    combine_documents_chain=combine_documents_chain,
)

In [37]:
# Full pipeline: map `map_chain` over every document, then hand the
# per-document results to `reduce_documents_chain`.
map_reduce_chain = MapReduceDocumentsChain(
    # Name of the prompt variable in `map_chain` that receives the documents.
    document_variable_name="docs",
    # Map stage.
    llm_chain=map_chain,
    # Reduce stage.
    reduce_documents_chain=reduce_documents_chain,
    # Only the final output is returned; the map-step results are left out.
    return_intermediate_steps=False,
)


# Split the loaded documents with a 1000 chunk size and 50 overlap.
# NOTE(review): `docs` is rebound to the split result, so re-running this
# cell alone splits the already-split documents again.
text_splitter = CharacterTextSplitter(chunk_overlap=50, chunk_size=1000)
docs = text_splitter.split_documents(docs)

In [38]:
# Run the map-reduce pipeline over the split documents and show the result.
final_summary = map_reduce_chain.run(docs)
print(final_summary)

The main themes of this set of documents revolve around cultural exploration, culinary experiences, beach activities, nightlife, and transportation/accommodation. Travelers can expect to visit historical sites such as Angkor Wat, Tuol Sleng Prison, and the Imperial City of Huế. They can also indulge in local cuisine and street food, participate in beach and water activities, and enjoy the vibrant nightlife in cities like Siem Reap, Ho Chi Minh City, and Hoi An. Transportation options include overnight trains and cruising in Halong Bay, while accommodation ranges from backpackers hostels to beach resorts.
