In [43]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.document_loaders import TextLoader


In [44]:
from dotenv import load_dotenv
import os

# Read the .env file so its entries are available as environment variables.
load_dotenv()

# OpenAI credential taken from OPENAI_KEY; this is None when the variable is unset.
api_key = os.environ.get("OPENAI_KEY")

# Plain-text itinerary files that the chains below summarise.
# Alternative web source, left disabled:
# loader = WebBaseLoader("https://www.gadventures.com/terms-conditions/booking-terms/")
itinerary1 = "raw_data/itinerary_text/Highlights of Morocco .txt"
itinerary2 = "raw_data/itinerary_text/Coastal Morocco: Waves & Market Stalls .txt"

# Chat model shared by every chain in this notebook (temperature fixed at 0).
llm = ChatOpenAI(
    openai_api_key=api_key,
    model_name="gpt-3.5-turbo-16k",
    temperature=0,
)

In [45]:
# Map step, first draft: ask for the main themes of the documents plus the
# itinerary name and the (hard-coded) interest list.
# NOTE: map_template / map_prompt / map_chain are reassigned by later cells.
map_template = (
    "The following is a set of documents\n"
    "{docs}\n"
    "Based on this list of docs, please identify the main themes. Also include the itinerary name and this list of interests: Surfing.\n"
    "If one or more of the interests is not mentioned this include this in the answer.\n"
    "Helpful Answer:"
)
map_prompt = PromptTemplate.from_template(map_template)
map_chain = LLMChain(prompt=map_prompt, llm=llm)

In [46]:
# Map step, second draft: ask for specific itinerary fields (name, length,
# travel style, physical grading, summary) plus the hard-coded interest list.
# NOTE: map_template / map_prompt / map_chain are reassigned by later cells.
map_template = (
    "The following is a set of documents\n"
    "{docs}\n"
    "Based on this list of docs, please identify the itinerary name, tour length, travel style, physical grading, an summary of the itinerary and this list of interests: Surfing.\n"
    "If one or more of the interests is not mentioned this include this in the answer.\n"
    "Helpful Answer:"
)
map_prompt = PromptTemplate.from_template(map_template)
map_chain = LLMChain(prompt=map_prompt, llm=llm)

In [None]:
####### here ##########

In [74]:
# Interests to look for, collapsed into one comma-separated phrase.
interests_list = ["surfing"]
interests = ", ".join(interests_list)

# Instruction half of the map prompt. `interests` is interpolated here, before
# PromptTemplate parses placeholders, so an interest containing "{" or "}" would
# be read as a template variable.
intermediate_template = (
    "Based on this list of docs, please identify the itinerary name, tour length, travel style, physical grading, an summary of the itinerary and this list of interests: "
    f"{interests}.\n"
    "If one or more of the interests is not mentioned this include this in the answer.\n"
    "Helpful Answer:"
)

# Document half (with the {docs} placeholder) followed by the instructions.
map_template = "The following is a set of documents\n{docs}\n" + intermediate_template

map_prompt = PromptTemplate.from_template(map_template)
# Display the resulting template as the cell output.
map_prompt

PromptTemplate(input_variables=['docs'], output_parser=None, partial_variables={}, template='The following is a set of documents\n{docs}\nBased on this list of docs, please identify the itinerary name, tour length, travel style, physical grading, an summary of the itinerary and this list of interests: surfing.\nIf one or more of the interests is not mentioned this include this in the answer.\nHelpful Answer:', template_format='f-string', validate_template=True)

In [None]:
##### here ######

In [48]:
# LLM chain used as the map step of the map-reduce pipeline.
# `map_prompt` is already a PromptTemplate instance, so it is passed as-is;
# calling it (`map_prompt()`) raises TypeError because the object is not callable.
map_chain = LLMChain(llm=llm, prompt=map_prompt)

In [49]:
# Reduce step: prompt that condenses the per-document summaries
# (injected as {doc_summaries}) into a single consolidated summary.
reduce_template = (
    "The following is set of summaries:\n"
    "{doc_summaries}\n"
    "Take these and distill it into a final, consolidated summary of the main themes. \n"
    "Helpful Answer:"
)
reduce_prompt = PromptTemplate.from_template(reduce_template)

In [50]:
# Build the LLM chain for the reduce prompt (construction only; nothing is run here).
reduce_chain = LLMChain(prompt=reduce_prompt, llm=llm)

In [51]:
# Joins a list of documents into one string and hands it to `reduce_chain`
# under the prompt variable named "doc_summaries".
combine_documents_chain = StuffDocumentsChain(
    document_variable_name="doc_summaries",
    llm_chain=reduce_chain,
)


In [52]:
# Combines the mapped documents, reducing them iteratively when needed.
reduce_documents_chain = ReduceDocumentsChain(
    # Upper bound on the number of tokens documents are grouped into.
    token_max=4000,
    # Used when the documents exceed the context for `StuffDocumentsChain`.
    collapse_documents_chain=combine_documents_chain,
    # The chain invoked for the final combination.
    combine_documents_chain=combine_documents_chain,
)

In [53]:
# Full pipeline: apply the map chain to each document, then combine the results.
map_reduce_chain = MapReduceDocumentsChain(
    # Name of the prompt variable in `map_chain` that receives the documents.
    document_variable_name="docs",
    # Map step.
    llm_chain=map_chain,
    # Reduce step.
    reduce_documents_chain=reduce_documents_chain,
    # Do not include the map-step results in the output.
    return_intermediate_steps=False,
)



In [54]:
# Load the first itinerary file and summarise it with the map-reduce pipeline.
loader = TextLoader(file_path=itinerary1)
docs = loader.load()
itinerary_1_summary = map_reduce_chain.run(docs)

In [55]:
# Load the second itinerary file and summarise it with the same pipeline.
# Note: `loader` and `docs` are rebound here and no longer refer to itinerary 1.
loader = TextLoader(file_path=itinerary2)
docs = loader.load()
itinerary_2_summary = map_reduce_chain.run(docs)

In [56]:
from langchain.docstore.document import Document

# Wrap each summary string in a Document so it can be passed to a documents chain.
doc1, doc2 = (
    Document(page_content=summary_text)
    for summary_text in (itinerary_1_summary, itinerary_2_summary)
)

In [57]:
# Summary documents, in itinerary order, that the ranking step will receive.
text_vars = [
    doc1,
    doc2,
]

In [58]:
# Reduce (ranking) prompt.
# {doc_summaries} is the document variable filled in when the chain runs.
# {interests} is bound up front from the notebook's `interests` string, so the
# ranking criteria follow `interests_list` instead of a hard-coded "Surfing".
# Binding through `.partial` (rather than f-string interpolation) also means the
# interest text is never parsed for template placeholders.
reduce_template = """
You are an AI travel agent speaking to a user. The following is set of summaries that fits the users basic travel needs:
{doc_summaries}
Take these and rank them based on the following user interests: {interests}.
Return the origional summary along with a reason for ranking each itinerary based on their interests.
Refer to the user as - you.
Helpful Answer:"""
reduce_prompt = PromptTemplate.from_template(reduce_template).partial(interests=interests)

In [59]:
# Rebind `reduce_chain` to an LLM chain built on the current `reduce_prompt`.
reduce_chain = LLMChain(prompt=reduce_prompt, llm=llm)

In [60]:
# Stuff the summary documents into the "doc_summaries" variable of `reduce_chain`.
combine_chain = StuffDocumentsChain(
    document_variable_name="doc_summaries",
    llm_chain=reduce_chain,
)

In [61]:
# Wrap the stuffing chain in a ReduceDocumentsChain.
# Note: this rebinds `reduce_chain`, which until now held the LLMChain that was
# passed into `combine_chain`.
reduce_chain = ReduceDocumentsChain(combine_documents_chain=combine_chain)

In [62]:
# Run the reduce-documents chain over the two itinerary summary documents.
reduced = reduce_chain.run(text_vars)

In [63]:
# Show the map-reduce summary produced for the first itinerary.
print(itinerary_1_summary)

The 15-day Highlights of Morocco tour offers a classic travel experience at a great price. Suitable for most fitness levels, the itinerary includes visits to Tangier, Chefchaouen, Fès, Marrakech, the Sahara Desert, and Essaouira. Guided tours of medinas, ancient ruins, camel rides, hikes in the Atlas Mountains, and free time for exploration and relaxation are included. Surfing is not mentioned as an interest.


In [64]:
# Show the map-reduce summary produced for the second itinerary.
print(itinerary_2_summary)

Coastal Morocco: Waves & Market Stalls is a 5-day tour designed for young, budget-minded travelers. The itinerary includes light walking and hiking suitable for most fitness levels. The tour starts in Marrakech and offers opportunities to explore the city and get to know fellow travelers. Highlights include visiting the market town of Taroudant, the fishing village of Taghazout for surfing, and the city of Essaouira for beach activities. The tour concludes with a return to Marrakech.


In [25]:
# Show the output of the ranking chain run over both summaries.
print(reduced)

Based on your interest in surfing, here is the ranking of the itineraries:

1. Coastal Morocco: Waves & Market Stalls - This itinerary is ranked first because it specifically focuses on surfing. It offers young and budget-minded travelers the opportunity to explore the coastal regions of Morocco while engaging in surfing activities. The itinerary is designed for those interested in catching waves and includes light physical activities suitable for most fitness levels.

2. Highlights of Morocco - This itinerary is ranked second because although it does not specifically mention surfing as an activity, it offers a classic travel experience with access to culture and memorable experiences. While it may not cater directly to your interest in surfing, it does provide comfortable accommodations and a mix of public and private transport for a well-rounded travel experience.

Reason for ranking:
The Coastal Morocco itinerary is ranked first because it explicitly mentions surfing as a focus and 