In [1]:
import pandas as pd
from langchain.chains import LLMChain, ReduceDocumentsChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.chains import create_tagging_chain_pydantic
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.document_loaders import TextLoader
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.docstore.document import Document
from langchain.document_loaders.csv_loader import CSVLoader
import pandas as pd

In [2]:
import os

from dotenv import load_dotenv

# Load the environment variables from the .env file.
load_dotenv()

# OpenAI credential read from the environment; None when OPENAI_KEY is unset.
api_key = os.environ.get("OPENAI_KEY")

In [3]:
from langchain.chat_models import ChatOpenAI

# Chat model handed to the LLMChain instances built in the following cells.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=api_key,
)

In [4]:
# map prompt
def map_prompt():
    """Build the prompt used by the map step of the summarisation chain.

    The template is the document section followed by the extraction
    instructions, concatenated into a single string.

    Returns:
        PromptTemplate: template whose only placeholder is ``{docs}``.
    """
    # Plain (non-f) literals on purpose: ``{docs}`` must reach PromptTemplate
    # untouched, and an f-string would try to interpolate any braces itself.
    # The leading spaces on continuation lines are part of the prompt text.
    document_section = """The following is a set of documents
    {docs}
    """

    instruction_section = """Based on this list of docs, please identify the itinerary name, url, tour length, cost, departure date, travel style, physical grading and a summary of the itinerary.
    If one or more of the interests is not mentioned include this in the answer.
    Helpful Answer:"""

    # Return directly rather than binding a local that shadows this function's name.
    return PromptTemplate.from_template(document_section + instruction_section)

# first reduce prompt
def first_reduce_prompt():
    """Build the prompt used by the reduce step to consolidate summaries.

    Returns:
        PromptTemplate: template whose only placeholder is ``{doc_summaries}``.
    """
    # The leading spaces on continuation lines are part of the prompt text.
    consolidation_template = """The following is set of summaries:
    {doc_summaries}
    Take these and distill it into a final, consolidated summary of the the itinerary name, url, tour length, travel style, physical grading and a summary of the itinerary. After the name also include the range of costs and departure dates.
    Helpful Answer:"""
    return PromptTemplate.from_template(consolidation_template)

In [5]:
# Map step: an LLMChain driven by the map prompt.
map_chain = LLMChain(llm=llm, prompt=map_prompt())

# Reduce step: an LLMChain driven by the consolidation prompt.
reduce_chain = LLMChain(llm=llm, prompt=first_reduce_prompt())

# Takes a list of documents, combines them into a single string, and passes
# this to the reduce LLMChain under the `doc_summaries` variable.
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="doc_summaries"
)

# Combines and iteratively reduces the mapped documents.
reduce_documents_chain = ReduceDocumentsChain(
    # This is the final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # Used if the documents exceed the context for `StuffDocumentsChain`;
    # the same chain is reused for collapsing here.
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=4000,
)

# Combines documents by mapping a chain over them, then combining the results.
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    # (matches the `{docs}` placeholder of the map prompt).
    document_variable_name="docs",
    # Whether to include the map-step results in the output; disabled here.
    return_intermediate_steps=False,
)

# Load the CSV (decoded as latin-1) into documents and run the whole
# map-reduce pipeline over them.
loader = CSVLoader(file_path="raw_data/new_format_test.csv", encoding='latin-1')
docs = loader.load()
itinerary_summary = map_reduce_chain.run(docs)

In [6]:
# Show the consolidated summary returned by the map-reduce chain.
print(itinerary_summary)

Itinerary Name: Highlights of Morocco
URL: https://www.gadventures.com/trips/highlights-of-morocco/DCMH/
Tour Length: 15 days
Cost Range: Standard - Adult: $1119-$1500
Departure Dates: 19/04/2024, 20/04/2024
Travel Style: Classic
Physical Grading: 2 - Light

Summary of Itinerary:
This 15-day tour explores the Highlights of Morocco, starting with a guided tour of Tangier and Chefchaouen. Enjoy free time in Chefchaouen and visit the Spanish Mosque for a panoramic view. Explore the ancient Roman ruins of Volubilis and the Fès medina with a local guide. Travel to the Sahara Desert, ride camels at sunset, and visit the Todra Gorge. Experience the Aït Ben Haddou Kasbah and hike to the village of Aroumd for views of Mt Toubkal. Explore Essaouira and Marrakech before departing. The tour offers a range of costs and departure dates.


In [7]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Single-pass summary: every loaded document is stuffed into one prompt and
# summarised with a single LLM call.

# Define prompt
prompt_template = """Write a concise summary of the following including the url:
"{text}"
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

# Define LLM chain.
# NOTE: this rebinds `llm` to the 16k-context model, so anything run after
# this cell that references `llm` gets this model instead of the earlier one.
# The key loaded from the environment is passed explicitly so this model is
# configured the same way as the first one.
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo-16k",
    openai_api_key=api_key,
)
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Define StuffDocumentsChain; documents are inserted under the `text` variable.
stuff_chain = StuffDocumentsChain(
    llm_chain=llm_chain, document_variable_name="text"
)

# Load the CSV (decoded as latin-1) and print the one-shot summary.
loader = CSVLoader(file_path="raw_data/new_format_test.csv", encoding='latin-1')
docs = loader.load()
print(stuff_chain.run(docs))

The "Highlights of Morocco" tour is a 15-day adventure that takes you through the highlights of Morocco. The tour includes visits to cities, kasbahs, and spice markets, as well as camel rides in the Sahara Desert and hikes in the Atlas Mountains. The tour starts on 19/04/2024 and is priced at $1119 for adults. The itinerary includes stops in Tangier, Chefchaouen, Fès, the Sahara Desert, Todra Gorge, Aït Ben Haddou Kasbah, and Essaouira, among others. The tour is classified as Classic and is a small group trip. The tour can be booked through the following URL: https://www.gadventures.com/trips/highlights-of-morocco/DCMH/
