In [1]:
import pandas as pd
from langchain.chains import LLMChain, ReduceDocumentsChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.chains import create_tagging_chain_pydantic
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.document_loaders import TextLoader
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.docstore.document import Document
from langchain.document_loaders.csv_loader import CSVLoader
import pandas as pd

In [2]:
from dotenv import load_dotenv
import os

# Pull the variables defined in the .env file into the process environment.
load_dotenv()

# Look up the OpenAI credential; this is None when OPENAI_KEY is not set.
api_key = os.environ.get("OPENAI_KEY")

In [3]:
from langchain.chat_models import ChatOpenAI

# Chat model shared by the map and reduce chains; temperature is pinned to 0.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=api_key,
)

In [7]:
# map prompt
def map_prompt():
    """Build the prompt used for the map step.

    The template text consists of a documents section exposing a single
    ``{docs}`` placeholder, followed by fixed instructions asking for the
    itinerary fields and ending with ``Helpful Answer:``.

    Returns:
        The object produced by ``PromptTemplate.from_template`` for the
        assembled template text.
    """
    # Continuation lines of the prompt carry a four-space indent; it is part
    # of the template text and is kept as-is.
    documents_section = "The following is a set of documents\n    {docs}\n    "
    extraction_request = (
        "Based on this list of docs, please identify the itinerary name, web address, tour length, cost, departure date, travel style, physical grading and a summary of the itinerary.\n"
        "    If one or more of the interests is not mentioned include this in the answer.\n"
        "    Helpful Answer:"
    )
    return PromptTemplate.from_template(documents_section + extraction_request)

# first reduce prompt
def first_reduce_prompt():
    """Build the prompt used for the reduce step.

    The template exposes a single ``{doc_summaries}`` placeholder and asks
    for one consolidated paragraph covering the itinerary fields, the range
    of costs and the departure dates.

    Returns:
        The object produced by ``PromptTemplate.from_template`` for the
        reduce template text.
    """
    template_lines = (
        "The following is set of summaries:",
        "{doc_summaries}",
        "Take these and distill it into a final, consolidated summary of the the itinerary name, web address, tour length, travel style, physical grading and a summary of the itinerary. After the name also include the range of costs and the departure dates.",
        "Return a single paragraph rather than bullet points",
        "Helpful Answer:",
    )
    # Every line after the first is indented by four spaces in the prompt text.
    return PromptTemplate.from_template("\n    ".join(template_lines))

In [8]:
# Map step: LLM chain that applies the map prompt (its placeholder is `{docs}`).
map_chain = LLMChain(llm=llm, prompt=map_prompt())

# Reduce step: LLM chain that applies the first reduce prompt.
reduce_chain = LLMChain(llm=llm, prompt=first_reduce_prompt())

# Takes a list of documents, combines them into a single string, and passes this to an LLMChain.
# `document_variable_name` must match the `{doc_summaries}` placeholder of the reduce prompt.
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="doc_summaries"
)

# Combines and iteratively reduces the mapped documents.
# NOTE(review): token_max=4000 is presumably close to the context limit of
# gpt-3.5-turbo; confirm there is headroom for the prompt and the completion.
reduce_documents_chain = ReduceDocumentsChain(
    # This is the final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # Used if the documents exceed the context for `StuffDocumentsChain`;
    # the same stuff chain is reused for collapsing.
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=4000,
)

# Combining documents by mapping a chain over them, then combining results.
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    # (matches the `{docs}` placeholder of the map prompt).
    document_variable_name="docs",
    # Whether to return the results of the map steps in the output (disabled here).
    return_intermediate_steps=False,
)

# Load the itinerary CSV, decoding the file as latin-1.
loader = CSVLoader(file_path="raw_data/new_format_test.csv", encoding='latin-1')
docs = loader.load()

# Run the map-reduce pipeline over every loaded document; this invokes the LLM.
itinerary_summary = map_reduce_chain.run(docs)

In [9]:
# Show the consolidated summary returned by the map-reduce chain.
print(itinerary_summary)

The itinerary name is "Highlights of Morocco" and the web address is https://www.gadventures.com/trips/highlights-of-morocco/DCMH/. The tour length is 15 days and the travel style is Classic, offering all the highlights, culture, access, and memorable moments at a great price. The physical grading is 2, which means light walking and hiking suitable for most fitness levels. The cost ranges from $1119 to $1500, depending on the departure date. The departure dates are 19/04/2024 and 20/04/2024. The itinerary includes stops in Tangier, Chefchaouen, Fès, the Sahara Desert, Todra Gorge, Aït Ben Haddou Kasbah, the village of Aroumd with views of Mt Toubkal, Essaouira, and Marrakech. It also offers free days for exploration and guided tours of medinas, ancient ruins, and optional cooking classes. The itinerary provides a comprehensive experience of Morocco's highlights and cultural immersion.


In [13]:
# Inspect the raw text of the first loaded document.
print(docs[0].page_content)

tour_name: Highlights of Morocco
itinerary_name: 
visited_countries: Morocco
start_date: 19/04/2024
duration: 15 days
url: https://www.gadventures.com/trips/highlights-of-morocco/DCMH/
Travel Style: Classic - All of the highlights, culture, access, and I-cant-believe-we-did-that moments, all at a great price.
Service Level: Standard - Comfortable tourist-class accommodations with character; mix of public and private transport.
Physical Grading: 2 - Light - Light walking and hiking suitable for most fitness levels. Nothing too challenging.
Trip Type: Small Group - Group trips average 12 travellers per departure, depending on the adventure. The maximum is usually no more than 16, but some can be smaller or bigger, depending on the trip. Check individual trips for details.
Cost: 1119
itinerary: Day 1: Arrive at any time.
Day 2: Stop in Tangier for a guided tour of the medina before continuing on to Chefchaouen for the evening.
Day 3: Enjoy a free day in Chefchaouen to explore. The city, 

KeyError: 'text'