In [1]:
## Details QA

In [2]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.chains import create_tagging_chain_pydantic
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType

import pandas as pd

In [3]:
# Import pysqlite3 and register it in sys.modules under the name 'sqlite3', so any
# later `import sqlite3` in this process resolves to the pysqlite3 module instead.
# NOTE(review): presumably required because Chroma needs a newer SQLite than the
# system one — confirm, and keep this cell before anything that imports sqlite3.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

In [4]:
# import re
from dotenv import load_dotenv
import os

# Load the environment variables from the .env file
load_dotenv()

# Access environment variables
# os.getenv returns None when OPENAI_KEY is not set, so api_key can be None here;
# every ChatOpenAI / OpenAIEmbeddings constructed below receives this value.
api_key = os.getenv("OPENAI_KEY")

In [5]:
from langchain.chat_models import ChatOpenAI

# Module-level chat model; check_conversation_stage passes it to the tagging chain.
# NOTE(review): temperature=0.9 is also used for structured extraction — confirm that is intended.
llm = ChatOpenAI(temperature=0.9, model="gpt-3.5-turbo", openai_api_key=api_key)

In [6]:
# Load the tour CSV and collect the distinct values of its 'visited_countries' column.
# NOTE(review): all_available_countries is not referenced again in the visible notebook;
# the TravelDetails.country enum is hard-coded instead.
df = pd.read_csv("raw_data/one_day_test.csv")
all_available_countries = df['visited_countries'].unique()

# stage analysis

In [7]:
from typing import Optional
from pydantic import BaseModel, Field

# Structured trip preferences. check_conversation_stage hands this model to
# create_tagging_chain_pydantic, so the `description` strings below are runtime text
# used for extraction, not documentation.
class TravelDetails(BaseModel):
    # Whether the agent has already introduced itself; defaults to False.
    introduction: Optional[bool] = Field(
        False,
        description="Has francis introducted himself and asked if the user is interested in a group tour.",
    )
    # `...` makes this field required even though the annotation is Optional.
    # The enum values are capitalised ("Yes"/"No"/"Unsure"); comparisons elsewhere
    # must account for that casing.
    qualification: Optional[str] = Field(
        ...,
        description="Did the user confirm they are looking for a group tour or answer positivley when asked. If the user asks about a trip assume the answer is yes",
        enum=["Yes", "No", "Unsure"]
    )
    # get_filtered_df matches this value exactly against the CSV's 'visited_countries' column.
    # NOTE(review): the enum lists only four countries — confirm it stays in sync with the data.
    country: Optional[str] = Field(
        "",
        description="This is the name of the country the user is wanting to visit. If they name a place within a country always return the country",
        enum=["Cambodia", "Vietnam", "Morocco", "USA"]
    )
    # Earliest departure date as a '%Y-%m-%d' string; "" means not provided yet.
    departing_after: Optional[str] = Field(
        "",
        description="This is the first date from which the user can depart. If the user gives a month assume this is the first of the month. If not year if given return 2023. In the format '%Y-%m-%d'",
    )
    # Latest departure date as a '%Y-%m-%d' string; "" means not provided yet.
    departing_before: Optional[str] = Field(
        "",
        description="This is the last date from which the user can depart. If the user gives a month assume this is the last day of the month. If not year if given return 2023. In the format '%Y-%m-%d'",
    )
    # Default 0 is treated as "not provided" by check_what_is_empty.
    max_budget: Optional[int] = Field(
        0,
        description="This is the maximun amount of money the user is looking to spend on their trip.",
    )
    # Compared against the CSV's 'duration' column in get_filtered_df; None means not provided.
    max_duration: Optional[int] = Field(
        None,
        description="This is the maximum duration of their trip."
    )
    # Compared against the CSV's 'duration' column in get_filtered_df; None means not provided.
    min_duration: Optional[int] = Field(
        None,
        description="This is the minimum duration of their trip.",
    )

In [8]:
# Maps a TravelDetails field name to the follow-up question asked when that field is empty.
# The two date fields share one question text, as do the two duration fields, so the
# list built from this dict can contain the same question twice.
# There are no entries for 'introduction' or 'qualification'.
ask_for_dict = {"country":"what country are you looking to travel to?",
                "max_budget":"how much are you looking to spend?",
                "departing_after":"when are you looking to travel?",
                "departing_before":"when are you looking to travel?",
                "max_duration":"how long do you want your trip to be?",
                "min_duration":"how long do you want your trip to be?"
               }

In [38]:
def check_what_is_empty(user_travel_details):
    """Return the follow-up questions for every trip detail that is still empty.

    Args:
        user_travel_details: object exposing ``.dict()`` (a ``TravelDetails``).

    Returns:
        list[str]: one question from ``ask_for_dict`` per empty field, in field
        order, with the country question (or, failing that, the budget question)
        moved to the front. Duplicates are kept on purpose: fields that share a
        question text each contribute one entry, and ``info_gathering_prompt``
        relies on the resulting length.
    """
    ask_for = []
    for field, value in user_travel_details.dict().items():
        # Fields without a follow-up question (e.g. 'introduction', 'qualification')
        # used to raise KeyError here whenever they were empty. Note that
        # `False in [None, "", 0]` is True because False == 0.
        question = ask_for_dict.get(field)
        if question is None:
            continue
        if value in [None, "", 0]:  # values treated as "not provided yet"
            ask_for.append(question)

    # Ask for the country first; only when it is already known does the budget
    # question get promoted instead.
    if 'what country are you looking to travel to?' in ask_for:
        ask_for.remove('what country are you looking to travel to?')
        ask_for.insert(0, 'what country are you looking to travel to?')
    elif 'how much are you looking to spend?' in ask_for:
        ask_for.remove('how much are you looking to spend?')
        ask_for.insert(0, 'how much are you looking to spend?')

    return ask_for

In [10]:
## checking the response and adding it
def add_non_empty_details(current_details: TravelDetails, new_details: TravelDetails):
    """Overlay the populated fields of ``new_details`` onto ``current_details``.

    Values equal to False, None or "" are ignored (0 is ignored as well, since
    ``0 == False``). The result is a copy; neither argument is modified.
    """
    ignored_values = [False, None, ""]
    overrides = {}
    for field_name, field_value in new_details.dict().items():
        if field_value in ignored_values:
            continue
        overrides[field_name] = field_value
    return current_details.copy(update=overrides)

In [11]:
def find_first_non_null(row, start_col=12):
    """Return the first non-null value of ``row`` at or after position ``start_col``.

    Used with ``df.apply(..., axis=1)`` to pick a trip cost out of the trailing
    columns of each row.

    Args:
        row: a sliceable sequence of values (a DataFrame row or a list).
        start_col: zero-based position at which the scan starts. Defaults to 12,
            the offset this notebook has always used (the old comment said
            "9th column", which did not match the code).

    Returns:
        The first value for which ``pd.isna`` is False, or None if every
        scanned value is null or the slice is empty.
    """
    for value in row[start_col:]:
        if not pd.isna(value):
            return value
    return None

In [12]:
def get_filtered_df(df, user_travel_details):
    """Return the tours in ``df`` that satisfy the details the user has given so far.

    The input frame is left untouched. Previously the conversions below were
    written back into ``df``, so a second call on the same frame failed at
    ``.str.replace`` because 'duration' had already become an integer column.

    Args:
        df: tour table with at least 'duration' (e.g. "7 days"), 'start_date'
            ('%Y-%m-%d' strings), 'visited_countries', plus the cost columns
            scanned by ``find_first_non_null``.
        user_travel_details: object exposing ``.dict()`` (a ``TravelDetails``).

    Returns:
        pandas.DataFrame: a filtered copy with 'duration' as int, 'start_date'
        as datetime and an added 'first_non_null' cost column.
    """
    import operator

    trip_details_dict = user_travel_details.dict()
    filled_out_dictionary = {k: v for k, v in trip_details_dict.items() if v not in [False, None, "", 0]}

    # Work on a copy so the caller's DataFrame is never modified.
    prepared = df.copy()

    # "7 days" -> 7; skipped when the column is already numeric.
    if not pd.api.types.is_numeric_dtype(prepared['duration']):
        prepared['duration'] = prepared['duration'].str.replace(' days', '').astype(int)
    prepared['start_date'] = pd.to_datetime(prepared['start_date'], format='%Y-%m-%d')

    # Per-row cost: first non-null value among the trailing cost columns.
    prepared['first_non_null'] = prepared.apply(find_first_non_null, axis=1)

    # detail field -> (column to test, comparison applied as column <op> value)
    filter_rules = {
        'country': ('visited_countries', operator.eq),
        'max_budget': ('first_non_null', operator.le),
        'min_budget': ('first_non_null', operator.ge),
        'departing_after': ('start_date', operator.ge),
        'departing_before': ('start_date', operator.le),
        'max_duration': ('duration', operator.le),
        'min_duration': ('duration', operator.ge),
    }

    filtered_df = prepared
    for field, value in filled_out_dictionary.items():
        rule = filter_rules.get(field)
        if rule is None:
            # Fields such as 'introduction' / 'qualification' do not filter tours.
            continue
        column, compare = rule
        filtered_df = filtered_df[compare(filtered_df[column], value)]

    return filtered_df

In [13]:
# The itinerary and departure date the user has settled on.
# check_conversation_stage reads both attributes to decide whether to offer dates or the booking URL.
# Both fields use `...`, so they must be passed explicitly when constructing the model.
class ItineraryDetails(BaseModel):
    # Checked for membership in the tour names that survive filtering.
    itinerary_name: Optional[str] = Field(
        ...,
        description="The name of the itinerary the user has decided to take not the country",
    )
    # Checked for membership in the module-level available_dates list.
    itinerary_start_date: Optional[str] = Field(
        ...,
        description="The start date of the itinerary the user has decided to take. This is an exact date",
    )

In [14]:
# Hard-coded departure dates; check_conversation_stage joins them into the dates prompt
# and tests user_itinerary_details.itinerary_start_date for membership in this list.
# NOTE(review): these look like dd/mm/YYYY while TravelDetails asks for '%Y-%m-%d' — confirm the expected format.
available_dates = ["06/04/2023","13/04/2023","20/04/2023"]

In [15]:
# Link that check_conversation_stage passes to url_presentation_prompt once an itinerary and date are chosen.
URL = "www.affiliate_link_to_website.com"

In [16]:
# Module-level log of questions: check_conversation_stage appends the question it selects on each
# info-gathering turn. It is never cleared, so it accumulates across conversations in the same kernel.
asked_for = []

In [17]:
def prompt_amendments(ask_for, filtered_df, found_itineraries):
    """Build an extra prompt sentence for the question about to be asked.

    Only the budget question yields text (itinerary count plus minimum and mean
    of the 'first_non_null' cost column). For the other known questions a debug
    line is printed and None is returned.
    """
    current_question = ask_for[0]

    if current_question == 'how much are you looking to spend?':
        costs = filtered_df['first_non_null']
        min_budget = costs.min()
        mean_budget = costs.mean()
        return (
            f'In your answer tell the user there are {len(found_itineraries)} itineraries that fit their needs.'
            f'        The minimim trip cost is {min_budget} and the average tripcost is {mean_budget} to their destination.'
        )

    debug_notes = (
        ('when are you looking to travel?', 'The question is about when to travel'),
        ('how long do you want your trip to be?', 'The question is about the duration'),
        ('what country are you looking to travel to?', 'The question is about where they want to travel to'),
    )
    for question, note in debug_notes:
        if current_question == question:
            print(note)
    
    

In [18]:
# promtps
# Info gathering stage when there are still trip details to ask for
def info_gathering_prompt(ask_for, found_itineraries, filtered_df):
    """Return the stage prompt that tells the agent which question to ask next.

    A list of exactly six outstanding questions (nothing answered yet) selects
    the "detail gathering" wording; any other length selects the "trip detail"
    wording. Both embed ``ask_for[0]`` as the follow-up question.
    ``found_itineraries`` and ``filtered_df`` are accepted but not used.
    """
    nothing_answered_yet = len(ask_for) == 6

    if nothing_answered_yet:
        return f"""You are currently in the detail gathering phase of the conversation and are trying to get detail of the users trip to help find the the perfect trip. 
            If the user has just asked a follow up question in the conversation history, answer it.
            Once you have answered their question ALWAYS ask the user the following question to gather the required information.
            Follow up question:
            {ask_for[0]}"""

    return f"""You are currently in the trip detail phase of the conversation and are trying to get detail of the users trip to help find the the perfect trip. 
            If the user has just asked a follow up question in the conversation history, answer it.
            Once you have answered their question ALWAYS ask the user the following question to gather the required information.
            Follow up question:
            {ask_for[0]}
            """
    

# All the details are gathered and we're presenting a solution, list of available itineraries
def solution_presentation_prompt(found_itineraries, df):
    """Return the stage prompt that presents the matching itineraries.

    For each name in ``found_itineraries`` the first row of ``df`` whose
    'tour_name' equals it supplies the name, 'tour_description' and 'url'
    lines of the summary embedded in the prompt.
    """
    entries = []
    for tour in found_itineraries:
        matching_rows = df[df['tour_name'] == tour]
        entries.append(
            f'Itinerary: {matching_rows["tour_name"].values[0]}\n'
            f'Tour description: {matching_rows["tour_description"].values[0]}\n'
            f'Link: {matching_rows["url"].values[0]}\n\n'
        )
    summary = "".join(entries)

    return f"""Thank the user for providing the details. 
        Based on all the users needs here is the list of itineraries and a summary that fit their needs:\n{summary}
        Present the itinerary or itineraries to the user. The 
        """


# all trip details are gathered and the clients have decided on an itinerary
def available_dates_prompt(dates_text):
    """Return the stage prompt asking the user to choose one of the listed departure dates.

    ``dates_text`` is inserted verbatim, followed by a full stop.
    """
    return f"""
            The user has now decided on on the itinerary would like. We now need to check what dates they would like to travel on. The following dates are availab: 
            {dates_text}.
            Ask the user to pick a desired departure date."""

# all trip details are gathered, the clients have decided on an itinerary and have a departure date
def url_presentation_prompt(URL):
    """Return the final stage prompt, telling the agent to share ``URL`` rather than book the trip."""
    return f"""
        The user now has an itinerary and the dates they want to travel. 
        You are not booking the trip for the user, just share the companies url: {URL}"""


# still in the info gathering stage but there are no available itineraries based on preferences
def no_results_prompt(user_travel_details, new_user_travel_details, ignore_fields=("introduction", "qualification")):
    """Return the stage prompt used when no itinerary matches the user's details.

    The prompt names the details that changed on this turn, i.e. the ones that
    most likely emptied the result set. The previous implementation compared
    the key sets of the new details with themselves (and both models share the
    same keys anyway), so the list was always empty.

    Args:
        user_travel_details: details before this turn; must expose ``.dict()``.
        new_user_travel_details: details after this turn; must expose ``.dict()``.
        ignore_fields: field names never reported, because they describe the
            conversation rather than a trip constraint.

    Returns:
        str: prompt text ending with the comma-separated changed field names
        (nothing after "alternate: " when no field changed).
    """
    previous_values = user_travel_details.dict()
    current_values = new_user_travel_details.dict()

    # A field counts as "just answered" when its value differs from the previous turn.
    last_question = [
        field
        for field, value in current_values.items()
        if field not in ignore_fields and previous_values.get(field) != value
    ]

    conversation_stage = f"""
        The user has provided details of their trip but unfortunatley there are no itineraries that match their needs. 
        Explain this to the user and higlight they need to look at alternate: {", ".join(last_question)}"""
    return conversation_stage
#

In [19]:
def check_conversation_stage(conversation_history, user_travel_details, user_itinerary_details):
    """Work out which stage the conversation is in and build the matching stage prompt.

    Steps: extract ``TravelDetails`` from the whole history with the tagging
    chain, merge them into the known details, filter the tour CSV, then choose
    a prompt (no results / share URL / pick a date / present itineraries /
    end or explain / ask the next question).

    Args:
        conversation_history: list of "Speaker: text" strings.
        user_travel_details: ``TravelDetails`` known before this turn.
        user_itinerary_details: ``ItineraryDetails`` chosen so far.

    Returns:
        tuple: ``(conversation_stage, new_user_travel_details)``.

    Side effects:
        Reads "raw_data/one_day_test.csv", calls the LLM, prints progress
        messages, and appends the selected question to ``asked_for``.
    """
    conversation_history = "\n".join(conversation_history)

    # extract travel details chain
    chain = create_tagging_chain_pydantic(TravelDetails, llm)
    res = chain.run(conversation_history)
    new_user_travel_details = add_non_empty_details(user_travel_details, res)
    ask_for = check_what_is_empty(new_user_travel_details)

    # load df
    df = pd.read_csv("raw_data/one_day_test.csv")

    # check number of unique itineraries
    filtered_df = get_filtered_df(df, new_user_travel_details)
    found_itineraries = filtered_df['tour_name'].unique()

    if len(filtered_df) == 0:
        conversation_stage = no_results_prompt(user_travel_details, new_user_travel_details)
        return conversation_stage, new_user_travel_details

#     if len(ask_for) == 0:
#         # extract itinerary details chain
#         chain = create_tagging_chain_pydantic(ItineraryDetails, llm)
#         res = chain.run(conversation_history)
#         user_itinerary_details = add_non_empty_details(user_itinerary_details, res)
#         print(user_itinerary_details)

    if len(ask_for) == 0:
        itinerary_chosen = user_itinerary_details.itinerary_name in found_itineraries

        # Itinerary and a valid departure date chosen: hand over the booking link.
        if itinerary_chosen and user_itinerary_details.itinerary_start_date in available_dates:
            print('all details gathered, itinerary decided and date sorted')
            conversation_stage = url_presentation_prompt(URL)
            return conversation_stage, new_user_travel_details

        # Itinerary chosen but no date yet: present the available dates.
        if itinerary_chosen:
            print("Itinerary selected! summarise the itineraries...")
            dates_text = "\n".join(available_dates)
            conversation_stage = available_dates_prompt(dates_text)
            return conversation_stage, new_user_travel_details

        # All details known: present the matching itineraries and let the user decide.
        if len(found_itineraries) > 0:
            print("All details gathered! summarise the itineraries...")
            conversation_stage = solution_presentation_prompt(found_itineraries, df)
            return conversation_stage, new_user_travel_details

    # Use the details merged on THIS turn (the pre-turn details lag one message behind)
    # and compare case-insensitively: the enum values are "Yes"/"No"/"Unsure", so the
    # old comparison against 'no' could never match.
    qualification = (new_user_travel_details.dict().get('qualification') or "").strip().lower()
    if qualification == 'no':
        conversation_stage = "The user is not interested in a group tour so politely end the conversation. Ask them to come back if they ever are"
    elif qualification == 'unsure':
        conversation_stage = "Explain what a group tour is. End by asking if they are interested in a group tour"
    else:
        conversation_stage = info_gathering_prompt(ask_for, found_itineraries, filtered_df)
        asked_for.append(ask_for[0])

    return conversation_stage, new_user_travel_details

# tools

In [20]:
def sumarization_tool(input=""):
    """Placeholder summariser: ignores ``input`` and returns a fixed sentence."""
    canned_summary = 'a 15 day itinerary through morocco.'
    return canned_summary

In [21]:
from langchain.tools import PythonAstREPLTool
# Re-read the tour CSV into the module-level `df` (the same file is loaded in an earlier cell).
df = pd.read_csv("raw_data/one_day_test.csv")

# Argument schema for the REPL tool: a single `query` string holding the code to run.
class PythonInputs(BaseModel):
    query: str = Field(description="code snippet to run")
        
# Python REPL tool with `df` exposed in its locals.
# NOTE(review): `repl` is not added to any tool list in the visible notebook; the only
# other mention is commented-out code in get_tools. It executes model-written code, so
# treat it as untrusted input if it is ever enabled.
repl = PythonAstREPLTool(locals={"df": df}, 
                         name="multi_county_query",
                         description="useful for answering questions about multiple country costs e.g. average trip cost to peru",
                         args_schema=PythonInputs)

In [45]:
import glob
def _tool_name_from_path(path):
    """Derive a function-calling-safe tool name from an itinerary file path."""
    import os
    import re

    # Same clean-up as before: drop the extension, '& ' and ':', spaces -> underscores.
    name = os.path.basename(path).replace('.txt', '').replace('& ', '').replace(':', '').replace(' ', '_')
    # OpenAI function names may only contain letters, digits, '_' and '-' and are
    # limited to 64 characters; anything else makes the API reject the request.
    name = re.sub(r'[^A-Za-z0-9_-]', '', name)
    return name[:64] or 'itinerary'


def get_tools(data_dir='raw_data/itinerary_text', max_files=10):
    """Build one retrieval-QA tool per itinerary text file.

    Each file is loaded, split into chunks, embedded into a FAISS index and
    wrapped in a ``Tool`` whose name is derived from the file name.

    Args:
        data_dir: directory holding the itinerary ``.txt`` files.
        max_files: maximum number of files turned into tools (default 10,
            the cap this notebook has always used).

    Returns:
        list: the ``Tool`` objects, in the order ``glob`` returned the files.

    Side effects:
        Calls the OpenAI embeddings API for every chunk of every file.
    """
    # Imported here so the function also works when this cell runs before the
    # cell that imports Tool at module level.
    from langchain.agents import Tool

    class DocumentInput(BaseModel):
        question: str = Field()

    llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo", openai_api_key=api_key)
    # One embeddings client is enough; it used to be re-created for every file.
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)

    files = [
        {'name': _tool_name_from_path(path), "path": path}
        for path in glob.glob(f'{data_dir}/*')
    ]

    tools = []
    for file in files[:max_files]:
        loader = TextLoader(file["path"])
        pages = loader.load_and_split()
        docs = text_splitter.split_documents(pages)
        retriever = FAISS.from_documents(docs, embeddings).as_retriever()

        # Wrap retrievers in a Tool
        tools.append(
            Tool(
                args_schema=DocumentInput,
                name=file["name"],
                description=f"needed when you want to answer questions about {file['name']} itinerary",
                func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever),
            )
        )

#     tools.append(PythonAstREPLTool(locals={"df": df},
#                          name="multi_county_query",
#                          description="useful for answering questions about multiple country costs e.g. average trip cost to peru",
#                          args_schema=PythonInputs))

    return tools

# Sales Agent

In [23]:
from langchain.agents import OpenAIFunctionsAgent, AgentExecutor
from langchain.agents import Tool
from langchain.schema.messages import SystemMessage

In [24]:
# System-prompt template for the agent. It has two placeholders, {conversation_stage}
# and {conversation_history}, which customize_prompt fills in on every turn.
SALES_AGENT_TOOLS_PROMPT = """
Never forget your name is Francis. You work as a Travel Agent.
You work at company named Francis. Francis's business is the following: Francis is a context aware AI travel agent that works on finding users their dream group tour holiday.
You are contacting a potential prospect in order to find them a group toup holiday.
Your means of contacting the prospect is live chat.

Keep your responses in short length to retain the user's attention. Never produce lists, just answers.

Always think about the following conversation stage you are at before answering:

 - {conversation_stage}

You MUST respond according to the previous conversation history and the stage of the conversation you are at.
If you get asked about an itinerary use the tools available to you, do not make up an answer. If you do not know the answer tell the user.
Only generate one response at a time and act as Francis only!
Do not add Francis: to your output.

Previous conversation history:
{conversation_history}

Begin!
"""

                                 

In [25]:
def customize_prompt(conversation_history, conversation_stage, SALES_AGENT_TOOLS_PROMPT):
    """Render the system prompt for this turn and wrap it as an agent prompt.

    Args:
        conversation_history: list of "Speaker: text" strings, joined with newlines.
        conversation_stage: stage instruction to embed in the template.
        SALES_AGENT_TOOLS_PROMPT: template text with ``{conversation_stage}``
            and ``{conversation_history}`` placeholders.

    Returns:
        The prompt produced by ``OpenAIFunctionsAgent.create_prompt`` with the
        rendered text as its system message.
    """
    history_text = "\n".join(conversation_history)

    from langchain import LLMChain, PromptTemplate

    template = PromptTemplate(
        template=SALES_AGENT_TOOLS_PROMPT,
        input_variables=["conversation_stage", "conversation_history"],
    )
    rendered_text = template.format(
        conversation_stage=conversation_stage,
        conversation_history=history_text,
    )

    return OpenAIFunctionsAgent.create_prompt(
        system_message=SystemMessage(content=rendered_text)
    )
# print(prompt.messages[0].content)

In [26]:
# Define the tools
# Built once at module level; run_francis reads this global on every turn.
tools = get_tools()

Created a chunk of size 759, which is longer than the specified 500
Created a chunk of size 917, which is longer than the specified 500
Created a chunk of size 830, which is longer than the specified 500
Created a chunk of size 771, which is longer than the specified 500
Created a chunk of size 559, which is longer than the specified 500
Created a chunk of size 800, which is longer than the specified 500
Created a chunk of size 710, which is longer than the specified 500
Created a chunk of size 724, which is longer than the specified 500
Created a chunk of size 902, which is longer than the specified 500
Created a chunk of size 759, which is longer than the specified 500
Created a chunk of size 762, which is longer than the specified 500


[{'name': 'Lima_to_La_Paz_Sandboarding_Sunsets_Lares_Trek', 'path': 'raw_data/itinerary_text/Lima to La Paz: Sandboarding & Sunsets Lares Trek.txt'}, {'name': 'Inca_Discovery_Inca_Trail', 'path': 'raw_data/itinerary_text/Inca Discovery Inca Trail.txt'}, {'name': 'The_Lares_Trek_Lares_Trek', 'path': 'raw_data/itinerary_text/The Lares Trek Lares Trek.txt'}, {'name': 'Coastal_Morocco_Waves_Market_Stalls_', 'path': 'raw_data/itinerary_text/Coastal Morocco: Waves & Market Stalls .txt'}, {'name': 'Jordan_Multisport_', 'path': 'raw_data/itinerary_text/Jordan Multisport .txt'}, {'name': 'Serengeti_to_Victoria_Falls_Overland_Night_Stars_Spices_', 'path': 'raw_data/itinerary_text/Serengeti to Victoria Falls Overland: Night Stars & Spices .txt'}, {'name': 'Spain_Portugal_Flamenco_Tapas_', 'path': 'raw_data/itinerary_text/Spain & Portugal: Flamenco & Tapas .txt'}, {'name': 'Explore_Southern_Africa_', 'path': 'raw_data/itinerary_text/Explore Southern Africa .txt'}, {'name': 'Inca_Discovery_Plus_Cus

In [27]:
from langchain.agents import OpenAIMultiFunctionsAgent
def run_francis(input, conversation_history, user_travel_details, user_itinerary_details):
    """Run one conversation turn and return the agent's reply.

    The user message is appended to ``conversation_history`` (mutated in
    place), the stage prompt is computed, a fresh agent is built with the
    module-level ``tools``, and the reply is appended to the history too.

    Returns:
        tuple: ``(reply_text, updated_user_travel_details)``.
    """
    chat_model = ChatOpenAI(temperature=0.9, model="gpt-3.5-turbo", openai_api_key=api_key)

    conversation_history.append(f"User: {input}")

    conversation_stage, user_travel_details = check_conversation_stage(
        conversation_history, user_travel_details, user_itinerary_details
    )
    final_prompt = customize_prompt(conversation_history, conversation_stage, SALES_AGENT_TOOLS_PROMPT)

    # Build the agent and its executor for this turn
    agent = OpenAIFunctionsAgent(llm=chat_model, tools=tools, prompt=final_prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, max_iterations=5)

    francis = agent_executor.run(input)
    conversation_history.append(f"Francis: {francis}")

    return francis, user_travel_details

In [28]:
# NOTE(review): `ad` is not defined anywhere, so this cell raises NameError (see its output).
# Presumably a deliberate barrier to stop "Run All" before the conversation cells — confirm, or remove the cell.
ad

NameError: name 'ad' is not defined

# conversation

In [39]:
# Set the langchain module's global `debug` attribute to False.
import langchain
langchain.debug = False

In [46]:
# Starting state for a new conversation: every detail is empty.
# The explicit None / "" values override the Field defaults (e.g. max_budget's default of 0).
user_travel_details = TravelDetails(introduction=False,
                                qualification="",
                                country="",
                                departing_after=None,
                                departing_before=None,
                                max_budget=None,
                                max_duration=None,
                                min_duration=None)

In [47]:
# No itinerary or date chosen yet.
# NOTE(review): no active code in the visible notebook updates this object (the extraction
# chain in check_conversation_stage is commented out), so it stays empty for the whole conversation.
user_itinerary_details = ItineraryDetails(itinerary_name="",
                                itinerary_start_date="")

In [48]:
# Seed the history with the agent's opening line; run_francis appends to this list in place.
conversation_history = ["Francis:  Hello, this is Francis from Francis Travel. Can i help you find a group tour today?"]

In [49]:
# Turn: the user answers the opening question. run_francis returns the reply and the merged travel details.
human_input = "yes"
francis_output, user_travel_details = run_francis(human_input, conversation_history, user_travel_details, user_itinerary_details)
print(francis_output)

['what country are you looking to travel to?', 'when are you looking to travel?', 'when are you looking to travel?', 'how much are you looking to spend?', 'how long do you want your trip to be?', 'how long do you want your trip to be?']
Great! I'm here to help you find the perfect group tour. What country are you looking to travel to?


In [50]:
# Turn: the user names a destination country.
human_input = "Portugal"
francis_output, user_travel_details = run_francis(human_input, conversation_history, user_travel_details, user_itinerary_details)
print(francis_output)

['how much are you looking to spend?', 'when are you looking to travel?', 'when are you looking to travel?', 'how long do you want your trip to be?', 'how long do you want your trip to be?']
Great! Portugal is a wonderful destination. How long are you planning to stay in Portugal?


In [52]:
# Turn: the user states a budget.
human_input = "£2000"
francis_output, user_travel_details = run_francis(human_input, conversation_history, user_travel_details, user_itinerary_details)
print(francis_output)

['when are you looking to travel?', 'when are you looking to travel?']
Great! With a budget of £2000, there are several group tour options available for you. When are you looking to travel?


In [53]:
# Turn: the user gives a travel month.
human_input = "april 2024"
francis_output, user_travel_details = run_francis(human_input, conversation_history, user_travel_details, user_itinerary_details)
print(francis_output)

[]
All details gathered! summarise the itineraries...
Thank you for choosing April 2024 as your travel date. Based on your preferences and budget, I recommend the "Highlights of Portugal" tour. It is a wonderful itinerary that includes visits to Lisbon, Coimbra, Sintra, and Porto. You will have the opportunity to explore UNESCO World Heritage Sites, enjoy delicious food, and relax in the beautiful Douro Valley. You can find more information and book the tour [here](https://www.gadventures.com/trips/portugal-highlights/EPLP/). Let me know if you have any other questions!


In [51]:
# Turn: the user gives a trip-length range.
# NOTE(review): this cell has execution count 51, i.e. it ran before the budget (52) and
# date (53) cells that sit above it; move it up so Run All replays the same conversation.
human_input = "between 4 and 30 days"
francis_output, user_travel_details = run_francis(human_input, conversation_history, user_travel_details, user_itinerary_details)
print(francis_output)

['how much are you looking to spend?', 'when are you looking to travel?', 'when are you looking to travel?']
Thank you for providing the duration of your trip. How much are you looking to spend?


In [54]:
# Turn: follow-up question about the length of the suggested itinerary.
human_input = "how long is it?"
francis_output, user_travel_details = run_francis(human_input, conversation_history, user_travel_details, user_itinerary_details)
print(francis_output)

[]
All details gathered! summarise the itineraries...
The "Highlights of Portugal" tour lasts for 7 days.


In [55]:
# Turn: follow-up question asking for a day-by-day summary.
human_input = "What is the day by day summary?"
francis_output, user_travel_details = run_francis(human_input, conversation_history, user_travel_details, user_itinerary_details)
print(francis_output)

[]
All details gathered! summarise the itineraries...
Here is the day by day summary of the "Highlights of Portugal" tour:

Day 1: Arrive in Lisbon
Day 2: Explore Lisbon
Day 3: Visit Coimbra and Batalha
Day 4: Optional visit to Sintra
Day 5: Travel to Porto
Day 6: Explore Porto and Douro Valley
Day 7: Depart from Porto

You can find more information and book the tour [here](https://www.gadventures.com/trips/portugal-highlights/EPLP/). Let me know if you have any other questions!


In [56]:
# Turn: follow-up question asking for more detail.
human_input = "Can you give me some more details?"
francis_output, user_travel_details = run_francis(human_input, conversation_history, user_travel_details, user_itinerary_details)
print(francis_output)

[]
All details gathered! summarise the itineraries...
Certainly! Here are some more details about the "Highlights of Portugal" tour:

- Day 1: Arrive in Lisbon - You will start your tour in the beautiful city of Lisbon, the capital of Portugal.
- Day 2: Explore Lisbon - Spend the day exploring the hilly streets of Lisbon, sampling local cuisine, and taking in the sun-dappled architecture.
- Day 3: Visit Coimbra and Batalha - Discover the historic city of Coimbra, home to one of the oldest universities in the world. Then, visit the Monastery of Batalha, a UNESCO World Heritage Site.
- Day 4: Optional visit to Sintra - Take an optional trip to the picturesque town of Sintra, known for its stunning palaces and gardens.
- Day 5: Travel to Porto - Journey to the city of Porto and explore its charming streets and vibrant culture.
- Day 6: Explore Porto and Douro Valley - Spend the day discovering Porto's highlights and enjoying the scenic beauty of the Douro Valley. You'll also have the chan