In [None]:
# We can use Pinecone to run semantic search to answer questions about the itinerary.
# We need a SQL agent to answer questions about start dates and costs, as these are dynamic.

In [24]:
from google.cloud import bigquery
from sqlalchemy import *
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
import os
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms.openai import OpenAI
from langchain.agents import AgentExecutor



In [42]:
from dotenv import load_dotenv
import os

# Pull the variables defined in the .env file into the process environment.
load_dotenv()

# Look up the OpenAI key under OPENAI_KEY; api_key is None when the variable is unset.
api_key = os.environ.get("OPENAI_KEY")

In [43]:
# Path to the GCP service-account key file used for the BigQuery connection.
# GOOGLE_APPLICATION_CREDENTIALS (e.g. set in .env) takes precedence when it is
# non-empty, so the notebook is not tied to one machine's directory layout;
# otherwise the original local path is used.
service_account_file = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    or "/home/stuart/code/stuhow/gcp/wagon-bootcamp-377120-85e4244eae02.json"  # Change to where your service account key file is located
)

In [50]:
# BigQuery coordinates: GCP project, dataset, and the table the agent works on.
PROJECT, DATASET, TABLE = "wagon-bootcamp-377120", "g_adventures_dataset", "one_month"

# SQLAlchemy URL for the BigQuery dialect; the key file is passed as credentials_path.
sqlalchemy_url = "bigquery://{}/{}?credentials_path={}".format(
    PROJECT, DATASET, service_account_file
)

In [57]:
# Open the database through the BigQuery URL, limited to the configured table.
db = SQLDatabase.from_uri(
    sqlalchemy_url,
    include_tables=[TABLE],
)

# Completion model for the agent, authenticated with the key loaded from .env.
llm = OpenAI(
    model="text-davinci-003",
    openai_api_key=api_key,
    temperature=0,
)

In [52]:
# Bundle the database handle and the LLM into the toolkit the SQL agent is built from.
toolkit = SQLDatabaseToolkit(
    llm=llm,
    db=db,
)

In [55]:
# Assemble the SQL agent from the toolkit; verbose tracing is off and top_k is 1000.
agent_executor = create_sql_agent(
    toolkit=toolkit,
    llm=llm,
    top_k=1000,
    verbose=False,
)

In [56]:
# Ask the agent a simple aggregate question; the returned answer string is the
# cell's displayed output because the call is the last expression in the cell.
agent_executor.run("what is the average duration?")

'The average duration is 9.55139442231075 days.'

In [58]:
# Reconnect without the include_tables filter that the earlier cell passed
# (previously: include_tables=[TABLE]).
db = SQLDatabase.from_uri(sqlalchemy_url)

# Same completion-model configuration as before, re-created for this section.
llm = OpenAI(
    model="text-davinci-003",
    openai_api_key=api_key,
    temperature=0,
)

In [59]:
import ast
import re

def run_query_save_results(db, query):
    """Run ``query`` and return its non-empty cell values as cleaned strings.

    The string returned by ``db.run(query)`` is parsed with
    ``ast.literal_eval`` as a list of row tuples. The rows are flattened,
    falsy cells (``None``, ``''``, ``0``) are skipped, standalone runs of
    digits are removed from each value, and the result is stripped of leading
    and trailing whitespace. Interior whitespace is left untouched.

    Args:
        db: Object exposing ``run(query)`` that returns the result rows
            rendered as a Python-literal string.
        query: SQL text passed to ``db.run`` unchanged.

    Returns:
        A list of cleaned strings in row order. Empty when the query produced
        no rows; values that are empty after cleaning are omitted.

    Raises:
        ValueError, SyntaxError: If the non-empty result string is not a valid
            Python literal (propagated from ``ast.literal_eval``).
    """
    raw = db.run(query)
    # An empty/blank result string is not a valid literal; treat it as "no rows"
    # instead of letting literal_eval raise.
    if not raw or not raw.strip():
        return []

    rows = ast.literal_eval(raw)
    values = [cell for row in rows for cell in row if cell]

    cleaned = []
    for value in values:
        # str() keeps non-string cells (e.g. integers) from crashing re.sub.
        text = re.sub(r'\b\d+\b', '', str(value)).strip()
        # Values made up only of digits collapse to ''; drop them so callers
        # never receive empty strings.
        if text:
            cleaned.append(text)
    return cleaned

# Collect the distinct tour_name values for the vector store. The table name
# comes from the TABLE constant so the query follows the notebook's configuration
# instead of repeating the literal.
itinerary = run_query_save_results(db, f"SELECT DISTINCT tour_name FROM {TABLE}")

In [68]:
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS


# Pass the key loaded from .env explicitly, matching the OpenAI(...) cells, so this
# cell does not depend on a separately exported OPENAI_API_KEY variable.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

# Index the cleaned names in an in-memory FAISS store and expose it as a retriever.
vector_db = FAISS.from_texts(itinerary, embeddings)
retriever = vector_db.as_retriever()

# Wrap the retriever as an agent tool; the tool name is referenced by the
# agent's custom suffix, so it must stay 'itinerary_search'.
retriever_tool = create_retriever_tool(
    retriever,
    name='itinerary_search',
    description='use to learn how an itinerary name is actually written',
)

# Extra tools handed to create_sql_agent alongside the SQL toolkit.
custom_tool_list = [retriever_tool]

In [74]:
from langchain.agents import create_sql_agent, AgentType
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.utilities import SQLDatabase
from langchain.chat_models import ChatOpenAI

# Connect to BigQuery again, limited to the configured table.
db = SQLDatabase.from_uri(sqlalchemy_url, include_tables=[TABLE])

# Chat model for the function-calling agent. The key loaded from .env is passed
# explicitly, matching the OpenAI(...) cells, so this cell does not depend on a
# separately exported OPENAI_API_KEY variable.
llm = ChatOpenAI(model_name='gpt-4', temperature=0, openai_api_key=api_key)

# Toolkit built from the database handle and the chat model.
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

# Extra instruction given to the agent: look up the itinerary name via the
# itinerary_search tool before anything else.
custom_suffix = """
You should always first check the itinerary name spelling using the itinerary_search tool.
"""

# SQL agent in OpenAI-functions mode, extended with the retriever tool(s) and
# the custom suffix; verbose=True prints the chain trace.
agent = create_sql_agent(
    toolkit=toolkit,
    llm=llm,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    extra_tools=custom_tool_list,
    suffix=custom_suffix,
    verbose=True,
)

In [76]:
# Ask the extended agent a question that needs the itinerary name resolved.
# NOTE(review): the prompt contains the misspelling 'deperture'; it is left as-is
# because the recorded output below was produced with this exact text.
agent.run("what deperture dates does the italy itinerary have?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with ``


[0m[38;5;200m[1;3mone_month[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `one_month`


[0m[33;1m[1;3m
CREATE TABLE `one_month` (
	`tour_operator` STRING, 
	`tour_name` STRING, 
	`itinerary_name` STRING, 
	`visited_countries` STRING, 
	`start_date` DATE, 
	`duration` INT64, 
	`url` STRING, 
	`itinerary` STRING, 
	`Travel_Style` STRING, 
	`Service_Level` STRING, 
	`Physical_Grading` STRING, 
	`Merchandising` STRING, 
	`Trip_Type` STRING, 
	`currency` STRING, 
	`Deluxe___Double___D___Adult` FLOAT64, 
	`Standard___Twin___D___Adult` FLOAT64, 
	`Standard___Adult` FLOAT64, 
	`En_suite_bathroom___Twin___D___Adult` FLOAT64, 
	`Shared_bathroom___Twin___D___Adult` FLOAT64, 
	`En_suite_bathroom___Double___D___Adult` FLOAT64, 
	`Upper_Deck_Balcony___Single___D___Adult` FLOAT64, 
	`Main_Deck___Double___D___Adult` FLOAT64, 
	`Upper_Deck_Balcony___Double_Twin___D___Adult` FLOAT64, 
	`Main_De

'The Italy itinerary has the following departure dates:\n\n1. 2024-04-02\n2. 2024-04-16'