In [1]:
import pinecone

from google.cloud import bigquery

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import BigQueryLoader

from dotenv import load_dotenv
import os

# Pull the settings stored in the local .env file into the process
# environment, so no credential has to be written into the notebook.
load_dotenv()

# Credentials for the OpenAI and Pinecone clients used in the cells below.
# Each lookup yields None when the variable is not set.
api_key = os.environ.get("OPENAI_KEY")
pinecone_key = os.environ.get("PINECONE_API_KEY")
pinecone_env = os.environ.get("PINECONE_ENV")

  from tqdm.autonotebook import tqdm


In [2]:
# Configure the Pinecone client with the key and environment read from the
# environment variables.
pinecone.init(environment=pinecone_env, api_key=pinecone_key)

In [3]:

# BigQuery location of the tour data, interpolated into the SQL below as
# PROJECT.DATASET.TABLE.
TABLE = "one_month"
DATASET = "g_adventures_dataset"
PROJECT = "wagon-bootcamp-377120"

# query = f"""
#     SELECT 
#       DISTINCT tour_name
#     FROM {PROJECT}.{DATASET}.{TABLE}
#     """

# client = bigquery.Client(project="wagon-bootcamp-377120")
# query_job = client.query(query)
# result = query_job.result()
# df = result.to_dataframe()

In [4]:
# tour_name_list = df['tour_name']

In [5]:
# Tour names selected for this session; one of them is used as the
# tour_name metadata filter at question time.
found_itineraries = [
    'Morocco Kasbahs & Desert',
    'Coastal Morocco: Waves & Market Stalls',
]  # Peru Panorama

In [14]:
# Build the ingest query for every tour listed in `found_itineraries`.
#
# Why: the query used to hard-code a single tour name, while the Q&A cell
# filters on a different tour from `found_itineraries`. On a fresh
# Restart & Run All that tour was never loaded, so retrieval only worked
# when the index still held data from an earlier session.
if not found_itineraries:
    # An empty list would render `IN ()`, which is not valid SQL.
    raise ValueError("found_itineraries is empty; nothing to ingest")

# The loader is handed a finished SQL string, so the names are embedded as
# single-quoted literals with backslashes and single quotes escaped.
tour_name_literals = ", ".join(
    "'" + name.replace("\\", "\\\\").replace("'", "\\'") + "'"
    for name in found_itineraries
)

# One row per (tour_name, itinerary_name); MAX() collapses every other
# column to a single value within each group.
query = f"""
    SELECT
        MAX(tour_operator) AS tour_operator,
        tour_name,
        itinerary_name,
        MAX(visited_countries) AS visited_countries,
        MAX(duration) AS duration,
        MAX(Travel_Style) AS Travel_Style,
        MAX(Service_Level) AS Service_Level,
        MAX(Physical_Grading) AS Physical_Grading,
        MAX(Merchandising) AS Merchandising,
        MAX(Trip_Type) AS Trip_Type,
        MAX(itinerary) AS itinerary,
        MAX(url) AS url
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE
      tour_name IN ({tour_name_literals})
    GROUP BY
      tour_name, itinerary_name;
    """

# client = bigquery.Client(project="wagon-bootcamp-377120")
# query_job = client.query(query)
# result = query_job.result()
# df = result.to_dataframe()

In [15]:
# Make sure the "chatbot" index exists before anything is written to it.
# It is created with 1536 dimensions and the cosine metric.
if "chatbot" not in pinecone.list_indexes():
    pinecone.create_index(name="chatbot", dimension=1536, metric="cosine")

# Embedding model; reused later for both the upsert and query embedding.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

# Run the SQL held in `query` through the BigQuery loader. tour_name is
# requested as a metadata column so results can be filtered on it later.
loader = BigQueryLoader(query, metadata_columns=["tour_name"])
docs = loader.load()

# Split the loaded documents into chunks (size 500, overlap 50).
# NOTE(review): presumably the splitter's default separator is a blank
# line; if the itinerary text contains none, chunks may exceed 500
# characters -- verify against the loaded data.
text_splitter = CharacterTextSplitter(chunk_overlap=50, chunk_size=500)
texts = text_splitter.split_documents(docs)



In [16]:
# Handle to the "chatbot" index.
index = pinecone.Index("chatbot")

# let pinecone know what our metadata is
# metadata = [{'tour_name': 'Peru Panorama'}]

# Embed the chunks with `embeddings` and store them in the "chatbot" index.
docsearch = Pinecone.from_documents(
    texts,
    embeddings,
    index_name="chatbot",
)

In [None]:
# ingestion complete - test Q&A

In [10]:
# Open the "chatbot" index and wrap it as a LangChain vector store for
# querying. Queries are embedded with `embeddings.embed_query`; the third
# argument is presumably the metadata key that holds the chunk text --
# verify against the installed LangChain version.
pinecone_index = pinecone.Index("chatbot")
vectorstore = Pinecone(
    pinecone_index,
    embeddings.embed_query,
    "text",
)



In [11]:
# completion llm
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0.0,
    openai_api_key=api_key,
)

# Metadata filter handed to the retriever: only chunks whose tour_name
# equals this tour are considered.
filters = {"tour_name": "Coastal Morocco: Waves & Market Stalls"}
retriever = vectorstore.as_retriever(search_kwargs={'filter': filters})

# Retrieval QA chain using the "stuff" chain type over the filtered retriever.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)

In [13]:
# Send one question through the QA chain and show the reply as plain text.
question = 'what surfing oppertunities are there?'
answer = qa.run(question)
print(answer)

The itinerary mentions that on Day 3, there is an opportunity to suit up for a surf lesson and hit the waves. After the lesson, there is a free afternoon for surfing or paddle-boarding. So, during this trip, you will have the chance to experience surfing in Taghazout, a fishing village in Morocco.


In [None]:
# Function to generate the SQL query for tool creation

In [None]:
# tools = []
# for itinerary in found_itineraries:
#     query = generate_sql_query(user_travel_details, found_itineraries)

#     loader = BigQueryLoader(query)

#     docs = loader.load()
    
#     text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
#     texts = text_splitter.split_documents(docs)

In [None]:
# pinecone.delete_index("chatbot")