In [1]:
from dotenv import load_dotenv
import os

# Populate the process environment from the local .env file (if one exists).
load_dotenv()

# Look up the OpenAI key by name; this is None when OPENAI_KEY is not set.
api_key = os.environ.get("OPENAI_KEY")

In [2]:
from langchain.document_loaders import CSVLoader

# Load the CSV export into LangChain documents. The file is read as latin-1.
loader = CSVLoader(
    file_path="raw_data/new_format_test.csv",
    encoding='latin-1',
)
docs = loader.load()

In [2]:
from langchain.document_loaders import BigQueryLoader
# Fully qualified BigQuery table reference, split into its three parts.
PROJECT = "wagon-bootcamp-377120"
DATASET = "g_adventures_dataset"
TABLE = "one_day"

# Select every column of the rows whose tour_name is 'Highlights of Jordan'.
BASE_QUERY = f"""
    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE tour_name = 'Highlights of Jordan'
    """

# NOTE(review): this rebinds `loader` and `docs`, replacing whatever the
# CSVLoader cell assigned to the same names. Only one of the two sources is
# in effect downstream - confirm which one is intended.
loader = BigQueryLoader(BASE_QUERY)

# Run the query and collect the results as documents.
docs = loader.load()

In [5]:
# Sanity check: show the text content of the first loaded document.
first_doc = docs[0]
print(first_doc.page_content)

tour_name: Highlights of Jordan
itinerary_name: None
visited_countries: Jordan
start_date: 2024-04-19
duration: 8
url: https://www.gadventures.com/trips/highlights-of-jordan/DWHJ/
itinerary: "Day 1: Arrive at any time.
Day 2: Guided visit to the ancient Roman town of Jerash followed by a visit to the Dead Sea.
Day 3: Start the day with a guided visit to the intricate Madaba mosaics, enjoying the panoramic views from Mt Nebo and a visit to the crusader castle at Karak. Continue to Wadi Musa for the evening.
Day 4: Start early to visit iconic Petra. Enjoy a guided walking tour with your CEO, then opt to hike up to the top of the treasury or explore  on your own. This evening, opt to visit a Turkish bath or visit Petra by night.
Day 5: Opt to visit Little Petra in the morning before continuing to Wadi Rum. Head out for a 4x4 excursion during the day before a traditional dinner and overnight at a Bedouin desert camp.
Day 6: Journey to the red sea, where you can explore your surroundings or

In [6]:
from langchain.text_splitter import CharacterTextSplitter

# Break the loaded documents into chunks of at most ~500 characters, with a
# 50-character overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
texts = text_splitter.split_documents(docs)

In [7]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Embedding model used to vectorise the document chunks for similarity search.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

# Build the FAISS index from the chunked documents (`texts`) produced by the
# text splitter, not from the unsplit `docs`. Indexing `docs` left the
# splitting step without any effect on retrieval.
retriever = FAISS.from_documents(texts, embeddings).as_retriever()


In [8]:
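# v2 - NOTE(review): presumably marks the second iteration of the QA pipeline (custom prompt + RetrievalQA in the following cells); confirm, or remove this placeholder cell.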

In [9]:
from langchain.prompts import PromptTemplate
# Prompt given to the LLM: the retrieved context is substituted for {context}
# and the user's question for {question}. The model is told to admit when it
# does not know rather than invent an answer.
prompt_template = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Answer:"
)

PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

In [12]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Extra arguments forwarded to the underlying "stuff" chain: use the custom
# prompt defined earlier in the notebook.
chain_type_kwargs = {"prompt": PROMPT}

# Retrieval-augmented QA chain: documents come from `retriever` and are
# passed, together with the question, to the OpenAI LLM.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(openai_api_key=api_key),
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
)

In [14]:
# Ask the chain a question about the indexed tour.
query = "how active is this tour?"
result = qa.run(query)

# Trim surrounding whitespace from the answer before showing it.
answer_text = result.strip()
print(answer_text)

This tour is physically graded as 3, which means it includes some light hiking, biking, rafting, or kayaking in addition to walking.
