In [1]:
from dotenv import load_dotenv
import os

# Populate the process environment from a .env file, if one is found.
load_dotenv()

# Fetch the OpenAI credential; this is None when OPENAI_KEY is not set.
api_key = os.environ.get("OPENAI_KEY")

In [2]:
from langchain.document_loaders import CSVLoader

# Load the CSV export, decoding it as latin-1.
# NOTE(review): `loader` and `docs` are reassigned by the BigQuery cell that
# follows, so when cells run in order this CSV load does not feed the retriever.
loader = CSVLoader(
    file_path="raw_data/new_format_test.csv",
    encoding="latin-1",
)
docs = loader.load()

In [3]:
from langchain.document_loaders import BigQueryLoader
# Fully-qualified location of the source table: project, dataset, table.
PROJECT, DATASET, TABLE = (
    "wagon-bootcamp-377120",
    "g_adventures_dataset",
    "one_month",
)

# Select every column of every row belonging to the single tour under study.
BASE_QUERY = f"""
    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE tour_name = 'Highlights of Jordan'
    """

# Run the query and load the results as documents.
loader = BigQueryLoader(BASE_QUERY)
docs = loader.load()

In [4]:
# print(docs[0].page_content)

In [5]:
from langchain.text_splitter import CharacterTextSplitter

# Break the loaded documents into smaller pieces: target size 500 with an
# overlap of 50 between neighbouring pieces.
text_splitter = CharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
texts = text_splitter.split_documents(docs)

In [6]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Embedding client authenticated with the key read from the environment.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

# Build the FAISS index from the split chunks (`texts`), not the raw `docs`.
# Previously the splitter output was computed and then ignored, so whole,
# unsplit documents were embedded and later stuffed into the prompt.
vector_store = FAISS.from_documents(texts, embeddings)

# Expose the index through the retriever interface used by the QA chain.
retriever = vector_store.as_retriever()


In [7]:
# v2

In [8]:
from langchain.prompts import PromptTemplate
# Prompt text for the QA chain: it instructs the model to answer from the
# supplied context only and to admit when the answer is unknown.
# `{context}` and `{question}` are the two placeholders filled at run time.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer:"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [9]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Assemble a "stuff"-type retrieval QA chain that uses the custom prompt and
# the retriever built earlier.
chain_type_kwargs = dict(prompt=PROMPT)
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(openai_api_key=api_key),
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
)

In [10]:
# Ask the chain one question and print the answer without surrounding whitespace.
query = "What are the departure dates for the tour?"
result = qa.run(query)
answer = result.strip()
print(answer)

The departure dates for the tour are April 2nd, April 7th, and April 9th.


In [31]:
from langchain.document_loaders import BigQueryLoader

# Fully-qualified location of the source table.
PROJECT = "wagon-bootcamp-377120"
DATASET = "g_adventures_dataset"
TABLE = "one_month"

# Collapse the table to one row per (tour_name, itinerary_name):
#   * start dates are gathered into a single array ordered by date,
#   * distinct adult prices are gathered into `Costs`,
#   * every other column is reduced with MAX() to a single value per group.
# The SQL is bound to BASE_QUERY only. The earlier chained assignment
# (`BASE_QUERY = query = ...`) also overwrote `query`, the name used for the
# natural-language question passed to the QA chain, with SQL text.
BASE_QUERY = f"""
    SELECT
      MAX(tour_operator) AS tour_operator,
      tour_name,
      itinerary_name,
      MAX(visited_countries) AS visited_countries,
      MAX(currency) AS currency,
      ARRAY_AGG(DISTINCT Standard___Adult) AS Costs,
      MAX(duration) AS duration,
      ARRAY_AGG(CAST(start_date AS STRING) ORDER BY start_date) AS start_dates,
      MAX(Travel_Style) AS Travel_Style,
      MAX(Service_Level) AS Service_Level,
      MAX(Physical_Grading) AS Physical_Grading,
      MAX(Merchandising) AS Merchandising,
      MAX(Trip_Type) AS Trip_Type,
      MAX(itinerary) AS itinerary,
      MAX(url) AS url
    FROM
      {PROJECT}.{DATASET}.{TABLE}
    WHERE 
      tour_name = 'Highlights of Jordan'
    GROUP BY
      tour_name, itinerary_name;
    """

# Run the aggregated query and load the results as documents.
loader = BigQueryLoader(BASE_QUERY)

docs = loader.load()

In [32]:
from langchain.text_splitter import CharacterTextSplitter

# Break the loaded documents into smaller pieces: target size 500 with an
# overlap of 50 between neighbouring pieces.
text_splitter = CharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
texts = text_splitter.split_documents(docs)

In [33]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Embedding client authenticated with the key read from the environment.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

# Build the FAISS index from the split chunks (`texts`), not the raw `docs`.
# Previously the splitter output was computed and then ignored, so whole,
# unsplit documents were embedded and later stuffed into the prompt.
# NOTE(review): if splitting separates the start dates from the tour name,
# confirm that answer quality is still acceptable.
vector_store = FAISS.from_documents(texts, embeddings)

# Expose the index through the retriever interface used by the QA chain.
retriever = vector_store.as_retriever()

In [41]:
from langchain.prompts import PromptTemplate
# Prompt text for the QA chain: it instructs the model to answer from the
# supplied context only and to admit when the answer is unknown.
# `{context}` and `{question}` are the two placeholders filled at run time.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer:"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [39]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Assemble a "stuff"-type retrieval QA chain that uses the custom prompt and
# the retriever built earlier.
chain_type_kwargs = dict(prompt=PROMPT)
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(openai_api_key=api_key),
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
)

In [40]:
# Ask the chain one question and print the answer without surrounding whitespace.
query = "What are the departure dates for the tour?"
result = qa.run(query)
answer = result.strip()
print(answer)

The departure dates for the Highlights of Jordan tour by G Adventures are April 2, April 6, April 7, April 9, April 12, April 13, April 16, April 18, April 19, and April 21 in 2024.


In [37]:
# Show the raw text of the first loaded document to check what gets indexed.
first_document = docs[0]
print(first_document.page_content)

tour_operator: G Adventures
tour_name: Highlights of Jordan
itinerary_name: None
visited_countries: Jordan
currency: GBP
Costs: [1259.0]
duration: 8
start_dates: ['2024-04-02', '2024-04-06', '2024-04-07', '2024-04-09', '2024-04-12', '2024-04-13', '2024-04-16', '2024-04-18', '2024-04-19', '2024-04-21']
Travel_Style: Classic - All of the highlights, culture, access, and I-can’t-believe-we-did-that moments, all at a great price.
Service_Level: Standard - Comfortable tourist-class accommodations with character; mix of public and private transport.
Physical_Grading: 3 - Average - Some tours may include light hiking, biking, rafting, or kayaking in addition to walking.
Merchandising: Top Seller, Planeterra Project, Book Your Bubble - The Book Your Bubble Collection is a collection of 80 G Adventures tours that give travellers and their hand-picked crew the safety of travelling with people they know, combined with extra benefits for booking private travel.
Trip_Type: Small Group - Group trips