In [1]:
import os

from dotenv import load_dotenv

# Populate the process environment from the local .env file before
# anything below tries to read configuration from it.
load_dotenv()

# OpenAI credential used by the embedding and LLM cells further down.
# Evaluates to None when OPENAI_KEY is not set.
api_key = os.environ.get("OPENAI_KEY")

In [2]:
from langchain.document_loaders import CSVLoader

# Load the local CSV sample as LangChain documents.
# NOTE(review): `loader` and `docs` are rebound by the BigQuery cell that
# follows, so on a top-to-bottom run this CSV result is discarded.
loader = CSVLoader(
    file_path="raw_data/new_format_test.csv",
    encoding="latin-1",
)
docs = loader.load()

In [3]:
from langchain.document_loaders import BigQueryLoader

# Fully-qualified location of the source table, split into its parts so
# each can be changed independently.
PROJECT = "wagon-bootcamp-377120"
DATASET = "g_adventures_dataset"
TABLE = "one_month"

# Pull every column for a single tour; the tour name is fixed here.
BASE_QUERY = f"""
    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE tour_name = 'Highlights of Jordan'
    """

# Run the query and keep the loaded result as `docs`.
# This rebinds the `loader` / `docs` names used by the CSV cell above.
loader = BigQueryLoader(BASE_QUERY)
docs = loader.load()

In [4]:
# print(docs[0].page_content)

In [5]:
from langchain.text_splitter import CharacterTextSplitter

# Break the loaded documents into smaller chunks (chunk_size=500,
# chunk_overlap=50); the chunked result is stored in `texts`.
text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
texts = text_splitter.split_documents(docs)

In [6]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Embedding model used to vectorise the document chunks; authenticated
# with the key read from the environment in the first cell.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

# Build the FAISS index from `texts` (the output of the text splitter),
# not from the unsplit `docs`. Previously the index was built from `docs`,
# which left the chunking step without any effect on retrieval.
vector_store = FAISS.from_documents(texts, embeddings)

# Retriever interface over the index, consumed by the RetrievalQA chain.
retriever = vector_store.as_retriever()


In [7]:
# v2

In [8]:
from langchain.prompts import PromptTemplate

# Template for the QA prompt. It has two placeholders, {context} and
# {question}, and instructs the model to admit when it does not know
# the answer instead of inventing one.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer:"""

# Prompt object handed to the QA chain; the declared input variables
# match the two placeholders in the template text.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [9]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Route the custom prompt into the underlying chain.
chain_type_kwargs = {"prompt": PROMPT}

# Question-answering chain: uses the retriever built earlier, the
# "stuff" chain type, and an OpenAI LLM authenticated with `api_key`.
llm = OpenAI(openai_api_key=api_key)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
)

In [10]:
# Ask the chain a single question and print the answer with surrounding
# whitespace removed.
query = "What are the departure dates for the tour?"
result = qa.run(query)
answer_text = result.strip()
print(answer_text)

The departure dates for the tour are April 2nd, April 7th, and April 9th.
