In [52]:
from langchain_google_genai import GoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import pandas as pd

In [None]:
# Location of the FAQ dataset; later cells reuse `file_path` when loading documents.
file_path = "codebasics_faqs.csv"

# Read the same CSV into a DataFrame for tabular inspection.
df = pd.read_csv(file_path)

In [53]:
# Turn every CSV row into one Document.
# The encoding is pinned explicitly: when it is left unset the file is decoded
# with the platform's default codec, which on some systems produces mojibake
# (e.g. "donï¿½t") in the loaded page_content.
# NOTE(review): assumes the CSV is stored as UTF-8 — confirm against the file.
loader = CSVLoader(file_path=file_path, encoding="utf-8")
data = loader.load()

In [54]:
# Embedding model shared by indexing (FAISS.from_documents) and query-time retrieval.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Smoke-test the embedding call and preview only the first few dimensions
# rather than dumping the full vector into the notebook output.
vector = embeddings.embed_query("hello, world!")
vector[:5]

[0.05636945, 0.0048285457, -0.0762591, -0.023642512, 0.05329321]

In [55]:
# Folder name used when the FAISS index is written to / read from disk.
vectordb_file_path = "faiss_index"

# Embed all loaded documents and build the FAISS vector store from them.
vectordb = FAISS.from_documents(data, embeddings)

In [56]:
# Persist the index so it can be reloaded without re-embedding the CSV.
vectordb.save_local(folder_path=vectordb_file_path)

In [57]:
# Reload the persisted index from disk using the same embedding model.
# SECURITY: allow_dangerous_deserialization=True opts in to unpickling the
# stored index; only use it on index folders you created yourself, never on
# files from an untrusted source.
db = FAISS.load_local(
    folder_path=vectordb_file_path,
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [58]:
# Only return documents whose relevance score reaches the threshold.
# The threshold has to travel inside `search_kwargs` together with
# search_type="similarity_score_threshold"; passed as a bare keyword to
# as_retriever() it is not applied and plain top-k similarity search is used.
# NOTE(review): relevance scores depend on the store's distance strategy —
# confirm 0.5 is a sensible cut-off for these embeddings.
retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5},
)

In [72]:
# LLM that writes the final answer from the retrieved context.
# NOTE(review): temperature=0.9 is high for a prompt that asks the model to
# quote the source text and not invent answers — consider lowering it.
llm = GoogleGenerativeAI(model="gemini-pro", temperature=0.9)

# Instruction template; {context} receives the retrieved documents and
# {question} the user's query.
prompt_template = """Given the following context and a question, generate an answer based on this context only.
    In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
    If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.

    CONTEXT: {context}

    QUESTION: {question}"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# "stuff" chain: all retrieved documents are placed into a single prompt.
# The source documents are returned alongside the answer so results can be
# traced back to CSV rows.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    input_key="query",
    return_source_documents=True,
)

In [79]:
# Use invoke() rather than calling the chain object directly (deprecated).
# Displays the full result dict: query, result and source_documents.
chain.invoke({"query": "Can I attend this bootcamp while working full time?"})

{'query': 'Can I attend this bootcamp while working full time?',
 'result': 'Yes. This bootcamp is self-paced. You can learn on your own schedule.',
 'source_documents': [Document(page_content='prompt: Can I attend this bootcamp while working full time?\nresponse: Yes. This bootcamp is self-paced. You can learn on your own schedule.', metadata={'source': 'codebasics_faqs.csv', 'row': 6}),
  Document(page_content='prompt: What is the duration of this bootcamp? How long will it last?\nresponse: You can complete all courses in 3 months if you dedicate 2-3 hours per day.', metadata={'source': 'codebasics_faqs.csv', 'row': 5}),
  Document(page_content='prompt: Does this bootcamp have lifetime access?\nresponse: Yes', metadata={'source': 'codebasics_faqs.csv', 'row': 4}),
  Document(page_content='prompt: What if I donï¿½t like this bootcamp?\nresponse: As promised we will give you a 100% refund based on the guidelines (Please refer to our course refund policy before enrolling).', metadata={'

In [74]:
# Use invoke() rather than calling the chain object directly (deprecated).
chain.invoke({"query": "Do you provide any job assistance"})["result"]

'Yes, We help you with resume and interview preparation along with that we help you in building online credibility, and based on requirements we refer candidates to potential recruiters.'

In [75]:
# Use invoke() rather than calling the chain object directly (deprecated).
chain.invoke(
    {"query": "Is this bootcamp enough for me in Microsoft Power BI and Excel certifications?"}
)["result"]

'Yes, this bootcamp will certainly help because we cover the majority of the skills measured in these exams. However, please be informed that this course focuses on Job ready aspects and not on all aspects required to clear the exams. In addition to this course, you might need to visit the official learning material designed by Microsoft which is available for free on their website.'

In [76]:
# Use invoke() rather than calling the chain object directly (deprecated).
chain.invoke({"query": "What about placement support?"})["result"]

"I don't know."

In [78]:
# Use invoke() rather than calling the chain object directly (deprecated).
chain.invoke({"query": "I want to buy this on EMI, Can I?"})["result"]

'No'