In [26]:
from langchain.llms import GooglePalm
from dotenv import load_dotenv
# Read key/value pairs from a local .env file into the process environment.
# No API key is passed to GooglePalm explicitly, so it presumably comes from
# the environment populated here -- TODO confirm the expected variable name.
load_dotenv()

# Shared LLM client used by every later cell (direct calls and the QA chain).
llm = GooglePalm(
    temperature=0.1,
)

In [27]:
# Smoke test: send one plain-text prompt straight to the model to confirm the
# client is configured, then print the completion so its line breaks render.
poem = llm(
    "Write a 4 line poem of my love for samosa",
)
print(poem)

**Oh, samosa, my love**
**You are so delicious, so flaky**
**With your filling of potatoes and peas**
**I could eat you every day**


In [12]:
from langchain.document_loaders.csv_loader import CSVLoader

# Each CSV row becomes one Document. With source_column="prompt", the row's
# "prompt" value is stored as metadata["source"] (visible in the preview of
# `data` further down).
loader = CSVLoader(
    file_path="codebasics_faqs.csv",
    source_column="prompt",
)
data = loader.load()

In [14]:
# Peek at the first four loaded Documents only; avoid dumping the whole list.
data[0:4]

[Document(page_content='prompt: I have never done programming in my life. Can I take this bootcamp?\nresponse: Yes, this is the perfect bootcamp for anyone who has never done coding and wants to build a career in the IT/Data Analytics industry or just wants to perform better in your current job or business using data.', metadata={'source': 'I have never done programming in my life. Can I take this bootcamp?', 'row': 0}),
 Document(page_content='prompt: Why should I trust Codebasics?\nresponse: Till now 9000 + learners have benefitted from the quality of our courses. You can check the review section and also we have attached their LinkedIn profiles so that you can connect with them and ask directly.', metadata={'source': 'Why should I trust Codebasics?', 'row': 1}),
 Document(page_content='prompt: Is there any prerequisite for taking this bootcamp ?\nresponse: Our bootcamp is specifically designed for beginners with no prior experience in this field. The only prerequisite is that you ne

In [19]:
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS

# Pin the embedding checkpoint instead of relying on the library default.
# Vectors stored in the index and vectors computed for later queries must come
# from the same model; an implicit default could change on a library upgrade.
# "hkunlp/instructor-large" matches the ~1.34 GB INSTRUCTOR checkpoint this
# cell downloaded, so behaviour is unchanged.
# First run downloads the model weights, which can take several minutes.
instruct_embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
)

# Embed every loaded FAQ Document and build an in-memory FAISS index over them.
vectordb = FAISS.from_documents(documents=data, embedding=instruct_embeddings)

.gitattributes: 100%|██████████| 1.48k/1.48k [00:00<00:00, 2.62MB/s]
1_Pooling/config.json: 100%|██████████| 270/270 [00:00<00:00, 1.02MB/s]
2_Dense/config.json: 100%|██████████| 116/116 [00:00<00:00, 229kB/s]
pytorch_model.bin: 100%|██████████| 3.15M/3.15M [00:00<00:00, 8.17MB/s]
README.md: 100%|██████████| 66.3k/66.3k [00:00<00:00, 24.0MB/s]
config.json: 100%|██████████| 1.53k/1.53k [00:00<00:00, 3.60MB/s]
config_sentence_transformers.json: 100%|██████████| 122/122 [00:00<00:00, 468kB/s]
pytorch_model.bin: 100%|██████████| 1.34G/1.34G [02:46<00:00, 8.04MB/s]
sentence_bert_config.json: 100%|██████████| 53.0/53.0 [00:00<00:00, 51.6kB/s]
special_tokens_map.json: 100%|██████████| 2.20k/2.20k [00:00<00:00, 3.30MB/s]
spiece.model: 100%|██████████| 792k/792k [00:00<00:00, 9.43MB/s]
tokenizer.json: 100%|██████████| 2.42M/2.42M [00:01<00:00, 1.73MB/s]
tokenizer_config.json: 100%|██████████| 2.41k/2.41k [00:00<00:00, 14.4MB/s]
modules.json: 100%|██████████| 461/461 [00:00<00:00, 2.12MB/s]


load INSTRUCTOR_Transformer
max_seq_length  512


In [22]:
# Wrap the FAISS index as a retriever using the library's default search
# settings (no arguments are overridden here).
retriever = vectordb.as_retriever()

# Sanity check: the question is phrased differently from any FAQ prompt, so
# the hits show whether semantic matching works.
rdocs = retriever.get_relevant_documents(
    query="for how long is this course valid?",
)
rdocs

[Document(page_content='prompt: Once purchased, is this course available for lifetime access?\nresponse: Yes', metadata={'source': 'Once purchased, is this course available for lifetime access?', 'row': 22}),
 Document(page_content='prompt: What is the duration of this bootcamp? How long will it last?\nresponse: You can complete all courses in 3 months if you dedicate 2-3 hours per day.', metadata={'source': 'What is the duration of this bootcamp? How long will it last?', 'row': 8}),
 Document(page_content='prompt: Will the course be upgraded when there are new features in Power BI?\nresponse: Yes, the course will be upgraded periodically based on the new features in Power BI, and learners who have already bought this course will have free access to the upgrades.', metadata={'source': 'Will the course be upgraded when there are new features in Power BI?', 'row': 27}),
 Document(page_content='prompt: Is this bootcamp enough for me in Microsoft Power BI and\n Excel certifications?\nrespo

In [28]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Instruction prompt for the QA chain. {context} receives the retrieved FAQ
# entries and {question} the user's query; the model is told to answer from
# the context only and to say "I don't know." otherwise.
prompt_template = """Given the following context and a question, generate an answer based on this context only.
In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.

CONTEXT: {context}

QUESTION: {question}"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Retrieval-augmented QA chain:
#   - retriever: fetches FAQ Documents relevant to the query
#   - chain_type="stuff": the retrieved documents are placed into one prompt
#   - input_key="query": callers supply the question under the "query" key
#   - return_source_documents=True: the result also lists the Documents used
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=PROMPT),
    input_key="query",
    return_source_documents=True,
)

In [29]:
# Two-part question: the answer must combine two separate FAQ entries
# (internship and EMI). The input is passed under the chain's "query" key.
chain({"query": "Do you guys provide internship and also do you offer EMI payments?"})

{'query': 'Do you guys provide internship and also do you offer EMI payments?',
 'result': "Yes, we provide virtual internship and we don't offer EMI payments.",
 'source_documents': [Document(page_content='prompt: Do you provide any virtual internship?\nresponse: Yes', metadata={'source': 'Do you provide any virtual internship?', 'row': 14}),
  Document(page_content='prompt: Do we have an EMI option?\nresponse: No', metadata={'source': 'Do we have an EMI option?', 'row': 13}),
  Document(page_content='prompt: Do you provide any job assistance?\nresponse: Yes, We help you with resume and interview preparation along with that we help you in building online credibility, and based on requirements we refer candidates to potential recruiters.', metadata={'source': 'Do you provide any job assistance?', 'row': 11}),
  Document(page_content='prompt: How can I contact the instructors for any doubts/support?\nresponse: We have created every lecture with a motive to explain everything in an easy-