In [1]:
import numpy as np
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_google_genai import ChatGoogleGenerativeAI

  from tqdm.autonotebook import tqdm, trange


In [None]:
!pip install langchain_community
!pip install sentence_transformers
!pip install faiss_gpu
!pip install langchain_google_genai

In [None]:
# Load every PDF found in the Kaggle input directory.
loader = PyPDFDirectoryLoader("/kaggle/input/us-census")
docs = loader.load()

# Split the loaded documents into chunks of up to 1000 characters with a 200-character overlap.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splitted_docs = splitter.split_documents(documents=docs)

In [None]:
# BGE sentence-embedding model, run on the GPU, with normalization of the
# produced vectors requested at encode time.
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs=dict(device="cuda"),
    encode_kwargs=dict(normalize_embeddings=True),
)

In [None]:
# Number of chunks produced by the splitter; displayed as the cell output.
len(splitted_docs)

In [None]:
# Sanity-check the embedding model on a single sentence.
text = embeddings.embed_query("Hello My name is HAMZA")
# Report the vector length and a short preview instead of dumping every component.
print(len(text), text[:5])

In [None]:
# Embed every chunk and build the FAISS vector index over them.
db = FAISS.from_documents(documents=splitted_docs, embedding=embeddings)

In [None]:
# Query the index directly and show the text of the first chunk returned.
ans = db.similarity_search(query="WHAT IS HEALTH INSURANCE COVERAGE?")
ans[0].page_content

In [None]:
# Wrap the index as a retriever that performs a similarity search with k=3.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=3),
)
retriever

In [None]:
import os
from getpass import getpass

# SECURITY: this cell used to contain the access token in plain text. A token that
# has been saved in a notebook must be treated as leaked and revoked.
# Use the value already present in the environment; prompt for it only when missing.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face Hub API token: ")

In [None]:
# Hosted Mistral-7B model reached through the Hugging Face Hub wrapper.
hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs=dict(temperature=0.1, max_length=500),
)

# Ask one stand-alone question (no retrieval) and display the completion.
query = "What is Cloud Computing?"
hf.invoke(query)

In [None]:
# Prompt text for the QA chain. `{context}` and `{question}` are the two
# placeholders to be filled in when the template is formatted.
prompt_template = (
    "\n"
    "Use the following piece of context to answer the question asked.\n"
    "Please try to answer based only on the context provided.\n"
    "\n"
    "{context}\n"
    "Question:{question}\n"
    "\n"
    "Helpful Answers:\n"
    " "
)

In [None]:
# Build the prompt object used by the QA chain.
# The keyword is `input_variables` (plural); the earlier spelling `input_variable`
# did not match the field name.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

In [None]:
import os
from getpass import getpass

# SECURITY: this cell used to contain the API key in plain text. A key that has
# been saved in a notebook must be treated as leaked and revoked.
# Use the value already present in the environment; prompt for it only when missing.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Google API key: ")

In [None]:
# Gemini chat model; passed to the RetrievalQA chain as its `llm`.
# NOTE(review): presumably authenticates via the GOOGLE_API_KEY environment variable — confirm.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

In [None]:
# Retrieval-augmented QA chain: the retriever supplies documents, the "stuff"
# chain type combines them with the custom prompt, and the LLM answers.
# The source documents are returned alongside the answer.
retrieval_QA = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=prompt),
    return_source_documents=True,
)

In [None]:
# Run one question through the full retrieval + generation chain
# and print only the generated answer text.
query = "What is Household income:"
ans = retrieval_QA.invoke(query)
print(ans["result"])