In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate

from langchain.chains import RetrievalQA

In [2]:
## Read the PDFs from the folder
loader = PyPDFDirectoryLoader("./us_census")
documents = loader.load()

# Fail fast with a clear message: without this guard an empty or mistyped
# folder only surfaces further down as an IndexError on final_documents[0].
if not documents:
    raise ValueError("No documents could be loaded from ./us_census")

## Split the loaded documents into overlapping chunks
## (chunk_size=1000 with an overlap of 200 between neighbouring chunks)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
final_documents = text_splitter.split_documents(documents)

# Preview the first chunk
final_documents[0]

Document(page_content='00 - Employee Handbook\nOwnerLaura Kuhlmann \xa0use @mention\nLast review07 May 2024 \xa0 use date picker\nStatus PUBLISHED  |  |  | DraftWiP In Review Published | Deprecated\nConfidentiality DCS INTERNAL Public | DCS Internal | DCS and partners | Confidential | Strictly \nconfidential,\nblocked \nURLThe Employee Handbook provides an\xa0 , especially the orientation to all employees\nnewbies!\xa0\nIt provides a short overview of important information regarding\xa0 \xa0and\xa0 people topics b\n. ehavioral guidelines\nThe Handbook is\xa0 \xa0structured and every subject is divided into a description glossary\nof the general framework,\nthe process and the need-to-know with further links and the responsible persons.\xa0\n for further suggestions on topics to be covered, please contact the People & Culture team!\nAB C D E F\nAb\nbre\nvia\ntio\nns\nAc\nce\nss \nCa\nrdBAV-\nAllow\nance\nBudd\ny \nConc\nept\nBEV \nProgr\nam \n(FINN)Certificate of \nEmployment / \nRefere

In [3]:
# Number of chunks produced by the text splitter
len(final_documents)

49

In [4]:
## Embedding model, loaded through the Hugging Face BGE wrapper
## (alternative model id: sentence-transformers/all-MiniLM-l6-v2)
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    # Run the encoder on the CPU
    model_kwargs={"device": "cpu"},
    # Ask the encoder to normalize the vectors it returns
    encode_kwargs={"normalize_embeddings": True},
)

  from tqdm.autonotebook import tqdm, trange
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [5]:

import numpy as np

# Embed the first chunk once and reuse the vector; the original embedded the
# same text twice just to print the values and then the shape.
sample_embedding = np.array(
    huggingface_embeddings.embed_query(final_documents[0].page_content)
)
print(sample_embedding)
print(sample_embedding.shape)

[-2.59445831e-02  5.15468307e-02 -9.89440177e-03  1.50767984e-02
  6.52042106e-02  4.49973717e-02  6.33552223e-02  1.87990107e-02
 -3.14946845e-02  5.63665256e-02  2.84940880e-02  2.33569113e-03
  1.36262327e-02 -3.35915796e-02 -1.67123489e-02  2.44941693e-02
  1.33883441e-02 -3.81309427e-02  2.39970554e-02 -6.28772285e-03
  3.80139425e-02 -2.12840401e-02 -2.16505062e-02  2.90671363e-02
 -2.63215564e-02  3.65823088e-03 -4.29292023e-02  7.27340672e-03
 -3.07316463e-02 -9.53067467e-02  8.05057213e-03 -1.59666408e-02
 -2.03691814e-02  2.92067174e-02  3.01305484e-03  8.72481894e-03
  1.89054459e-02  3.01838946e-03  6.58440515e-02  4.51830626e-02
 -9.73373875e-02  2.05465183e-02  5.04652271e-03 -2.64394190e-02
  1.66579504e-02  6.47821138e-03 -4.64136153e-03  1.99217699e-03
 -4.77383547e-02 -3.39184105e-02 -4.71404195e-03 -2.70152409e-02
  1.67670362e-02  2.11288389e-02  2.43268516e-02 -2.28119195e-02
  1.06723055e-01  3.91150303e-02 -5.08910511e-03  2.55724341e-02
  5.77243343e-02 -2.76102

In [22]:
## Build the FAISS vector store from the chunks and the embedding model
vectorstore = FAISS.from_documents(
    documents=final_documents,
    embedding=huggingface_embeddings,
)

In [27]:
## Query the vector store directly using similarity search
query = "what is EGYM Wellpass?"
relevant_docments = vectorstore.similarity_search(query)

# Show the two best-matching chunks separated by a ruler. Iterating over a
# slice (instead of indexing [0] and [1]) avoids an IndexError when the search
# returns fewer than two chunks; the output is unchanged for two or more.
separator = "*" * 100
for position, doc in enumerate(relevant_docments[:2]):
    if position:
        print(separator)
    print(doc.page_content)

membership is directly linked to your membership.
Process 1. Go to the EGYM Wellpass lading page
2. Register till 20th of a month
3. Start your training on 1st day of the following month
Need-
to-knowfor further information please see 05-II. EGYM Wellpass - Fitness Cooperation
Your contact person in this matter is Laura 
Gift & Invitation
Frame You can find the gift & invitation policy here: 20-Processes, policies and 
compliance
Need-to-
know
Grading - under construction
Frame New Growth Framework launched in July 2022
Process Get more information here:  04 - New Growth Framework (NGF) OLD
Need-to-know Your contact person in this matter is Kai
****************************************************************************************************
FrameAt DCS, we want to foster a feedback culture as from time to time it is good to 
synchronize views and perspectives. Generally it is recommended to exchange on a 
regular basis.
Proce
ssEspecially for Newbies, feedback helps to align his/her

In [36]:
# Expose the vector store as a retriever: similarity search with k=3
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000021089C87D60> search_kwargs={'k': 3}


In [37]:
import os
from getpass import getpass

# SECURITY: never hard-code the Hugging Face token in the notebook.
# A token was previously committed here in plain text; treat it as leaked and
# revoke it in the Hugging Face account settings.
# The token is taken from the environment (e.g. exported in the shell before
# starting Jupyter) and is only prompted for, without echo, when it is missing.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

In [38]:
from langchain_community.llms import HuggingFaceHub

# Ask the Hub-hosted Mistral-7B model the question directly,
# i.e. without any retrieved context.
hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature": 0.1, "max_length": 500},
)

query = "what is EGYM Wellpass?"
hf.invoke(query)

HfHubHTTPError: 429 Client Error: Too Many Requests for url: https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1 (Request ID: iYhfTGgYE1-YZzjHFs-2k)

Rate limit reached. You reached free usage limit (reset hourly). Please subscribe to a plan at https://huggingface.co/pricing to use the API at this rate

In [None]:
# Hugging Face models can also be run locally through the HuggingFacePipeline class.
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# `temperature` only takes effect when sampling is enabled, so the original
# `"temperature": 0` did not configure anything (transformers warns about it).
# Greedy decoding is requested explicitly with do_sample=False instead.
hf = HuggingFacePipeline.from_model_id(
    model_id="mistralai/Mistral-7B-v0.1",
    task="text-generation",
    pipeline_kwargs={"do_sample": False, "max_new_tokens": 300},
)

# `query` is the question defined in an earlier cell.
llm = hf
llm.invoke(query)

In [None]:
# Prompt text for the question-answering chain. {context} and {question} are
# the two placeholders that the PromptTemplate declares as input variables.
# Whitespace inside the literal is part of the prompt and is left untouched.
prompt_template="""
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

Helpful Answers:
 """

In [None]:
# Wrap the raw prompt text in a PromptTemplate with its two input variables
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [None]:
# Assemble the retrieval QA chain: the retriever supplies the context chunks,
# the "stuff" chain type places them into the custom prompt, and the source
# documents are returned alongside the answer.
retrievalQA = RetrievalQA.from_chain_type(
    llm=hf,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [None]:
# Three-line question; the line breaks are part of the query text.
query = "DIFFERENCES IN THE\nUNINSURED RATE BY STATE\nIN 2022"

In [None]:
# Run the question through the retrieval QA chain and print only the answer text.
result = retrievalQA.invoke({"query": query})
print(result["result"])