In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

In [7]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate

from langchain.chains import RetrievalQA

import  numpy as np

In [5]:
## Read the PDFs from the folder
# Tunable inputs for loading and chunking, named so they are easy to find and change.
PDF_DIR = "./us_census"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

loader = PyPDFDirectoryLoader(PDF_DIR)
documents = loader.load()

# Fail fast with a clear message: an empty load would otherwise only surface
# in a later cell, e.g. as an IndexError on final_documents[0].
if not documents:
    raise FileNotFoundError(
        f"No PDF content was loaded from {PDF_DIR!r}; "
        "check that the folder exists and contains PDF files."
    )

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
final_documents = text_splitter.split_documents(documents)

In [6]:
# Display the first chunk to sanity-check the loading and splitting step.
final_documents[0]

Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 18.2 (Windows)', 'creationdate': '2023-09-09T07:52:17-04:00', 'author': 'U.S. Census Bureau', 'keywords': 'acsbr-015', 'moddate': '2023-09-12T14:44:47+01:00', 'title': 'Health Insurance Coverage Status and Type by Geography: 2021 and 2022', 'trapped': '/false', 'source': 'us_census\\acsbr-015.pdf', 'total_pages': 18, 'page': 0, 'page_label': '1'}, page_content='Health Insurance Coverage Status and Type \nby Geography: 2021 and 2022\nAmerican Community Survey Briefs\nACSBR-015\nIssued September 2023\nDouglas Conway and Breauna Branch\nINTRODUCTION\nDemographic shifts as well as economic and govern-\nment policy changes can affect people’s access to \nhealth coverage. For example, between 2021 and 2022, \nthe labor market continued to improve, which may \nhave affected private coverage in the United States \nduring that time.1 Public policy changes included \nthe renewal of the Public Health Emergency, wh

In [10]:
## Embedding Using Huggingface
# Alternative model that could be swapped in: sentence-transformers/all-MiniLM-l6-v2
bge_options = dict(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={"device": "cpu"},  # run the embedding model on CPU
    encode_kwargs={"normalize_embeddings": True},  # ask the encoder to normalize its output
)
huggingface_embeddings = HuggingFaceBgeEmbeddings(**bge_options)


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [11]:

# Embed the first chunk once and reuse the vector for both printouts;
# calling embed_query twice would run the model twice on the same text.
first_chunk_vector = np.array(
    huggingface_embeddings.embed_query(final_documents[0].page_content)
)
print(first_chunk_vector)
print(first_chunk_vector.shape)


[-0.07903483 -0.01134115 -0.02312097  0.02844463  0.05053344  0.05317824
 -0.01907787  0.03456024 -0.10211369 -0.02915699  0.0852426   0.05650729
 -0.02545436 -0.0330849  -0.00635736  0.04090863 -0.00628109  0.00356742
 -0.03854132  0.03667684 -0.042898    0.03425251 -0.03116901 -0.03793729
  0.01728393  0.01214924  0.00653118  0.01463566 -0.05529055 -0.15320708
  0.00730848  0.03202943 -0.04701126 -0.01595975  0.01874448  0.02642937
 -0.02306377  0.0843804   0.04182489  0.05278175 -0.03057599  0.01564262
 -0.01689075  0.00529409 -0.02417435  0.00412995 -0.01889935 -0.00150625
 -0.00836943 -0.03390065  0.03515958 -0.00553133  0.04910936  0.05971858
  0.05615959 -0.05105155  0.01475134 -0.01849959 -0.0328464   0.03576626
  0.04947706 -0.00938882 -0.26202118  0.09750332  0.01715692  0.04781384
 -0.00556321 -0.00298306 -0.02207356 -0.04463671 -0.05760485  0.04815874
 -0.05522207  0.01635332  0.03299245  0.02147077  0.01296216  0.01462307
  0.02174953 -0.00202998  0.02099536  0.03353845 -0

In [12]:
## VectorStore Creation
# Only the first 120 chunks go into the index; chunks beyond that are left out.
indexed_chunks = final_documents[:120]
vectorstore = FAISS.from_documents(indexed_chunks, huggingface_embeddings)

In [None]:
## Query using Similarity Search
query = "WHAT IS THE AMERICAN COMMUNITY SURVEY?"
relevant_docments = vectorstore.similarity_search(query)

# NOTE(review): index 1 is the second document returned, not the first —
# confirm that skipping index 0 is intended.
second_hit = relevant_docments[1]
print(second_hit.page_content)


*Georgia
Wisconsin
Tennessee
North Carolina
Florida
*Alabama
South Carolina
Mississippi
* Denotes a statistically signiﬁcant change between 2021 and 2022 at the 90 percent conﬁdence level.
Note: State Medicaid expansion status in 2022 is used to compare change between 2021 and 2022. For more information on expansion states, 
refer to Appendix Table A-1. For information on conﬁdentiality protection, sampling error, nonsampling error, and deﬁnitions in the American 
Community Survey, refer to <https://www2.census.gov/programs-surveys/acs/tech_docs/accuracy/ACS_Accuracy_of_Data_2022.pdf>.
Source: U.S. Census Bureau, 2021 and 2022 American Community Survey, 1-year estimates.
/two.tab/zero.tab/one.tab/zero.tab/zero.tab /three.tab/zero.tab /four.tab/zero.tab /five.tab/zero.tab /six.tab/zero.tab
United States
Expansion states
Nonexpansion states
/two.tab/zero.tab/two.tab/one.tab   /two.tab/zero.tab/two.tab/two.tab


In [18]:
# Expose the vector store as a similarity-search retriever configured with k=3.
top_k = 3
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=top_k),
)
print(retriever)


tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000200803F0460> search_kwargs={'k': 3}


In [46]:
import os

HF_TOKEN_VAR = "HUGGINGFACEHUB_API_TOKEN"

# os.environ only accepts str values, so assigning os.getenv(...) straight back
# raises "TypeError: str expected, not NoneType" whenever the variable is unset.
# Check for the token explicitly and stop with an actionable message instead.
# Nothing needs to be written back: when the token is present it already lives
# in os.environ, and it is deliberately not copied into a notebook variable so
# it cannot end up in a cell output.
if not os.getenv(HF_TOKEN_VAR):
    raise RuntimeError(
        f"{HF_TOKEN_VAR} is not set. Export it in the environment that launches "
        "Jupyter before running this cell."
    )


In [30]:
from langchain_community.llms import HuggingFaceHub

# LLM served through the Hugging Face Hub.
# NOTE(review): HuggingFaceHub appears to be deprecated in recent
# langchain-community releases in favour of HuggingFaceEndpoint — confirm
# against the installed version before upgrading.
generation_settings = {"temperature": 0.1, "max_length": 500}
hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    model_kwargs=generation_settings,
)

# Query the bare LLM (no retrieval involved); the reply is the cell's displayed value.
query = "What is the health insurance coverage?"
hf.invoke(query)




"What is the health insurance coverage?\n\nHealth insurance coverage is a type of insurance that helps pay for medical expenses, such as doctor visits, hospital stays, and prescription drugs. It can also cover preventive care, such as vaccinations and screenings. Health insurance can be provided by an employer, purchased individually, or obtained through a government program, such as Medicare or Medicaid.\n\nThere are different types of health insurance plans, including HMOs, PPOs, and high-deductible health plans. Each type of plan has its own set of benefits, costs, and restrictions. It's important to understand the details of a health insurance plan before enrolling to ensure that it meets your needs and budget.\n\nHealth insurance is important because medical care can be expensive, and without insurance, many people would not be able to afford the care they need. Having health insurance can help protect you from financial hardship in the event of an unexpected illness or injury. It

In [32]:

# Prompt text with two placeholders, {context} and {question}.
# Built line by line; the leading empty entry and the trailing single-space
# entry keep the resulting string identical to the multi-line literal form.
prompt_template = "\n".join(
    [
        "",
        "Use the following piece of context to answer the question asked.",
        "Please try to provide the answer only based on the context",
        "",
        "{context}",
        "Question:{question}",
        "",
        "Helpful Answers:",
        " ",
    ]
)


In [35]:
# Wrap the raw template text, declaring the two placeholders it expects.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [36]:
# Assemble the retrieval QA chain from the LLM, the retriever and the custom prompt.
qa_chain_options = dict(
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs=dict(prompt=prompt),
)
retrievalQA = RetrievalQA.from_chain_type(
    llm=hf,
    retriever=retriever,
    **qa_chain_options,
)


In [44]:


# Question to put to the retrieval QA chain.
query = "CHANGES IN PUBLIC COVERAGE BY STATE FROM 2021 TO 2022?"


In [45]:
# Run the QA chain on the current query.
result = retrievalQA.invoke({"query": query})

# Print only the "result" entry of the chain output.
answer_text = result["result"]
print(answer_text)





Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

not experience a change to the pri-
vate coverage rate, nonexpansion 
states as a group had an increase 
of 0.8 percentage points.
CHANGES IN PUBLIC 
COVERAGE BY STATE FROM 
2021 TO 2022
In 2022, 13 states saw increases in 
public health insurance coverage 
while only one state, Rhode Island, 
had a decrease. This led to an 
increased public coverage rate for 
the nation overall at 37.2 percent 
in 2022. As with private cover-
age, changes in the distribution 
of public coverage subtypes (e.g., 
Medicaid) may affect the overall 
public coverage rate. Rhode Island 
reported a decrease in public 
coverage of 2.2 percentage points, 
which was driven by a decrease 
in people reporting Medicaid (2.3 
percentage points) (Figure 4 and 
Appendix Table B-3).26 Increases in 
public health insurance coverage 
ranged from 0.6 percentage points 
to 2.3 percentage points acro