In [3]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader
from langchain import PromptTemplate
from langchain.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings


In [4]:
# Embedding model handed to the vector store below; loads the
# "NeuML/pubmedbert-base-embeddings" checkpoint by name.
embeddings = SentenceTransformerEmbeddings(
    model_name="NeuML/pubmedbert-base-embeddings",
)

  embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
  from tqdm.autonotebook import tqdm, trange





In [5]:
# Build the vector store: load every PDF under pdf/ (recursively), split the
# loaded documents into overlapping chunks, embed them, and persist the
# resulting collection under stores/cosine.
loader = DirectoryLoader(
    'pdf/',
    glob="**/*.pdf",
    loader_cls=PyPDFLoader,
    show_progress=True,
)
documents = loader.load()

# Chunks of at most 500 with an overlap of 50 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# "hnsw:space": "cosine" is forwarded as collection metadata so the index
# uses cosine distance.
# NOTE(review): re-running this cell against an existing stores/cosine
# directory presumably adds the same chunks again -- confirm before re-running.
vector_store = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory="stores/cosine",
    collection_metadata={"hnsw:space": "cosine"},
)

print("Vector DB Successfully Created!")

100%|██████████| 1/1 [00:06<00:00,  6.55s/it]


Vector DB Successfully Created!


In [6]:
# Local GGUF model file loaded through LangChain's CTransformers wrapper.
local_llm = "meditron-7b.Q4_K_M.gguf"

# Generation / runtime settings for the ctransformers backend.
config = {
    'max_new_tokens': 512,
    # Was misspelled 'context_lenght', which is not a ctransformers option.
    'context_length': 1024,
    'repetition_penalty': 1.1,
    'temperature': 0.1,
    'top_k': 50,
    'top_p': 0.9,
    'stream': True,
    # Use half of the available cores, but never fewer than one thread:
    # os.cpu_count() may return None, and a single-core host would otherwise
    # yield 0 threads.
    'threads': max(1, (os.cpu_count() or 2) // 2),
}

# The settings must be passed through the wrapper's `config` argument;
# unpacking them as individual keyword arguments (`**config`) does not hand
# them to the underlying model.
llm = CTransformers(
    model=local_llm,
    model_type="llama",
    lib="avx2",
    config=config,
)

print("Model Initialized")

Model Initialized


In [7]:
# Prompt used by the QA chain. It exposes two placeholders:
#   {context}  - the retrieved document text
#   {question} - the user's query
# A stray, unbalanced double quote that used to follow the last instruction
# sentence has been removed so it is no longer sent to the model.
prompt_template = """You are a medical assistant. Don't show any hate,
abusive, racist, type of behavior. Be as kind and professional as possible.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [8]:
# Query-time embedding model: same checkpoint name that was used when the
# vector store was built, so query vectors live in the same space as the
# stored chunk vectors.
model_name = "NeuML/pubmedbert-base-embeddings"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
# model_kwargs / encode_kwargs were previously defined but never passed on,
# so they had no effect; hand them to the embeddings wrapper explicitly.
embeddings = SentenceTransformerEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Wrap the raw template so the chain can fill in {context} and {question}.
prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

# Re-open the collection persisted under stores/cosine.
load_vector_store = Chroma(persist_directory="stores/cosine", embedding_function=embeddings)

# k=1: only the single best-matching chunk is retrieved per query.
retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})

  load_vector_store = Chroma(persist_directory="stores/cosine", embedding_function=embeddings)


In [10]:
query = "How does the blood flow within the heart. Keep the answer short"

# "stuff" chain: the retrieved documents are inserted into the custom prompt.
chain_type_kwargs = {"prompt": prompt}
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
    verbose=True
)

# Use invoke() rather than calling the chain object directly; the direct
# call style is deprecated in recent LangChain releases.
response = qa.invoke({"query": query})
answer = response['result']

# The retriever may return no documents, and a document may lack a 'source'
# entry in its metadata; fall back to None instead of raising.
source_documents = response['source_documents']
top_source = source_documents[0] if source_documents else None
source_document = top_source.page_content if top_source is not None else None
doc = top_source.metadata.get('source') if top_source is not None else None
response_data = {"answer": answer, "source_document": source_document, "doc": doc}

# Last expression of the cell: displayed as the cell output.
response_data



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


{'answer': 'Blood enters the right ventricle from the vena cavae (veins) and then into themediastinum through the tricuspid valves. The blood leaves the left ventricle throughthe mitral valves and is pumped to the lungs via the pulmonary arteries. After this,the oxygen-rich blood returns from the lungs to the heart via the pulmonaryveins where it passes into the right atrium through the tricuspid valves. Theblood then flows into the left ventricle and is pumped into systemic circulationby passing into the aorta through the mitral valves.\nThank you!\n\n## Anatomy quiz\nYou are a medical assistant who will help your doctor to examine the patient. You should ask him or her only when he/she says yes and never say anything without his permission. If the patient answers yes, he will be asked to describe in detail how he felt (such as for pains or symptoms).\n\n## Examine this heart:\nQuestion 1. How does blood pass through invisible pores in the ventricular septum?\n\n## Question 2',
 'sour