In [None]:
# !pip install accelerate
# !pip install -i https://pypi.org/simple/ bitsandbytes
# !pip install langchain
# !pip install langchain-community langchain-core


In [6]:
import torch
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import os
TOKEN = "YOUR-HF-TOKEN"
os.environ["HF_TOKEN"] = TOKEN
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [7]:
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

# from huggingface_hub import login
# login(TOKEN)

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)


tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, token = TOKEN)
tokenizer.pad_token = tokenizer.eos_token
# print(tokenizer.eos_token)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16,
    trust_remote_code=True,
    # device_map="auto",
    quantization_config=quantization_config,
    token = TOKEN
)

generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
    token = TOKEN
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 2/2 [00:56<00:00, 28.28s/it]


In [12]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(
    pipeline=pipeline,
)

  warn_deprecated(


In [13]:
query = "I have these symptoms what disease it could be: Swollen feet or legs that may be symptoms of kidney problems, including chronic kidney disease. Foamy or frothy pee. Peeing less than usual. Chronic diarrhea. Enlarged kidney. Enlarged liver. Low blood pressure. Nausea and vomiting."
result = llm(
    query
)
result

  warn_deprecated(
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


"I have these symptoms what disease it could be: Swollen feet or legs that may be symptoms of kidney problems, including chronic kidney disease. Foamy or frothy pee. Peeing less than usual. Chronic diarrhea. Enlarged kidney. Enlarged liver. Low blood pressure. Nausea and vomiting. Fatigue. Shortness of breath. Rapid breathing. Chest pain. Confusion. Dizziness. Loss of balance or coordination. Seizures.\n\nI'm 25 years old and I've been experiencing these symptoms for about a year now. I've had multiple doctors but none of them seem to take me seriously. They just tell me to lose weight and exercise more. I don't know if this is relevant but I also have an autoimmune disorder called Sjogrens Syndrome which causes dry eyes and mouth. I've had this for about 10 years now.\nUser 3: You need to see a nephrologist ASAP. The symptoms you describe are classic signs of end stage renal failure. If left untreated, your kidneys will fail completely and you will need dialysis or transplant.\n\nYour

Retreival

In [32]:
#pinecone
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer

#setup
API_KEY = "your-api-key"
INDEX_NAME = "healthcare-chatbot"

pc = Pinecone(api_key=API_KEY)
index = pc.Index(INDEX_NAME)

sent_model = SentenceTransformer('all-MiniLM-L6-v2')


query = "what is milk allergy? what causes it"
query_vector = sent_model.encode(query).tolist()

content = index.query(
    # namespace="example-namespace",
    vector=query_vector,
    top_k=3,
    include_metadata=True
)
content



{'matches': [{'id': 'Milk Allergy_overview1',
              'metadata': {'content': 'A milk allergy is one of the most '
                                      'common types of food allergy. Your '
                                      'immune system overreacts to one or more '
                                      "of the proteins in milk you've ingested "
                                      "(eaten or drunk). Cow's milk is the "
                                      'most common cause of a milk allergy. '
                                      'However, other types of animal milk, '
                                      "including goat's milk and sheep's milk, "
                                      'may cause your immune system to react. '
                                      'You may have a true milk allergy as a '
                                      'result of an immune system reaction. '
                                      'There are also other reactions to foods '
         

In [34]:
contexts = {item["id"]:item['metadata']['content'] for item in content['matches']}
contexts

{'Milk Allergy_overview1': "A milk allergy is one of the most common types of food allergy. Your immune system overreacts to one or more of the proteins in milk you've ingested (eaten or drunk). Cow's milk is the most common cause of a milk allergy. However, other types of animal milk, including goat's milk and sheep's milk, may cause your immune system to react. You may have a true milk allergy as a result of an immune system reaction. There are also other reactions to foods as well, including: Milk protein intolerance in infants. Lactose intolerance in older children and adults. A milk allergy can be deadly. If you have severe allergic reaction symptoms, such as trouble breathing, call 911 (or your local emergency number) or go to your nearest emergency room (ER) immediately.",
 'Milk Allergy_overview2': "A milk allergy causes an allergic reaction in your body. An allergic reaction is your body's response to an allergen. If you have a milk allergy, your body responds by creating immu

In [86]:
from langchain import PromptTemplate
from langchain.chains import  RetrievalQA


template = """
[INST] <>
Act as a Doctor who is giving information on diseases
<>
{context}

{question} [/INST]
"""

vectorstore = Pinecone(
    index, sent_model, "content"
)

prompt = PromptTemplate(template=template, 
                        input_variables=["context", "question"])



qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k":3}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)




In [None]:
qa_chain(query)

In [39]:
contexts

{'Milk Allergy_overview1': "A milk allergy is one of the most common types of food allergy. Your immune system overreacts to one or more of the proteins in milk you've ingested (eaten or drunk). Cow's milk is the most common cause of a milk allergy. However, other types of animal milk, including goat's milk and sheep's milk, may cause your immune system to react. You may have a true milk allergy as a result of an immune system reaction. There are also other reactions to foods as well, including: Milk protein intolerance in infants. Lactose intolerance in older children and adults. A milk allergy can be deadly. If you have severe allergic reaction symptoms, such as trouble breathing, call 911 (or your local emergency number) or go to your nearest emergency room (ER) immediately.",
 'Milk Allergy_overview2': "A milk allergy causes an allergic reaction in your body. An allergic reaction is your body's response to an allergen. If you have a milk allergy, your body responds by creating immu

In [68]:

query = "what is milk allergy? what causes it"
query_vector = sent_model.encode(query).tolist()

rel_content = index.query(
    # namespace="example-namespace",
    vector=query_vector,
    top_k=3,
    include_metadata=True
)
rel_content

{'matches': [{'id': 'Milk Allergy_overview1',
              'metadata': {'content': 'A milk allergy is one of the most '
                                      'common types of food allergy. Your '
                                      'immune system overreacts to one or more '
                                      "of the proteins in milk you've ingested "
                                      "(eaten or drunk). Cow's milk is the "
                                      'most common cause of a milk allergy. '
                                      'However, other types of animal milk, '
                                      "including goat's milk and sheep's milk, "
                                      'may cause your immune system to react. '
                                      'You may have a true milk allergy as a '
                                      'result of an immune system reaction. '
                                      'There are also other reactions to foods '
         

In [63]:
from langchain.chains import LLMChain
from langchain.schema.runnable import RunnablePassthrough


template = """
[INST] <>
You are an expert Doctor and Physician, specializing in <specialty>. You'll be asked question regarding different diseases and health related issues. Consider the given information below to answer the user question. Also if the question is not for doctors that just say you can't help with it<>
{context}

{question} [/INST]
"""

prompt = PromptTemplate.from_template(template=template)
llm_chain = LLMChain(llm=llm, prompt=prompt)

contexts = [item['metadata']['content'] for item in rel_content['matches']]
# contexts = " ".join(contexts)

rag_chain = ( 
 {"context": lambda x: " ".join(contexts), "question": RunnablePassthrough()}
    | llm_chain
)

rag_chain.invoke(query)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': "A milk allergy is one of the most common types of food allergy. Your immune system overreacts to one or more of the proteins in milk you've ingested (eaten or drunk). Cow's milk is the most common cause of a milk allergy. However, other types of animal milk, including goat's milk and sheep's milk, may cause your immune system to react. You may have a true milk allergy as a result of an immune system reaction. There are also other reactions to foods as well, including: Milk protein intolerance in infants. Lactose intolerance in older children and adults. A milk allergy can be deadly. If you have severe allergic reaction symptoms, such as trouble breathing, call 911 (or your local emergency number) or go to your nearest emergency room (ER) immediately. A milk allergy causes an allergic reaction in your body. An allergic reaction is your body's response to an allergen. If you have a milk allergy, your body responds by creating immunoglobulin E (IgE) after your first exposure 

In [72]:
index.describe_index_stats()


{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 32944}},
 'total_vector_count': 32944}

In [None]:
# from  import Pinecone
from langchain.vectorstores import Pinecone as pp

text_field = "content"  # the metadata field that contains our text
# print(index)
# initialize the vector store object
vectorstore = pp(
    index, sent_model, text_field
)
print(query)
vectorstore.similarity_search("what is milk allergy? what causes it", k=3)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents,chunk_size=10,chunk_overlap=3):
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  docs = text_splitter.split_documents(documents)
  return docs
documents = ["To prevent exceeding the context window of the LLM, we must split the data into smaller chunks before embedding and storing them in a vector store. We’ll use the RecursiveCharacterTextSplitter for this purpose, but depending on your needs, you may need to explore other splitters. Experimenting with parameters like chunk size and overlap is encouraged."]
docs = split_docs(documents)