In [16]:
import os
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter

In [17]:
def load_data(filepath, chunk_size=500, chunk_overlap=20):
    """Read every file under ``filepath`` and split the documents into nodes.

    Args:
        filepath: Directory handed to ``SimpleDirectoryReader``.
        chunk_size: Chunk size passed to ``SentenceSplitter``. Defaults to the
            value this notebook has always used (500).
        chunk_overlap: Overlap passed to ``SentenceSplitter``. Defaults to the
            value this notebook has always used (20).

    Returns:
        The nodes returned by ``SentenceSplitter.get_nodes_from_documents``.
    """
    documents = SimpleDirectoryReader(filepath).load_data()
    splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.get_nodes_from_documents(documents)

In [21]:
# Parse everything in the local "data" directory into nodes.
chunks = load_data("data")

In [23]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import StorageContext,Settings
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

In [24]:
def vector_store(
    chunks,
    persist_path="./chroma_db",
    collection_name="clinical_data",
    embed_model_name="BAAI/bge-small-en-v1.5",
):
    """Embed already-split nodes and index them in a persistent Chroma collection.

    Side effect: ``Settings.embed_model`` is set globally to the HuggingFace
    embedding model created here.

    Args:
        chunks: Nodes produced by a node parser (in this notebook, the output
            of ``load_data``).
        persist_path: Directory used by ``chromadb.PersistentClient``.
        collection_name: Chroma collection to open or create.
        embed_model_name: HuggingFace model name used for embeddings.

    Returns:
        A ``VectorStoreIndex`` backed by the Chroma collection.
    """
    embedder = HuggingFaceEmbedding(model_name=embed_model_name)
    Settings.embed_model = embedder

    # NOTE(review): the collection is opened with get_or_create_collection, so
    # calling this function again presumably appends the same chunks a second
    # time -- confirm, and clear the collection first if a rebuild is intended.
    client = chromadb.PersistentClient(path=persist_path)
    collection = client.get_or_create_collection(collection_name)

    chroma_store = ChromaVectorStore(chroma_collection=collection)
    storage_context = StorageContext.from_defaults(vector_store=chroma_store)

    # `chunks` are nodes, not Documents: build the index from nodes directly.
    # from_documents() is for raw Documents and would run the node-parsing
    # transformations over these chunks a second time.
    return VectorStoreIndex(nodes=chunks, storage_context=storage_context)

In [26]:
# Embed the parsed chunks and build the Chroma-backed index.
index = vector_store(chunks)

In [10]:
# Re-open the index persisted under ./chroma_db instead of rebuilding it.
# The embedding model name matches the one used when the collection was
# built, so query vectors are comparable with the stored ones.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model

db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("clinical_data")

# Deliberately NOT named `vector_store`: that name belongs to the
# vector_store() helper defined earlier, and rebinding it to a store object
# would make a later `vector_store(chunks)` call fail.
chroma_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Kept as a global in case a later cell wants the storage context; it is not
# passed to from_vector_store below.
Storage = StorageContext.from_defaults(vector_store=chroma_store)
index = VectorStoreIndex.from_vector_store(chroma_store)

In [11]:
from llama_index.llms.huggingface import HuggingFaceLLM
import torch
import torch.cuda

# Load Llama 3.2 1B through HuggingFace (weights requested in bfloat16,
# device placement left to "auto") and make it the default LLM in Settings.
# NOTE(review): this is the base checkpoint, not an instruction-tuned one;
# the answer recorded further down runs on and is cut off mid-word, which may
# be a consequence -- confirm whether an "-Instruct" variant was intended.
llm_model = HuggingFaceLLM(
    tokenizer_name="meta-llama/Llama-3.2-1B",
    model_name="meta-llama/Llama-3.2-1B",
    model_kwargs=dict(torch_dtype=torch.bfloat16),
    device_map="auto",
    context_window=8192,
)
Settings.llm = llm_model

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

In [27]:
# Ask one question against the index, retrieving the 3 most similar chunks.
query_engine = index.as_query_engine(similarity_top_k=3)
response = query_engine.query("What are the side effects of Lisinopril?")
print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


1. Headache
2. Nausea
3. Fatigue
4. Dizziness
5. Drowsiness
6. Loss of appetite
7. Swelling of the face, lips, tongue, throat, arms, legs, hands, or feet
8. Itchy skin
9. Sudden breathing or swallowing problems
10. Chest pain
11. Hoarseness
12. Stomach pain
13. Yellow eyes or skin
14. Yellowing of the skin or white of the eyes
15. Weak or heavy feeling in the legs
16. Skin rash
17. Irregular heartbeat
18. Shortness of breath or other breathing problems
19. Joint pain
20. Numbness or tingling in hands, feet, or lips
21. Weakness
22. Irritation of the throat
23. Frequent urge to urinate
24. Fever and chills
25. Difficulty swallowing
26. Difficulty breathing
27. Hives
28. Swelling of the hands, ankles, feet, or lower legs
29. Fluid retention
30. Loss of sexual ability
31. Dizziness
32. Abnormal heart rhythm
33. Severe allergic reaction
34. Un
