In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.huggingface import HuggingFaceLLM








In [2]:
# %%time
# # load documents
# documents = SimpleDirectoryReader("data-pdf/").load_data()

In [3]:
# Point the directory reader at the folder holding the source files; nothing is parsed until load_data() is called.
reader = SimpleDirectoryReader("data-pdf/")

In [4]:
%%time
# Load everything `reader` was pointed at into `documents`, passing num_workers=16 to load_data.
# The cell magic above reports how long the load takes.
documents = reader.load_data(num_workers=16)

CPU times: user 54.7 ms, sys: 31.7 ms, total: 86.4 ms
Wall time: 23.8 s


In [4]:
# Sanity check: how many items load_data() returned.
len(documents)

272

In [13]:
# Sanity check: container type returned by load_data().
type(documents)

list

In [None]:
# Peek at the first loaded item to see what a single entry looks like.
documents[0]

In [5]:
import os
from getpass import getpass

# SECURITY: a real access token used to be hard-coded in this cell. Treat that token as
# leaked and revoke it in the Hugging Face account settings.
# The token is now taken from the environment, and is only asked for interactively
# (without echoing it into the notebook) when HF_TOKEN is not already set.
if not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass("Hugging Face access token: ")

In [7]:
from llama_index.llms.huggingface import HuggingFaceLLM


def messages_to_prompt(messages):
    """Render a chat history as one prompt string using <|role|> ... <|end|> markup.

    Every message is written as its role tag, a newline, its content and an
    <|end|> marker. Roles other than "system", "user" and "assistant" are
    written with the "user" tag. The result always finishes with an open
    <|assistant|> turn, and a default system instruction is placed in front
    when the history contains no system message.

    Args:
        messages: iterable of objects exposing ``role`` and ``content``.

    Returns:
        The assembled prompt as a ``str``.
    """
    default_system = (
        "<|system|>\nYou are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided.<|end|>\n"
    )
    recognised_roles = ("system", "user", "assistant")

    turns = []
    has_system = False
    for msg in messages:
        # Match with == (not a dict lookup) so role objects that merely compare
        # equal to these strings are still recognised.
        tag = next((name for name in recognised_roles if msg.role == name), "user")
        has_system = has_system or tag == "system"
        turns.append(f"<|{tag}|>\n{msg.content}<|end|>\n")

    # Leave an open assistant turn for the model to complete.
    turns.append("<|assistant|>\n")

    body = "".join(turns)
    return body if has_system else default_system + body


# Load the microsoft/Phi-3-mini-4k-instruct checkpoint through the Hugging Face LLM wrapper.
# The tokenizer comes from the same checkpoint as the weights, and both the single-query
# wrapper prompt and the chat formatter use the <|role|> ... <|end|> markup.
llm = HuggingFaceLLM(
    model_name="microsoft/Phi-3-mini-4k-instruct",
    tokenizer_name="microsoft/Phi-3-mini-4k-instruct",
    # Forwarded to the model loader; trust_remote_code allows code shipped with the checkpoint to run.
    model_kwargs=dict(trust_remote_code=True),
    # Sampling stays enabled, at a low temperature (0.1).
    generate_kwargs=dict(do_sample=True, temperature=0.1),
    # Template for plain (non-chat) queries; {query_str} is the placeholder for the question.
    query_wrapper_prompt=(
        "<|system|>\nYou are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided<|end|>\n"
        "<|user|>\n{query_str}<|end|>\n"
        "<|assistant|>\n"
    ),
    messages_to_prompt=messages_to_prompt,
    is_chat_model=True,
)

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
import torch

# Name the GPU the notebook runs on. get_device_name(0) raises when no CUDA device is
# visible, so check availability first to keep "Restart & Run All" working on CPU-only machines.
device_name = (
    torch.cuda.get_device_name(0)
    if torch.cuda.is_available()
    else "cpu (no CUDA device available)"
)
device_name

'NVIDIA RTX A5000'

In [9]:
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Register the LLM and the BAAI/bge-small-en-v1.5 embedding model on llama_index's Settings
# object; the index and query-engine calls later in the notebook are not given them explicitly.
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [10]:
from llama_index.core import VectorStoreIndex

# Build a vector store index from the loaded documents.
vector_index = VectorStoreIndex.from_documents(documents)

In [30]:
# index = VectorStoreIndex.from_documents(documents)

In [43]:
# Single sample question, reused by the query cells that follow.
user_input = "What are the signs that a gastric ulcer is bleeding?"

In [44]:
# Query engine over vector_index using the 'compact' response mode.
query_engine_c = vector_index.as_query_engine(response_mode='compact')

In [45]:
# Ask the sample question through the 'compact' engine created in the previous cell.
# (The bare name `query_engine` is never defined in this notebook, so it only worked
# with leftover kernel state and did not exercise the 'compact' mode.)
reponse_c = query_engine_c.query(user_input)

In [46]:
# Show the answer text stored on reponse_c.
reponse_c.response

'The signs that a gastric ulcer is bleeding include pallor, a systolic blood pressure of less than or equal to 100 mm Hg, and a pulse rate of greater than or equal to 100/minute. Additionally, the presence of active bleeding, oozing bleeding, stigmata of recent bleeding such as a visible vessel or adherent clot, and a flat pigmented spot on the ulcer base may indicate bleeding.'

In [47]:
# Query engine over vector_index using the 'refine' response mode.
query_engine_r = vector_index.as_query_engine(response_mode='refine')

In [48]:
# Ask the sample question through the 'refine' engine created in the previous cell.
# (The bare name `query_engine` is never defined in this notebook, so it only worked
# with leftover kernel state and did not exercise the 'refine' mode.)
reponse_r = query_engine_r.query(user_input)

In [49]:
# Show the answer text stored on reponse_r.
reponse_r.response

'The signs that a gastric ulcer is bleeding include pallor, a systolic blood pressure of less than 100 mm Hg, and a pulse rate of more than 100/minute. Additionally, the presence of active bleeding, oozing bleeding, visible vessel, or adherent clot on endoscopy are indicative of a bleeding gastric ulcer.'

In [50]:
# Query engine over vector_index using the 'tree_summarize' response mode.
query_engine_t = vector_index.as_query_engine(response_mode='tree_summarize')

In [51]:
# Ask the sample question through the 'tree_summarize' engine created in the previous cell.
# (The bare name `query_engine` is never defined in this notebook, so it only worked
# with leftover kernel state and did not exercise the 'tree_summarize' mode.)
reponse_t = query_engine_t.query(user_input)

In [52]:
# Show the answer text stored on reponse_t.
reponse_t.response

'The signs that a gastric ulcer is bleeding include pallor, a systolic blood pressure of less than 100 mm Hg, and a pulse rate of more than 100/minute. Additionally, the presence of active bleeding, oozing bleeding, visible vessel, or adherent clot on endoscopy are indicators of a bleeding gastric ulcer.'

In [53]:
# Display the engine object itself. The trailing dot left here was an unfinished
# attribute access and made the cell a SyntaxError.
query_engine_c

<llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine at 0x7f7c11d78490>

In [54]:
# Evaluation questions, one per line. The text is split into a list of
# individual questions in the next cell.
questions_text = """
What is a gastric ulcer?
What causes gastric ulcers?
How does H. pylori infection contribute to gastric ulcers?
What are the common symptoms of a gastric ulcer?
How are gastric ulcers and duodenal ulcers different?
What risk factors increase the likelihood of developing a gastric ulcer?
Can stress cause gastric ulcers?
How is a gastric ulcer diagnosed?
What tests are used to detect H. pylori infection?
Are gastric ulcers hereditary?
Can smoking contribute to gastric ulcers?
How do NSAIDs (nonsteroidal anti-inflammatory drugs) cause gastric ulcers?
Can alcohol consumption lead to gastric ulcers?
What lifestyle changes can help prevent gastric ulcers?
How does diet affect gastric ulcers?
What are the common treatments for gastric ulcers?
How effective are antibiotics in treating H. pylori-related ulcers?
How long does it typically take for a gastric ulcer to heal?
Are there any dietary changes recommended for someone with a gastric ulcer?
What foods should be avoided if you have a gastric ulcer?
Can lifestyle changes such as quitting smoking help with gastric ulcers?
What are the potential side effects of ulcer medications?
How often should I follow up with my doctor if I have a gastric ulcer?
Can gastric ulcers recur after treatment?
How do proton pump inhibitors (PPIs) work to treat gastric ulcers?
Is surgery ever required to treat a gastric ulcer?
What are the long-term effects of untreated gastric ulcers?
What are the signs that a gastric ulcer is bleeding?
How serious is a bleeding gastric ulcer?
What should I do if I suspect my gastric ulcer is bleeding?
Can a gastric ulcer lead to other complications such as perforation or obstruction?
How is a bleeding gastric ulcer treated in an emergency?
What are the symptoms of a perforated gastric ulcer?
How is a perforated gastric ulcer treated?
How does anemia relate to gastric ulcers?
Can chronic stomach ulcers lead to gastric cancer?
Are there any ways to prevent gastric ulcers?
How can I reduce my risk of developing a gastric ulcer?
Can long-term use of NSAIDs cause gastric ulcers?
What are the alternatives to NSAIDs if I have a history of gastric ulcers?
How often should I get screened for H. pylori if I've had a gastric ulcer before?
Are there any new treatments or research developments for gastric ulcers?
How does gastric acid secretion influence the formation of stomach ulcers?
What role do prostaglandins play in the pathophysiology of stomach ulcers?
How is the urea breath test used to diagnose H. pylori infection?
What are the mechanisms of action of proton pump inhibitors (PPIs) in ulcer treatment?
How do NSAIDs induce gastric mucosal injury leading to ulcers?
What is the role of endoscopy in the management of peptic ulcer disease?
How do you assess the severity of bleeding in a peptic ulcer patient?
What are the indications for surgical intervention in peptic ulcer disease?
How does H. pylori eradication therapy affect the recurrence of stomach ulcers?
How are refractory peptic ulcers managed clinically?
What are the long-term outcomes of patients with peptic ulcer bleeding?
How does chronic use of corticosteroids influence peptic ulcer formation?
How do you differentiate between benign and malignant gastric ulcers during endoscopy?
How do you manage patients with non-healing gastric ulcers despite standard treatment?
What are the indications for endoscopic biopsy in patients with suspected gastric ulcers?
How do you approach the treatment of gastric ulcers in patients with concurrent liver disease?
How do you tailor the treatment of peptic ulcers in elderly patients with multiple comorbidities?
What is your approach to managing patients with peptic ulcers who are on anticoagulant therapy?
What are the current guidelines for the use of proton pump inhibitors (PPIs) in peptic ulcer disease?
How do you handle cases of refractory H. pylori infection that do not respond to standard eradication regimens?
Is a bleeding gastric ulcer a sign of stomach cancer?
Where does acute pain from a gastric ulcer typically manifest?
What are the differences between gastritis, gastric erosion, and gastric ulcers?
Can black stool indicate a gastric ulcer?
What is the purpose of performing a tissue biopsy in cases of gastric ulcers?
What is the Sakita classification in gastric ulcers?
Could vomiting blood indicate a gastric ulcer?
Are there any significant blood test results when a patient has a gastric ulcer?
"""

In [55]:
# One question per line. Each line is stripped first and only then tested, so lines that
# are empty or contain only whitespace never turn into an empty query.
questions_list = [
    stripped
    for stripped in (line.strip() for line in questions_text.split('\n'))
    if stripped
]

In [56]:
# Maps each question string to its answer text; filled by the query loop in the next cell.
answers_dict = {}

In [57]:
# Send every question through the 'tree_summarize' engine and keep only the
# answer text, keyed by the question it belongs to.
for q_text in questions_list:
    answers_dict[q_text] = query_engine_t.query(q_text).response

In [58]:
import pandas as pd
# Two-column table with one row per question/answer pair, in the order the answers were stored.
df = pd.DataFrame(
    {
        'Question': list(answers_dict.keys()),
        'Answer': list(answers_dict.values()),
    }
)

In [59]:
from pathlib import Path

# NOTE(review): the file name still says "mistralai" although this notebook loads a Phi-3
# model; it is kept unchanged so anything already reading this path keeps working.
csv_path = Path('results-phi3/gastric_ulcer_questions_answers_mistralai_csv.csv')
# to_csv does not create missing folders, so make sure the output directory exists first.
csv_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path, index=False)

In [60]:
from pathlib import Path

# NOTE(review): the file name still says "mistralai" although this notebook loads a Phi-3
# model; it is kept unchanged so anything already reading this path keeps working.
xlsx_path = Path('results-phi3/gastric_ulcer_questions_answers_mistralai_excel.xlsx')
# to_excel does not create missing folders, so make sure the output directory exists first.
xlsx_path.parent.mkdir(parents=True, exist_ok=True)
df.to_excel(xlsx_path, index=False)

In [61]:
# df.to_csv('results-mistralai/gastric_ulcer_questions_answers_mistralai_csv.csv', index=False)

In [62]:
# Display the question/answer table.
df

Unnamed: 0,Question,Answer
0,What is a gastric ulcer?,A gastric ulcer is a mucosal defect at least 0...
1,What causes gastric ulcers?,Gastric ulcers are primarily caused by Helicob...
2,How does H. pylori infection contribute to gas...,H. pylori infection contributes to gastric ulc...
3,What are the common symptoms of a gastric ulcer?,The common symptoms of a gastric ulcer include...
4,How are gastric ulcers and duodenal ulcers dif...,Gastric ulcers and duodenal ulcers are differe...
...,...,...
65,Can black stool indicate a gastric ulcer?,"Yes, black stool can indicate a gastric ulcer...."
66,What is the purpose of performing a tissue bio...,The purpose of performing a tissue biopsy in c...
67,What is the Sakita classification in gastric u...,The Sakita classification is a system used to ...
68,Could vomiting blood indicate a gastric ulcer?,"Yes, vomiting blood, also known as hematemesis..."
