# Load from HF Repo

In [1]:
from llama_cpp import Llama

In [2]:
# llm = Llama.from_pretrained(#
#    repo_id="QuantFactory/Meta-Llama-3-70B-Instruct-GGUF",
#    filename="Meta-Llama-3-70B-Instruct.Q2_K.gguf",
#    n_ctx=2048,
# )

# Load from Local File

In [None]:
# model_file = "models/Meta-Llama-3-70B-Instruct.Q4_K_S.gguf"
model_file = "../models/Llama-3.2-3B-Instruct-Q6_K_L.gguf"  # pretty good, about 4gb
# model_file = "models/Llama-3.2-8B.F16.gguf" #requires more than 15gb vram
# model_file = "models/Llama-3.2-3B-Instruct-f16.gguf"#8gb vram appears to be worse than the q6kl version
# model_file = "models/CalmeRys-78B-Orpo-v0.1-IQ4_XS.gguf"  # somehow does not use gpu? pretty good results but 30min for generation.
# model_file = "models/CalmeRys-78B-Orpo-v0.1.i1-IQ3_S.gguf" #uses 10Gb with 20/87 layers to GPU, best performer for now

In [12]:
# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
llm = Llama(
    model_path=model_file,  # Download the model file first
    n_ctx=5000,  # The max sequence length to use - note that longer sequence lengths require much more resources
    n_threads=12,  # The number of CPU threads to use, tailor to your system and the resulting performance
    n_gpu_layers=87,  # The number of layers to offload to GPU, if you have GPU acceleration available
    temperature=0.2,
    repetition_penalty=1.11,
    top_p=0.95,
)

llama_model_loader: loaded meta data with 35 key-value pairs and 255 tensors from ../models/Llama-3.2-3B-Instruct-Q6_K_L.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Llama 3.2 3B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Llama-3.2
llama_model_loader: - kv   5:                         general.size_label str              = 3B
llama_model_loader: - kv   6:                            general.license str              = llama3.2
llama_model_loader: - kv   7:           

# Test Functioning

In [13]:
llm.create_chat_completion(
    messages=[{"role": "user", "content": "What is the capital of France?"}]
)

llama_perf_context_print:        load time =      55.56 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    42 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     7 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =     146.00 ms /    49 tokens


{'id': 'chatcmpl-4da4a7c3-70c0-4b1e-8d55-8070045f5908',
 'object': 'chat.completion',
 'created': 1731879824,
 'model': '../models/Llama-3.2-3B-Instruct-Q6_K_L.gguf',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': 'The capital of France is Paris.'},
   'logprobs': None,
   'finish_reason': 'stop'}],
 'usage': {'prompt_tokens': 42, 'completion_tokens': 7, 'total_tokens': 49}}

In [14]:
context = """
Below are some blood test results for a dog.
They contain information on the tested molecule, the detected level in the blood and the normal range of values.

# BLOOD_TEST_DIAGNOSIS_DOCUMENT

Certainly, here are the blood test chemical names, their detected levels, and the corresponding normal ranges:

1. CPK (IU/L): Detected level: 217, Normal range: 45 - 155
2. AST (IU/L): Detected level: 36, Normal range: 10 - 45
3. ALT (IU/L): Detected level: 83, Normal range: 10 - 60
4. ALP (IU/L): Detected level: 2780, Normal range: 45 - 152
5. GGT (IU/L): Detected level: 3,0, Normal range: 0,1 - 0,13
6. Bilirubina Tot (mg/dl): Detected level: 0,10, Normal range: 0,10 - 0,44
7. Proteine Tot (g/dl): Detected level: 7,2, Normal range: 5,8 - 8,0
8. Albumine (g/dl): Detected level: 3,26, Normal range: 2,6 - 3,8
9. Globuline (g/dl): Detected level: 3,90, Normal range: 2,6 - 4,5
10. Rapp. Alb/Glob: Detected level: 0,84, Normal range: 0,84 - 1,91
11. Colesterolo (mg/dl): Detected level: 201, Normal range: 120 - 300
12. Trigliceridi (mg/dl): Detected level: 158, Normal range: 30 - 95
13. Amilasi (IU/L): Detected level: 836, Normal range: 200 - 1900
14. Lipasi (IU/L): Detected level: 64, Normal range: 10 - 350
15. UREA (mg/dl): Detected level: 21, Normal range: 15 - 45
16. CREA (mg/dl): Detected level: 0,69, Normal range: 0,60 - 1,80
17. Glucosio (mg/dl): Detected level: 88, Normal range: 70 - 110
18. Calcio (mg/dl): Detected level: 9,80, Normal range: 8,0 - 12,0
19. Acido Urico (mg/dl): Detected level: -, Normal range: 0,2 - 1
20. Fosforo (mg/dl): Detected level: 3,90, Normal range: 2,5 - 5,6
21. Sodio (mEq/L): Detected level: 150, Normal range: 144 - 155
22. Potassio (mEq/L): Detected level: 4,9, Normal range: 3,3 - 5,4
23. Rapp. Na/K: Detected level: 30,61, Normal range: > 27
24. Cloro corr. (mEq/L): Detected level: 108,04, Normal range: 98 - 118
25. HCO3 (mEq/L): Detected level: 21, Normal range: 17,0 - 25,2
26. Div. anionico: Detected level: 23, Normal range: 12,0 - 24,0
27. Osmolarità cal. (mOsm): Detected level: 322, Normal range: 314 - 335
28. Ferro (μg/dl): Detected level: 114, Normal range: 100 - 280
29. UBC (μg/dl): Detected level: 330, Normal range: 150 - 350
30. TIBC (μg/dl): Detected level: 444, Normal range: 300 - 510
31. Saturazione (%): Detected level: 25,68, Normal range: 30 - 60
32. Ferritina (ng/ml): Detected level: -, Normal range: 21 - 78
33. Aptoglobina HPT (mg/dl): Detected level: -, Normal range: 20 - 60
34. Ac. Billari pre (μmol/L): Detected level: 0,3, Normal range: 0,3 - 9
35. Ac. Billari post (μmol/L): Detected level: 0,6, Normal range: 0,6 - 30
36. Alfa 2 Macroglobulina (g/L): Detected level: -, Normal range: 0,02 - 0,65
37. Cistatina (mg/L): Detected level: 2282, Normal range: 3350 - 6550
38. Colinesterasi (IU/L): Detected level: -, Normal range: 30 - 398
39. LDH (IU/L): Detected level: -, Normal range: 188 - 351
40. Fruttosamina (μmol/L): Detected level: -, Normal range: 0,2 - 9,0
41. Lattato (mg/dl): Detected level: -, Normal range: 0,5 - 4,95
42. Ceruloplasmina (mg/dl): Detected level: -, Normal range: 2,5 - 0,10
43. Proteina C Reatt. (mg/dl): Detected level: -, Normal range: 0,01 - 0,10
44. AGP (mg/ml): Detected level: -, Normal range: 0,01 - 0,10
45. Magnesio (mg/dl): Detected level: 111, Normal range: 1,60 - 2,48
46. Calcio Iónico (mmol/L): Detected level: 1,50, Normal range: 0,97 - 1,34"""

In [15]:
# Define the general prompt structure as a template
general_structure = """
<|begin_of_text|><|start_header_id|>user<|end_header_id|>
You are an assistant for question-answering tasks. Use the following pieces of retrieved context in the section demarcated by "```" to answer the question.
The context may contain multiple question answer pairs as an example, just answer the final question provided after the context.
If you don't know the answer just say that you don't know. Use three sentences maximum and keep the answer concise.

{context}
Question: {input}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# Define the specific question as a string
input_question = """
1. For each tested molecule check the detected level.
2. For each tested molecule analyze the normal range.
3. For each tested molecule infer whether the detected level is outside the normal range.
4. Select and return exclusively the molecules that fall outside the normal range, citing both the etected level and the range.
5. Provide a comprehensive health diagnosis for the dog based on detected abnormalities.
"""

# Combine everything by formatting the general structure with context and input
llama3_prompt = general_structure.format(context=context, input=input_question)

In [16]:
output = llm(
    llama3_prompt,
    max_tokens=None,  # Generate up to 32 tokens, set to None to generate up to the end of the context window
    echo=True,
)  # Generate a completion, can also call create_completion

Llama.generate: 1 prefix-match hit, remaining 1479 prompt tokens to eval
llama_perf_context_print:        load time =      55.56 ms
llama_perf_context_print: prompt eval time =       0.00 ms /  1479 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /  1833 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   28698.74 ms /  3312 tokens


In [17]:
print(output["choices"][0]["text"][len(llama3_prompt) :])

Based on the provided context, I will answer the questions and provide the required information.

The detected levels and normal ranges for each molecule are:

1. CPK (IU/L): Detected level: 217, Normal range: 45 - 155
	* Out of range: High
2. AST (IU/L): Detected level: 36, Normal range: 10 - 45
	* In range: Low
3. ALT (IU/L): Detected level: 83, Normal range: 10 - 60
	* Out of range: High
4. ALP (IU/L): Detected level: 2780, Normal range: 45 - 152
	* Out of range: High
5. GGT (IU/L): Detected level: 3,0, Normal range: 0,1 - 0,13
	* In range: Low
6. Bilirubina Tot (mg/dl): Detected level: 0,10, Normal range: 0,10 - 0,44
	* In range: Low
7. Proteine Tot (g/dl): Detected level: 7,2, Normal range: 5,8 - 8,0
	* In range: High
8. Albumine (g/dl): Detected level: 3,26, Normal range: 2,6 - 3,8
	* In range: High
9. Globuline (g/dl): Detected level: 3,90, Normal range: 2,6 - 4,5
	* In range: High
10. Rapp. Alb/Glob: Detected level: 0,84, Normal range: 0,84 - 1,91
	* In range: High
11. Colester

# Few Shot Prompting

In [None]:
# Few-shot prompt focused on detecting only abnormal values
few_shot_prompt = """
### Example 1
Input:
BLOOD_TEST_DIAGNOSIS_DOCUMENT
1. CPK (IU/L): Detected level: 217, Normal range: 45 - 155
2. AST (IU/L): Detected level: 36, Normal range: 10 - 45
3. ALT (IU/L): Detected level: 83, Normal range: 10 - 60

Expected Output:
Abnormal Results:
1. CPK: High (Detected level 217, above normal range of 45 - 155).
3. ALT: High (Detected level 83, above normal range of 10 - 60).

### Example 2
Input:
BLOOD_TEST_DIAGNOSIS_DOCUMENT
4. ALP (IU/L): Detected level: 2780, Normal range: 45 - 152
5. GGT (IU/L): Detected level: 0.01, Normal range: 0.1 - 0.13
6. Bilirubina Tot (mg/dl): Detected level: 0.10, Normal range: 0.10 - 0.44

Expected Output:
Abnormal Results:
4. ALP: High (Detected level 2780, above normal range of 45 - 152).
5. GGT: Low (Detected level 0.01, below normal range of 0.1 - 0.13).

### Your Input:
BLOOD_TEST_DIAGNOSIS_DOCUMENT
"""


user_input = """Analyze the following blood test results and report only the abnormal values.\n"""

context = """# BLOOD_TEST_DIAGNOSIS_DOCUMENT

Certainly, here are the blood test chemical names, their detected levels, and the corresponding normal ranges:

1. CPK (IU/L): Detected level: 217, Normal range: 45 - 155
2. AST (IU/L): Detected level: 36, Normal range: 10 - 45
3. ALT (IU/L): Detected level: 83, Normal range: 10 - 60
4. ALP (IU/L): Detected level: 2780, Normal range: 45 - 152
5. GGT (IU/L): Detected level: 3,0, Normal range: 0,1 - 0,13
6. Bilirubina Tot (mg/dl): Detected level: 0,10, Normal range: 0,10 - 0,44
7. Proteine Tot (g/dl): Detected level: 7,2, Normal range: 5,8 - 8,0
8. Albumine (g/dl): Detected level: 3,26, Normal range: 2,6 - 3,8
9. Globuline (g/dl): Detected level: 3,90, Normal range: 2,6 - 4,5
10. Rapp. Alb/Glob: Detected level: 0,84, Normal range: 0,84 - 1,91
11. Colesterolo (mg/dl): Detected level: 201, Normal range: 120 - 300
12. Trigliceridi (mg/dl): Detected level: 158, Normal range: 30 - 95
13. Amilasi (IU/L): Detected level: 836, Normal range: 200 - 1900
14. Lipasi (IU/L): Detected level: 64, Normal range: 10 - 350
15. UREA (mg/dl): Detected level: 21, Normal range: 15 - 45
16. CREA (mg/dl): Detected level: 0,69, Normal range: 0,60 - 1,80
17. Glucosio (mg/dl): Detected level: 88, Normal range: 70 - 110
18. Calcio (mg/dl): Detected level: 9,80, Normal range: 8,0 - 12,0
19. Acido Urico (mg/dl): Detected level: -, Normal range: 0,2 - 1
20. Fosforo (mg/dl): Detected level: 3,90, Normal range: 2,5 - 5,6
21. Sodio (mEq/L): Detected level: 150, Normal range: 144 - 155
22. Potassio (mEq/L): Detected level: 4,9, Normal range: 3,3 - 5,4
23. Rapp. Na/K: Detected level: 30,61, Normal range: > 27
24. Cloro corr. (mEq/L): Detected level: 108,04, Normal range: 98 - 118
25. HCO3 (mEq/L): Detected level: 21, Normal range: 17,0 - 25,2
26. Div. anionico: Detected level: 23, Normal range: 12,0 - 24,0
27. Osmolarità cal. (mOsm): Detected level: 322, Normal range: 314 - 335
28. Ferro (μg/dl): Detected level: 114, Normal range: 100 - 280
29. UBC (μg/dl): Detected level: 330, Normal range: 150 - 350
30. TIBC (μg/dl): Detected level: 444, Normal range: 300 - 510
31. Saturazione (%): Detected level: 25,68, Normal range: 30 - 60
32. Ferritina (ng/ml): Detected level: -, Normal range: 21 - 78
33. Aptoglobina HPT (mg/dl): Detected level: -, Normal range: 20 - 60
34. Ac. Billari pre (μmol/L): Detected level: 0,3, Normal range: 0,3 - 9
35. Ac. Billari post (μmol/L): Detected level: 0,6, Normal range: 0,6 - 30
36. Alfa 2 Macroglobulina (g/L): Detected level: -, Normal range: 0,02 - 0,65
37. Cistatina (mg/L): Detected level: 2282, Normal range: 3350 - 6550
38. Colinesterasi (IU/L): Detected level: -, Normal range: 30 - 398
39. LDH (IU/L): Detected level: -, Normal range: 188 - 351
40. Fruttosamina (μmol/L): Detected level: -, Normal range: 0,2 - 9,0
41. Lattato (mg/dl): Detected level: -, Normal range: 0,5 - 4,95
42. Ceruloplasmina (mg/dl): Detected level: -, Normal range: 2,5 - 0,10
43. Proteina C Reatt. (mg/dl): Detected level: -, Normal range: 0,01 - 0,10
44. AGP (mg/ml): Detected level: -, Normal range: 0,01 - 0,10
45. Magnesio (mg/dl): Detected level: 111, Normal range: 1,60 - 2,48
46. Calcio Iónico (mmol/L): Detected level: 1,50, Normal range: 0,97 - 1,34"""
# Combine the few-shot prompt with the user's input
few_shot_prompt = few_shot_prompt + context + user_input

In [None]:
output = llm(
    full_prompt,
    max_tokens=2048,  # Generate up to 32 tokens, set to None to generate up to the end of the context window
    echo=True,
)  # Generate a completion, can also call create_completion

In [None]:
print(output["choices"][0]["text"][len(full_prompt) :])

# Chain of Thought Prompting

In [None]:
# Chain-of-Thought prompt with an example
cot_prompt = """
Analyze the following blood test results, identify abnormal values, and explain the reasoning step by step.

### Example 1
Input:
BLOOD_TEST_DIAGNOSIS_DOCUMENT
1. AST (IU/L): Detected level: 50, Normal range: 10 - 45
2. ALT (IU/L): Detected level: 25, Normal range: 10 - 60
3. ALP (IU/L): Detected level: 200, Normal range: 45 - 152
4. Glucosio (mg/dl): Detected level: 65, Normal range: 70 - 110
5. STI (IU/L): Detected level: -, Normal range: 10 - 100


Let's think step by step:
1. AST: Detected level is 50. The normal range is 10 - 45. Since 50 is higher than 45, AST is abnormal (High).
2. ALT: Detected level is 25. The normal range is 10 - 60. Since 25 is within this range, ALT is normal.
3. ALP: Detected level is 200. The normal range is 45 - 152. Since 200 is higher than 152, ALP is abnormal (High).
4. Glucosio: Detected level is 65. The normal range is 70 - 110. Since 65 is lower than 70, Glucosio is abnormal (Low).
5. STI: Detected level is -. The normal range is 10 - 100. This molecule was not tested.
Abnormal Results:
1. AST: High (Detected level 50, above normal range of 10 - 45).
3. ALP: High (Detected level 200, above normal range of 45 - 152).
4. Glucosio: Low (Detected level 65, below normal range of 70 - 110).

### New Input:
"""

# Combine prompt with new input
full_prompt = cot_prompt + user_input + context + "\nLet's think step by step:\n"

In [None]:
output = llm(
    full_prompt,
    max_tokens=4096,  # Generate up to 32 tokens, set to None to generate up to the end of the context window
    echo=True,
)  # Generate

In [None]:
print(output["choices"][0]["text"][len(full_prompt) :])

# Other Prompt Tests

In [None]:
# Define a more detailed system prompt to guide the LLM effectively
DEFAULT_SYSTEM_PROMPT = """
You are analyzing blood test results. Your task is to detect abnormalities and provide a health diagnosis.
For each abnormal result, provide the chemical name, its detected level, and indicate whether it's above or below the normal range.
An abnormal value is defined as a value which is below the minimum normal range or above the maximum normal range.
If no abnormal values are found, respond with "All values are within the normal range."
Do not include any irrelevant details or chemicals that are within the normal range.
""".strip()


# Function to generate the prompt using the enhanced system and user-specific inputs
def generate_prompt(
    context: str, question: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT
) -> str:
    return f"""
[INST] <<SYS>>
{system_prompt}
<</SYS>>

{context}

Question: {question} [/INST]
""".strip()


# Question is focused and clear
question = "Extract all abnormal chemical names with their detected levels and indicate if they are above or below the normal range."

# Generate the refined prompt
final_prompt = generate_prompt(context=context, question=question)

# Print the final prompt to see how it looks
print(final_prompt)

In [None]:
output = llm(
    final_prompt,
    max_tokens=4096,  # Generate up to 32 tokens, set to None to generate up to the end of the context window
    echo=True,
)  # Generate

In [None]:
print(output["choices"][0]["text"][len(final_prompt) :])