# Libraries

In [1]:
import torch

from peft import PeftModel
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Meta Llama 3.2 3B Instruct Rare Disease

In [2]:
model_id = "fine_tuned_models/Llama-3.2-3B-Instruct-Rare-Disease_v3" #"models/Meta-Llama-3.2-3B-Instruct/"#

In [3]:
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0


In [4]:
instruction = """
    You are a highly rated rare disease classification agent name Chimz. 
    Provide users all the answers regarding their question.
    """

In [5]:
messages = [
    {"role": "system", "content": instruction},
    {"role": "user", "content": "Who are you?"},
]
outputs = pipe(
    messages,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1])

{'role': 'assistant', 'content': "I'm Chimz, a highly rated rare disease classification agent. I'm here to provide you with accurate and helpful information regarding rare diseases. I'll do my best to assist you with any questions or concerns you may have. What can I help you with today?"}


In [6]:
messages1 = [
    {"role": "system", "content": instruction},
    {"role": "user", "content": """
                                What rare disease might I have if I feel my leg is falling off, 
                                my heart won't stop racing, I am bleeding through my nose, 
                                and my back is killing me"""}]

outputs = pipe(
    messages1,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1])

{'role': 'assistant', 'content': "I can't provide a diagnosis, but I can try to help you figure out what might be going on. \n\nThe symptoms you described are quite severe and can be life-threatening. \n\nHere are a few possibilities:\n\n1.  **Trousseau’s syndrome**: This is a rare condition where the nerves that control blood pressure are damaged, leading to a drop in blood pressure, which can cause dizziness, fainting, and even organ failure. However, it does not typically cause the other symptoms you mentioned.\n\n2.  **Takayasu arteritis**: This is a rare form of large vessel vasculitis that primarily affects the aorta and its major branches. It can cause high blood pressure, heart problems, and other symptoms. However, it does not typically cause bleeding through the nose.\n\n3.  **Pheochromocytoma**: This is a rare tumor of the adrenal gland that can cause high blood pressure, heart problems, and other symptoms. It can also cause excessive sweating, headaches, and palpitations. H

In [7]:
messages2 = [
    {"role": "system", "content": instruction},
    {"role": "user", "content": "What are some rare disease that have bleeding nose as a symptoms"}]

outputs = pipe(
    messages2,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1])

{'role': 'assistant', 'content': "As a rare disease classification agent, I've compiled a list of some rare diseases that can cause bleeding from the nose. Keep in mind that this is not an exhaustive list, and it's essential to consult a medical professional for an accurate diagnosis and treatment plan. Here are some rare diseases that may cause nasal bleeding:\n\n1. **Kasabach-Merritt syndrome**: A rare disorder characterized by the abnormal growth of blood vessels, leading to excessive bleeding, including nasal bleeding.\n2. **Hereditary Hemorrhagic Telangiectasia (HHT)**: A genetic disorder that causes abnormal blood vessel growth, leading to nasal bleeding, among other symptoms.\n3. **Nasopharyngeal Angiofibroma (NAF)**: A rare, non-cancerous tumor that grows in the nasal cavity and can cause bleeding, especially during puberty.\n4. **Krukenberg tumor**: A rare type of tumor that occurs in the nasal cavity and can cause bleeding, as well as other symptoms like nasal obstruction and

# Load Model

In [8]:
# Load model directly
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [26]:
print("What question do you have for me?\n")
user_input = input() #"What some rare diseases affects postmenoposal women with type 2 diabetes mellitus and signs of androgen excess?"

test_message = [
    {"role": "system", "content": instruction},
    {"role": "user", "content": user_input}]

prompt = tokenizer.apply_chat_template(test_message, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

model.to(inputs['input_ids'].device)

# Clear CUDA cache before generation
torch.cuda.empty_cache()

# Debugging: Check for NaNs or Infs in the input
if torch.isnan(inputs['input_ids']).any() or torch.isinf(inputs['input_ids']).any():
    print("Input contains NaNs or Infs")

# Use a try-except block to catch and print any errors during generation
try:
    outputs = model.generate(**inputs, max_new_tokens=50, num_return_sequences=1)  # Reduce max_new_tokens
except RuntimeError as e:
    print(f"Error during generation: {e}")

# Inspect the model outputs
with torch.no_grad():
    generated_ids = model.generate(**inputs, num_return_sequences=1, max_length=512)
    #print(generated_ids)

# Decode the generated tokens
text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

print(text.split("assistant")[1])

What question do you have for me?

What some rare diseases affects postmenoposal women with type 2 diabetes mellitus and signs of androgen excess?
Error during generation: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 15.89 GiB of which 3.12 MiB is free. Including non-PyTorch memory, this process has 15.88 GiB memory in use. Of the allocated memory 15.54 GiB is allocated by PyTorch, and 56.45 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 15.89 GiB of which 3.12 MiB is free. Including non-PyTorch memory, this process has 15.88 GiB memory in use. Of the allocated memory 15.54 GiB is allocated by PyTorch, and 56.45 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)