In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch

In [None]:
# Identifier handed to AutoTokenizer / AutoModelForCausalLM.from_pretrained in the cells below.
# NOTE(review): the published checkpoint name is usually spelled "...-8B-Instruct"
# (capital I) — confirm this lowercase spelling resolves as intended.
base_model = "meta-llama/Meta-Llama-3.1-8B-instruct"
# Location of the fine-tuned adapter handed to PeftModel.from_pretrained below.
# Presumably a path relative to the notebook's working directory — confirm.
fine_tuned_model = "models/llama-3.1-fine-tuned-model"

In [None]:
# Tokenizer comes from the base checkpoint id; it is reused by the generation
# pipeline and later saved / pushed next to the merged model.
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [None]:
# Load the base causal-LM weights that the fine-tuned adapter is applied to.
#   - torch_dtype=torch.float16: request half-precision weights.
#   - device_map="auto": let the library decide weight placement across devices.
#   - low_cpu_mem_usage=True: ask for the lower-peak-memory loading path.
# trust_remote_code is deliberately NOT enabled: it allows Python code shipped in
# the model repository to run locally, and the Llama architecture is implemented
# inside transformers itself, so no repository-supplied code is needed here.
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# Attach the fine-tuned adapter to the already-loaded base model, then call
# merge_and_unload() so `model` is bound to its return value rather than to the
# PEFT wrapper.
# NOTE(review): presumably this cell should not be re-run without first
# re-creating base_model_reload — confirm against the PEFT documentation.
model = PeftModel.from_pretrained(
    base_model_reload,
    fine_tuned_model,
).merge_and_unload()

In [None]:
## Testing
# Smoke test: run one hand-written record through the merged model and print
# the predicted label.
text = "Gender: Female, Age: 55, Appointment Date: 2016-05-06T14:50:43Z, Reason: 2016-05-06T00:00:00Z, Hypertension: NO, Diabetes: NO, Alcoholism: NO, Handicap: NO, SMS Received: NO."
# .strip() drops the trailing space after "label: ", so the prompt actually sent
# to the model ends with "label:".
# NOTE(review): the instruction mentions a "mental health disorder label" while
# the record is about an appointment — confirm this matches the training prompt.
prompt = f"""Classify the text into Yes, No and return the answer as the corresponding mental health disorder label.
text: {text}
label: """.strip()

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

# return_full_text=False makes the pipeline return only the newly generated
# text. Previously the answer was recovered by splitting the echoed prompt on
# "label: ", which silently returned the entire prompt whenever the model did
# not begin its completion with a space (the stripped prompt ends in "label:").
outputs = pipe(
    prompt,
    max_new_tokens=2,
    do_sample=True,
    temperature=0.1,
    return_full_text=False,
)
print(outputs[0]["generated_text"].strip())

In [None]:
# Persist the merged model and its tokenizer side by side in one directory.
# The tokenizer call stays last so its return value remains the cell's output.
model_dir = "models/Llama-3.1-8B-Instruct-Patient-Attendence"
model.save_pretrained(save_directory=model_dir)
tokenizer.save_pretrained(save_directory=model_dir)

In [None]:
# Upload the merged model and tokenizer, passing model_dir as the repository id
# and disabling the temporary working directory for both uploads.
# NOTE(review): model_dir carries a "models/" prefix — confirm the Hub repository
# this resolves to is the intended one.
hub_upload_options = {"use_temp_dir": False}
model.push_to_hub(repo_id=model_dir, **hub_upload_options)
tokenizer.push_to_hub(repo_id=model_dir, **hub_upload_options)