In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Directory containing the fine-tuned LoRA adapter; the tokenizer is read
# from this same directory below.
model_path = "llama7b-lora-sql"

# Base checkpoint in float16; device_map="auto" delegates weight placement
# to the library instead of pinning a device by hand.
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    device_map="auto",
    torch_dtype=torch.float16,
)

# Attach the LoRA adapter to the base model and switch to evaluation mode
# (nn.Module.eval() returns the module itself, so the call can be chained).
model = PeftModel.from_pretrained(base_model, model_path).eval()

# Tokenizer shipped alongside the adapter
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Fall back to the EOS token for padding when no pad token is defined
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token




  from .autonotebook import tqdm as notebook_tqdm



Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/paperspace/Desktop/text-to-omop/model-fine-tuning/env/lib/python3.11/site-packages/bitsandbytes/libbitsandbytes_cuda121.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 121
CUDA SETUP: Loading binary /home/paperspace/Desktop/text-to-omop/model-fine-tuning/env/lib/python3.11/site-packages/bitsandbytes/libbitsandbytes_cuda121.so...


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.42s/it]


In [None]:
# Inference prompt: a natural-language question followed by a description of
# the schema columns, one "table.column: description" entry per line.
# Written as adjacent string literals (joined at compile time) so the text is
# a single valid string and each schema entry is readable on its own line.
question = (
    "Question: What are the top 4 frequent prescribed drugs for patients who were also prescribed drug_concept_id 19041823 at the same time on the last hospital visit?\n\n"
    "Schema:\n"
    "person.person_id: Unique identifier for each patient\n"
    "person.year_of_birth: Year of birth of the person\n"
    "person.gender_concept_id: Gender as concept ID\n"
    "person.race_concept_id: Race as concept ID\n"
    "drug_exposure.drug_exposure_id: Unique ID for the drug exposure record\n"
    "drug_exposure.person_id: Foreign key to person table\n"
    "drug_exposure.drug_concept_id: Concept ID of the prescribed drug\n"
    "drug_exposure.drug_exposure_start_date: Start date of drug exposure\n"
    "condition_occurrence.condition_occurrence_id: Unique ID for condition record\n"
    "condition_occurrence.person_id: Foreign key to person table\n"
    "condition_occurrence.condition_concept_id: Concept ID for condition\n"
    "condition_occurrence.condition_start_date: Date of condition onset\n"
    "visit_occurrence.visit_occurrence_id: Unique ID for visit record\n"
    "visit_occurrence.person_id: Foreign key to person table\n"
    "visit_occurrence.visit_concept_id: Concept ID for type of visit (e.g., outpatient, inpatient)\n"
    "visit_occurrence.visit_start_date: Date when the visit started\n"
    "observation.observation_id: Unique ID for observation\n"
    "observation.person_id: Foreign key to person table\n"
    "observation.observation_concept_id: Concept ID for observation\n"
    "observation.observation_date: Date when observation was made\n"
    "measurement.measurement_id: Unique ID for measurement record\n"
    "measurement.person_id: Foreign key to person table\n"
    "measurement.measurement_concept_id: Concept ID for the measurement (e.g., blood pressure)\n"
    "measurement.measurement_date: Date of the measurement\n"
    "procedure_occurrence.procedure_occurrence_id: Unique ID for procedure\n"
    "procedure_occurrence.person_id: Foreign key to person table\n"
    "procedure_occurrence.procedure_concept_id: Concept ID for performed procedure\n"
    "procedure_occurrence.procedure_date: Date of the procedure\n"
    "concept.concept_id: Unique concept identifier across vocabularies\n"
    "concept.concept_name: Human-readable name of the concept\n"
    "concept.domain_id: Domain classification (e.g., Condition, Drug, Measurement)\n"
    "concept.vocabulary_id: Vocabulary source (e.g., SNOMED, RxNorm)\n"
    "concept_ancestor.ancestor_concept_id: Higher-level (parent) concept\n"
    "concept_ancestor.descendant_concept_id: Lower-level (child) concept\n\n"
    "SQL:"
)

# NOTE(review): `question` already starts with "Question:" and ends with
# "SQL:", so the rendered prompt carries both headers twice. Left unchanged
# here -- confirm against the prompt template used during fine-tuning.
prompt = f"### Question:\n{question}\n\n### SQL:\n"

# Tokenize input and move the tensors to the device the model lives on
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate output with greedy decoding (do_sample=False), which is
# deterministic. temperature / top_p only take effect when sampling, so they
# are not passed: combined with do_sample=False they are ignored and only
# make transformers warn about unused generation flags.
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=128,  # upper bound on the length of the generated SQL
        do_sample=False,
    )

# Decode and print result. output[0] holds the prompt tokens followed by the
# newly generated ones, so the printed text starts with the prompt itself.
result = tokenizer.decode(output[0], skip_special_tokens=True)
print(result)