In [None]:
!pip install transformers datasets peft accelerate bitsandbytes -q

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

In [None]:
!pip install --upgrade datasets fsspec


Collecting fsspec
  Using cached fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)


In [None]:

from datasets import load_dataset, DownloadConfig

# Fetch the train split of the MediQA dataset, caching downloads under ./cache.
# `token=True` asks the Hub client to use the locally stored access token.
dataset = load_dataset(
    path="medalpaca/medical_meadow_mediqa",
    split="train",
    cache_dir="./cache",
    token=True,
)

# Base checkpoint shared by the tokenizer and, later, the model itself.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Reuse EOS as the padding token so that padded tokenization has a pad token
# available (presumably the checkpoint's tokenizer does not define one).
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(example):
    """Tokenize one example, or one batch of examples, into fixed-length model inputs.

    Works both with ``Dataset.map(..., batched=True)`` (each column is a list)
    and with un-batched mapping (each column is a single string). Every example
    becomes its own ``"Instruction: ...\\nResponse: ..."`` prompt, padded or
    truncated to 512 tokens.

    Args:
        example: Mapping with ``"instruction"`` and ``"output"`` entries, each
            either a string or a list of strings of equal length.

    Returns:
        The tokenizer's encoding, with one row per input example.
    """
    instructions = example["instruction"]
    outputs = example["output"]

    if isinstance(instructions, str):
        # Un-batched call: a single prompt string.
        text = f"Instruction: {instructions}\nResponse: {outputs}"
    else:
        # Batched call: build one prompt per example. Formatting the whole
        # column in a single f-string would stringify the Python list and
        # collapse the entire batch into one training row.
        text = [
            f"Instruction: {instruction}\nResponse: {output}"
            for instruction, output in zip(instructions, outputs)
        ]

    # Return plain Python lists (no `return_tensors`) so that `map` keeps one
    # row per example and the attention mask keeps the same shape as the
    # input ids.
    return tokenizer(text, padding="max_length", truncation=True, max_length=512)

# Tokenize the dataset in batches, replacing the original columns with the
# tokenizer's output fields.
tokenized_dataset = dataset.map(
    tokenize_function,
    remove_columns=dataset.column_names,
    batched=True,
)

# Make indexing into the dataset return torch tensors.
tokenized_dataset.set_format(type="torch")

medical_meadow_mediqa.json:   0%|          | 0.00/15.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2208 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Map:   0%|          | 0/2208 [00:00<?, ? examples/s]

In [None]:
# Show the maximum sequence length the tokenizer reports. The recorded output
# is on the order of 1e30, i.e. effectively unbounded, which is presumably why
# an explicit `max_length` is passed when tokenizing.
print(tokenizer.model_max_length)


1000000000000000019884624838656


In [None]:
import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Load the base model quantized to 4-bit. The quantization settings are passed
# as a `BitsAndBytesConfig` because the bare `load_in_4bit=` keyword on
# `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)

# PEFT's preparation step for training on top of a quantized base model;
# it must run before the adapters are attached.
model = prepare_model_for_kbit_training(model)

# Rank-16 LoRA adapters on the attention query and value projections only.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],
)

model = get_peft_model(model, lora_config)

# Report how many parameters are trainable after wrapping.
model.print_trainable_parameters()


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

trainable params: 6,815,744 || all params: 7,248,547,840 || trainable%: 0.0940


In [None]:

# Single-epoch fp16 run with no checkpointing and no external experiment tracker.
training_args = TrainingArguments(
    # Output locations.
    output_dir="./fine_tuned_model",
    logging_dir="./logs",
    # Batching: 2 sequences per device, accumulated over 8 steps per update.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    # Schedule and precision.
    num_train_epochs=1,
    fp16=True,
    # Logging and checkpointing.
    logging_steps=10,
    report_to="none",
    save_strategy="no",
)


In [None]:
class CustomTrainer(Trainer):
    """Trainer whose loss is next-token cross-entropy derived from ``input_ids``.

    The batches carry no separate ``labels`` entry, so the targets are built
    here by shifting ``input_ids`` one position to the left.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the causal language-modeling loss for one batch.

        Padded positions are excluded from the loss so that the model is not
        trained to predict padding.

        Args:
            model: The model being trained; it is called with ``input_ids``
                and ``attention_mask``.
            inputs: Batch mapping containing ``input_ids`` and ``attention_mask``.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                only the loss.
            num_items_in_batch: Accepted so the signature matches the hook the
                base ``Trainer`` calls; not used here.

        Returns:
            The loss tensor, or a ``(loss, outputs)`` tuple when
            ``return_outputs`` is true.
        """
        input_ids = inputs.get("input_ids").to(model.device)
        # Bring the mask to exactly the shape of input_ids so it lines up
        # element-wise even if it arrives with an extra singleton dimension.
        attention_mask = inputs.get("attention_mask").to(model.device).reshape(input_ids.shape)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

        # Position t predicts token t + 1: drop the last logit and the first label.
        shift_logits = outputs.logits[..., :-1, :].contiguous()
        shift_labels = input_ids[..., 1:].contiguous()
        # -100 is CrossEntropyLoss's default ignore_index; use it for every
        # target that is padding according to the attention mask.
        shift_labels = shift_labels.masked_fill(attention_mask[..., 1:] == 0, -100)

        loss_fn = torch.nn.CrossEntropyLoss()
        loss = loss_fn(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        return (loss, outputs) if return_outputs else loss

# Bind the LoRA-wrapped model, the training configuration and the tokenized
# dataset into the custom trainer.
trainer = CustomTrainer(model=model, args=training_args, train_dataset=tokenized_dataset)

No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Run the fine-tuning loop. The call is the cell's last expression, so the
# returned training summary is displayed as the cell output.
trainer.train()



Step,Training Loss


TrainOutput(global_step=1, training_loss=1.76652193069458, metrics={'train_runtime': 4.886, 'train_samples_per_second': 0.614, 'train_steps_per_second': 0.205, 'total_flos': 65594657341440.0, 'train_loss': 1.76652193069458, 'epoch': 1.0})

In [None]:

# Write the model's saved files and then the tokenizer's into the same
# directory, so both can later be loaded from one path.
for _saveable in (model, tokenizer):
    _saveable.save_pretrained("./fine_tuned_lora")

print("LoRA adapters saved successfully!")


LoRA adapters saved successfully!


In [None]:
from peft import PeftModel

# Reload the base checkpoint in 4-bit on the first GPU. The quantization
# settings go through `BitsAndBytesConfig` because the bare `load_in_4bit=`
# keyword on `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="cuda:0",
)

# Attach the saved adapters on the same device as the base model, then fold
# them into the base weights so the result is a plain model again.
model = PeftModel.from_pretrained(model, "./fine_tuned_lora", device_map="cuda:0")
model = model.merge_and_unload()

# Use the tokenizer that was saved next to the adapters.
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_lora")

print("Fine-tuned model with LoRA adapters loaded successfully!")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Fine-tuned model with LoRA adapters loaded successfully!


In [None]:
def generate_response(prompt, max_length=300):
    """Generate a text continuation of ``prompt`` with the loaded model.

    Args:
        prompt: The text to continue.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
            Defaults to 300.

    Returns:
        The decoded output sequence with special tokens removed.
    """
    # Place the inputs on whatever device the model lives on instead of
    # assuming a fixed device name.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Pass the padding id explicitly so generate() does not have to fall back
    # to the EOS id (and warn about it) on every call.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():  # Inference only: no gradients needed
        output = model.generate(**inputs, max_length=max_length, pad_token_id=pad_token_id)
    return tokenizer.decode(output[0].cpu(), skip_special_tokens=True)

# Smoke-test generation with a sample medical question and print the decoded text.
print(generate_response("Symptoms of headache ? and solution"))


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Symptoms of headache ? and solution

Headache is a common health problem that can affect people of all ages. It can be caused by a variety of factors, including stress, lack of sleep, dehydration, certain foods or drinks, changes in weather, hormonal fluctuations, and more.

There are many different types of headaches, including tension headaches, migraines, cluster headaches, and more. Symptoms of headache can vary depending on the type of headache, but some common symptoms include:

* Pain on one or both sides of the head
* Throbbing, pulsing, or stabbing pain
* Sensitivity to light, sound, or touch
* Nausea or vomiting
* Fatigue or weakness
* Difficulty concentrating or remembering things

If you are experiencing a headache, there are several things you can do to help relieve the pain:

1. Rest: Try to find a quiet, dark place to rest and relax.
2. Hydrate: Drink plenty of water to help prevent dehydration, which can cause headaches.
3. Avoid certain triggers: If you know what is ca