In [1]:
# --- Hugging Face Login using Kaggle Secrets ---
# Make sure you have set 'HF_TOKEN' in Kaggle Secrets before running this.

from huggingface_hub import login
import os
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
secret_value = user_secrets.get_secret("HF_TOKEN")

try:
    login(token=secret_value)
    print("\nSuccessfully logged in to Hugging Face using Kaggle Secret.")
except:
    print("\nError: 'HF_TOKEN' not found in Kaggle Secrets.")
    print("You can manually log in by uncommenting the line below.")
    # login()  # Uncomment to use manual login



Successfully logged in to Hugging Face using Kaggle Secret.


In [2]:
from datasets import load_dataset

print("Loading the gaokerena/MF3QA dataset from Hugging Face Hub...")

try:
    dataset = load_dataset("gaokerena/MF3QA")
    print("Dataset loaded successfully.")
    print("\nDataset structure:")
    print(dataset)

    # Get training split
    if 'train' in dataset:
        train_dataset = dataset['train']
    else:
        train_dataset = dataset[list(dataset.keys())[0]]

    print(f"\nNumber of examples in the training split: {len(train_dataset)}")
    print("\nFirst 5 raw examples from the dataset:")
    for i in range(min(5, len(train_dataset))):
        print(f"--- Example {i+1} ---")
        print(f"Question: {train_dataset[i]['Question']}")
        print(f"Answer: {train_dataset[i]['Answer']}")

    # Format dataset for instruction tuning
    def format_example(example):
        question = str(example.get('Question', '')).strip()
        answer = str(example.get('Answer', '')).strip()
        return {"text": f"سوال: {question}\nپاسخ: {answer}"}

    print("\nFormatting the dataset into 'text' column...")
    formatted_dataset = train_dataset.map(format_example, remove_columns=train_dataset.column_names)

    print("\nFirst 3 formatted examples:")
    for i in range(min(3, len(formatted_dataset))):
        print(f"--- Formatted Example {i+1} ---")
        print(formatted_dataset[i]['text'])

    print("\nDataset preparation for fine-tuning is complete.")

except Exception as e:
    print(f"An error occurred during dataset loading or preparation: {e}")
    print("Make sure you are logged into Hugging Face and your internet connection is stable.")


Loading the gaokerena/MF3QA dataset from Hugging Face Hub...


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

dev-00000-of-00001.parquet:   0%|          | 0.00/1.10M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/796k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/20000 [00:00<?, ? examples/s]

Generating dev split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Dataset loaded successfully.

Dataset structure:
DatasetDict({
    train: Dataset({
        features: ['Question', 'Answer', 'Source'],
        num_rows: 20000
    })
    dev: Dataset({
        features: ['Question', 'Answer', 'Source'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['Question', 'Answer', 'Source'],
        num_rows: 2000
    })
})

Number of examples in the training split: 20000

First 5 raw examples from the dataset:
--- Example 1 ---
Question: ۱ماهپیش از خواب پریدم از ترس شدید بعداز اون ترس،فشارم بالا میرفت ۱۴ ۱۵ با سر درد و درد قفسه سینه همراه بود با پرانول کنترش میکردم ولی الان ۲ ۳ روزه فشارم میاد پاین ۸ ۹ روی ۳ ۴ وقتایی که میرم پیاده روی یا کلا فعالیتی دارم بعدش ضربانم تا ۶ ۷ ساعت حتی بیشتر تنده و کند و طبیعی نمیشه خستهه شدم دیگهمیترسم اتفاقی برام بیوفته فشار ۸ ۹ روری ۴ ۵ خطرناکه؟
Answer: تجربه‌ای که شرح داده‌اید، نشان‌دهنده‌ی رخدادهای فیزیولوژیک و احتمالاً پاتولوژیک در بدن شما است. ترس شدید و پرش از خواب می‌تواند منجر به افزایش موقت فشار خو

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]


First 3 formatted examples:
--- Formatted Example 1 ---
سوال: ۱ماهپیش از خواب پریدم از ترس شدید بعداز اون ترس،فشارم بالا میرفت ۱۴ ۱۵ با سر درد و درد قفسه سینه همراه بود با پرانول کنترش میکردم ولی الان ۲ ۳ روزه فشارم میاد پاین ۸ ۹ روی ۳ ۴ وقتایی که میرم پیاده روی یا کلا فعالیتی دارم بعدش ضربانم تا ۶ ۷ ساعت حتی بیشتر تنده و کند و طبیعی نمیشه خستهه شدم دیگهمیترسم اتفاقی برام بیوفته فشار ۸ ۹ روری ۴ ۵ خطرناکه؟
پاسخ: تجربه‌ای که شرح داده‌اید، نشان‌دهنده‌ی رخدادهای فیزیولوژیک و احتمالاً پاتولوژیک در بدن شما است. ترس شدید و پرش از خواب می‌تواند منجر به افزایش موقت فشار خون و تپش قلب شود، که این واکنشی طبیعی است. با این حال، ادامه‌ی این علائم و تغییرات فشار خون از بالا به پایین نیازمند بررسی بیشتر است. فشار خون شما که گاهی به 8/9 می‌رسد و ضربان قلب بالا پس از فعالیت، ممکن است نشانه‌ای از اختلال در تنظیم فشار خون یا مشکلات قلبی باشد. مراجعه به پزشک، انجام آزمایش‌های تخصصی قلب و عروق و شاید مشاوره با یک روانپزشک یا روانشناس برای مدیریت استرس و ترس شدید پیشنهاد می‌شود. اطمینان از کنترل درست فشار 

In [3]:
print("Installing required libraries...")

# Install/update essential libraries quietly
!pip install -q -U transformers peft trl bitsandbytes scipy datasets
!pip install -q -U "huggingface_hub[cli]"

!git config --global user.name "lbehradl"
!git clone https://github.com/unslothai/unsloth.git
!pip install -q -U ./unsloth
!pip install -q -U unsloth_zoo

print("Libraries installation/update complete.")


Installing required libraries...
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m84.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.3/472.3 kB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m376.2/376.2 kB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.3/35.3 MB[0m [31m51.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.8/494.8 kB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m

In [4]:
!pip install -U "scipy==1.11.4"

Collecting scipy==1.11.4
  Downloading scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Downloading scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.4/36.4 MB[0m [31m48.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scipy
  Attempting uninstall: scipy
    Found existing installation: scipy 1.16.0
    Uninstalling scipy-1.16.0:
      Successfully uninstalled scipy-1.16.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
tsfresh 0.21.0 requires scipy>=1.14.0; python_version >= "3.10",

In [5]:
import unsloth
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer

try:
    from unsloth import FastLanguageModel
    print("Unsloth detected. Using FastLanguageModel for optimized loading.")
    USE_UNSLOTH = True
except ImportError:
    print("Unsloth not found. Using standard Hugging Face loading.")
    USE_UNSLOTH = False

print('Imports done')


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-07-21 21:01:16.443266: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753131676.645887      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753131676.707983      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!
Unsloth detected. Using FastLanguageModel for optimized loading.
Imports done


In [6]:
model_name = "google/medgemma-4b-it"  # or "google/medgemma-27b"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",            # quantization type
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute dtype
    bnb_4bit_use_double_quant=True,       # double quantization to save memory
)

# --- Load model and tokenizer ---
if USE_UNSLOTH:
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=512,   # max input sequence length, adjust as needed
        dtype=None,            # let unsloth choose dtype (usually bfloat16)
        load_in_4bit=True,     # enable 4-bit quantization
    )
else:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # set pad token if missing

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",          # auto GPU mapping
        torch_dtype=torch.bfloat16, # use bfloat16 for computation
    )
    model = prepare_model_for_kbit_training(model)  # prepare for 4-bit LoRA training

print(f"\nModel '{model_name}' and Tokenizer loaded successfully.")
print("\nModel structure (partial):")
print(model)

# --- Tokenizer training settings ---
tokenizer.padding_side = "right"  # pad on the right for decoder-only models

print("\nTokenizer padding side set to 'right'.")
print("Ready for LoRA config and training.")


==((====))==  Unsloth 2025.7.6: Fast Gemma3 patching. Transformers: 4.53.2.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


model.safetensors:   0%|          | 0.00/4.12G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

chat_template.json: 0.00B [00:00, ?B/s]

preprocessor_config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]


Model 'google/medgemma-4b-it' and Tokenizer loaded successfully.

Model structure (partial):
Gemma3ForConditionalGeneration(
  (model): Gemma3Model(
    (vision_tower): SiglipVisionModel(
      (vision_model): SiglipVisionTransformer(
        (embeddings): SiglipVisionEmbeddings(
          (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
          (position_embedding): Embedding(4096, 1152)
        )
        (encoder): SiglipEncoder(
          (layers): ModuleList(
            (0-15): 16 x SiglipEncoderLayer(
              (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
              (self_attn): SiglipAttention(
                (k_proj): Linear(in_features=1152, out_features=1152, bias=True)
                (v_proj): Linear(in_features=1152, out_features=1152, bias=True)
                (q_proj): Linear(in_features=1152, out_features=1152, bias=True)
                (out_proj): Linear(in_features=1152, out_features=1152, b

In [7]:
# --- Step 4: LoRA and Trainer Setup ---
# Run this in a new Kaggle notebook cell
# Assumes 'model', 'tokenizer', and 'formatted_dataset' are available

from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments
from trl import SFTTrainer

print("Configuring LoRA and training arguments...")

# LoRA configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Apply LoRA to model
# If using Unsloth and get "Adapter already exists" error, comment out this line
model = get_peft_model(model, lora_config)

print("LoRA adapters attached.")
output_dire = "/kaggle/working/fine_tuned"
os.makedirs(output_dire, exist_ok=True)

# Training arguments
training_arguments = TrainingArguments(
    output_dir="/kaggle/working/fine_tuned",
    num_train_epochs=3,
    per_device_train_batch_size=2,       # Increased batch size for better GPU use
    gradient_accumulation_steps=4,       # Effective batch size = 8
    optim="paged_adamw_8bit",             # Optimizer for QLoRA
    save_steps=100,
    logging_steps=50,
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    report_to="tensorboard",
    disable_tqdm=False,
    torch_compile=False,                  # Disabled for P100 compatibility
    # Uncomment to push to HuggingFace Hub
    # push_to_hub=True,
    # hub_model_id="your-username/medgemma-fa-medical-qa",
    # hub_private_repo=False,
    # hub_strategy="every_save",
)

# Initialize SFTTrainer
trainer = SFTTrainer(
    model=model,
    train_dataset=formatted_dataset,
    peft_config=lora_config,
    args=training_arguments,
    tokenizer=tokenizer,
    max_seq_length=512,   # shorter sequence length for faster training
    dataset_text_field="text",
    packing=False,
)

print("\nLoRA and training args set.")
print("Trainer ready to start training!")


Configuring LoRA and training arguments...
LoRA adapters attached.
Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"]:   0%|          | 0/20000 [00:00<?, ? examples/s]


LoRA and training args set.
Trainer ready to start training!


In [8]:
# --- Step 5: Run Fine-tuning ---
# Run this in a new Kaggle notebook cell
# Make sure 'trainer' from Step 4 is available

print("Starting fine-tuning. This may take a while depending on GPU and dataset size.")
print("Training logs will appear per 'logging_steps' in TrainingArguments.")

trainer.train()

print("\nFine-tuning completed!")

# --- Step 6: Save Fine-tuned Model ---
# Save LoRA adapters and tokenizer after training
output_dir_model = "/kaggle/working/fine_tuned/final_model"
os.makedirs(output_dir_model, exist_ok=True)

print(f"\nSaving fine-tuned model to: {output_dir_model}")
trainer.model.save_pretrained(output_dir_model)
print("LoRA adapters saved.")

tokenizer.save_pretrained(output_dir_model)
print("Tokenizer saved.")

# --- Optional: Merge LoRA adapters with base model (requires more GPU RAM) ---
# Useful for final deployment
# Uncomment and use if needed

# if USE_UNSLOTH:
#     print("\nMerging LoRA with base model using Unsloth...")
#     trainer.model.save_pretrained_merged(output_dir_model, tokenizer, save_method="merged_4bit")
#     print("Merged model saved.")
# else:
#     print("\nTo merge LoRA adapters without Unsloth (high RAM needed), run:")
#     print("from peft import PeftModel")
#     print("from transformers import AutoModelForCausalLM")
#     print(f"base_model = AutoModelForCausalLM.from_pretrained('{model_name}', torch_dtype=torch.float32, device_map='auto')")
#     print(f"merged_model = PeftModel.from_pretrained(base_model, '{output_dir_model}')")
#     print("merged_model = merged_model.merge_and_unload()")
#     print("merged_model.save_pretrained('./results/merged_model')")
#     print("tokenizer.save_pretrained('./results/merged_model')")

print("\nFine-tuning and saving complete. Model ready for inference.")


Starting fine-tuning. This may take a while depending on GPU and dataset size.
Training logs will appear per 'logging_steps' in TrainingArguments.


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 20,000 | Num Epochs = 3 | Total steps = 3,750
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 32,788,480 of 4,332,867,952 (0.76% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
50,3.2817
100,3.2518
150,3.3242
200,3.3021
250,3.2961
300,3.2974
350,3.2807
400,3.3065
450,3.2664
500,3.3303



Fine-tuning completed!

Saving fine-tuned model to: /kaggle/working/fine_tuned/final_model
LoRA adapters saved.
Tokenizer saved.

Fine-tuning and saving complete. Model ready for inference.
