In [1]:
# # # # # Install Hugging Face libraries
# %pip install  --upgrade \
#   "evaluate" \
#   "tensorboard" \
#   "flash-attn" \
#   "liger-kernel" \
#   "setuptools" \
#   "deepspeed" \
#   "lm-eval[api]" \
#   "torch"\
#   "torchvision" \
#   "transformers" \
#   "datasets" \
#   "accelerate" \
#   "bitsandbytes" \
#   "trl" \
#   "peft" \
#   "lighteval" \
#   "hf-transfer"

### Import libraries and frameworks

In [2]:
import torch
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed, BitsAndBytesConfig
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import is_liger_kernel_available
from datasets import load_dataset
from trl import SFTTrainer, TrlParser, ModelConfig, SFTConfig, get_peft_config
from peft import AutoPeftModelForCausalLM

# Use BitsAndBytesConfig for quantization that helps to reduce model size
from peft import LoraConfig, get_peft_model
from peft.optimizers import create_lorafa_optimizer
from datasets import load_from_disk

import pandas as pd
from datasets import Dataset, DatasetDict

In [3]:
import os
import gc
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Fixes the warning
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Clear cache before training
torch.cuda.empty_cache()
gc.collect()

80

### Check device

In [4]:
device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
print(f"Device: {device}")

Device: cuda


### Process dataset

In [5]:
processed_data_path = "./data/processed/radiology_datasets"
dataset = load_from_disk(processed_data_path)

In [6]:
dataset

DatasetDict({
    train: Dataset({
        features: ['findings', 'impression', 'text', 'clinic_id', 'modality', 'clinic_modality'],
        num_rows: 8865
    })
    validation: Dataset({
        features: ['findings', 'impression', 'text', 'clinic_id', 'modality', 'clinic_modality'],
        num_rows: 1901
    })
    test: Dataset({
        features: ['findings', 'impression', 'text', 'clinic_id', 'modality', 'clinic_modality'],
        num_rows: 1915
    })
})

### Load model

In [7]:
# 1. APPLY QUANTIZATION (This was missing!)
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16  # Use bfloat16 for better memory efficiency
)

# %%
# Load model WITH quantization
model_name = "microsoft/MediPhi-Instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=nf4_config,  
    dtype=torch.bfloat16,      # Use bfloat16 for memory efficiency
    device_map="auto",               # Automatically distribute across GPUs
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    padding_side="right"  # Ensure consistent padding
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Understand model architechture

In [8]:
model

Phi3ForCausalLM(
  (model): Phi3Model(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (layers): ModuleList(
      (0-31): 32 x Phi3DecoderLayer(
        (self_attn): Phi3Attention(
          (o_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (qkv_proj): Linear4bit(in_features=3072, out_features=9216, bias=False)
        )
        (mlp): Phi3MLP(
          (gate_up_proj): Linear4bit(in_features=3072, out_features=16384, bias=False)
          (down_proj): Linear4bit(in_features=8192, out_features=3072, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
      )
    )
    (norm): Phi3RMSNorm((3072,), eps=1e-05)
    (rotary_emb): Phi3RotaryEmbedding()
  )
  (lm_head): Linear(in_features=3072, out_

In [9]:
trainable_params = 0
all_param = 0
for _, param in model.named_parameters():
    all_param += param.numel()
    if param.requires_grad:
        trainable_params += param.numel()
print(
    f"trainable params: {trainable_params} || "
    f"all params: {all_param} || "
    f"trainable%: {100 * trainable_params / all_param:.2f}%"
)

trainable params: 197200896 || all params: 2009140224 || trainable%: 9.82%


### Before training test the model

In [10]:
from transformers import pipeline, StoppingCriteria

In [11]:
findings = dataset['test'][0]['findings']
impression = dataset['test'][0]['impression']

print(f"Findings: {findings}\n")
print(f"Impressions: {impression}")

Findings: [CLINIC: clinic_1] [MODALITY: MR] FINDINGS: No abnormality along the sacral plexus presacral. Left sciatic nerve is normal in the greater sciatic foramen to the mid thigh. There is no mass or compression or edema along this nerve. Beginning distal mid thigh axial 36 and 35, there is intense edema of the peroneal branch of the sciatic nerve extending to the inferior margin lateral femoral condyle level left side only. There is no soft tissue mass or cyst along this nerve. No notable edema at the level of the fibular head. There is muscle denervation edema anterior and peroneal muscle compartments of the proximal leg left side only. No abnormality along the tibiofibular joint and specifically no marginating cyst. There is no other muscle denervation edema. The tibial branch of this left sciatic nerve is normal. Remaining muscles are normal. Left hamstring origin is intact with no marginating inflammation. Sacrum and sacroiliac joints are normal. Pubic symphysis has no marginati

In [12]:
# Check what token ID 32007 represents
print(f"Token 32007: '{tokenizer.decode([32007])}'")

Token 32007: '<|end|>'


In [13]:
# https://huggingface.co/microsoft/MediPhi-Instruct
# Radiology-specific system message
system_message = """You are an expert radiologist assistant specializing in generating accurate and concise medical impressions from radiology
       findings.
    
      Your task is to:
      1. **Analyze the findings**: Carefully review all clinical findings, history, and technique information
      2. **Generate focused impressions**: Create clear, prioritized conclusions that directly address the clinical question
      3. **Maintain clinical accuracy**: Ensure all significant findings are appropriately characterized
      4. **Use appropriate medical terminology**: Follow standard radiological reporting conventions
      5. **Adapt communication style**: Match the institutional reporting style and level of detail expected
    
      Generate only the IMPRESSION section based on the provided clinical information."""

# Hugging Face pipeline for text generation does apply apply_chat_template under the hood. 
# So we do not need to process for the text generation
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": findings},
]

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

#  stops generation when the model generates token ID 32007
class EosListStoppingCriteria(StoppingCriteria):
  def __init__(self, eos_sequence = [32007]):
      self.eos_sequence = eos_sequence

  def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
      last_ids = input_ids[:,-len(self.eos_sequence):].tolist()
      return self.eos_sequence in last_ids

generation_args = {
    "max_new_tokens": 200,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
    "stopping_criteria": [EosListStoppingCriteria()]

}
output = pipe(messages, **generation_args)
print(f"AI: {output[0]['generated_text']}")

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


AI:  IMPRESSION:

The MR imaging reveals intense edema of the peroneal branch of the left sciatic nerve extending to the inferior margin of the lateral femoral condyle, with no associated soft tissue mass or cyst. There is also muscle denervation edema in the anterior and peroneal muscle compartments of the proximal left leg. The tibiofibular joint and the left hip joint are normal, with no evidence of effusion or subchondral edema. The right superior pubic ramus shows a healed fracture with deformity, and the right inferior pubic ramus has mild deformity from a prior healed fracture. The right hip joint has moderate effusion, while the left hip joint has no effusion. There is mild chondral thinning on the right side of the hip joint, with no acute chondral


In [14]:
print(f"{impression}")

1. Long segment intense neuritis with edema peroneal branch left sciatic nerve beginning distal mid femoral shaft and extending to the knee joint margin. Associated denervation edema anterior and peroneal muscle compartments of the proximal left leg. No mass or cyst or compression along this nerve. No other abnormality along the left sciatic nerve.


In [15]:
# From the above testing, it is clear that Medphi is generating more or less similar text generation.
# WIth fine tiuning the model might learn more numances of the dataset provided. 

### Model training

In [16]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    bias="none",
    target_modules = ['o_proj', 'qkv_proj', 'gate_up_proj', 'down_proj'],
    task_type="CAUSAL_LM"
)

In [17]:
peft_model = get_peft_model(model, lora_config)

In [18]:
peft_model.print_trainable_parameters()

trainable params: 12,582,912 || all params: 3,833,662,464 || trainable%: 0.3282


In [19]:
import evaluate
import numpy as np
from sklearn.metrics import accuracy_score

def preprocess_logits_for_metrics(logits, labels):
    """
    Safely preprocess logits with proper bounds checking
    IMPORTANT: Must return tensors, not numpy arrays!
    """
    if isinstance(logits, tuple):
        logits = logits[0]
    
    # Convert logits to predicted token IDs
    pred_ids = torch.argmax(logits, dim=-1)
    
    # Clamp token IDs to valid vocabulary range (keep as tensor)
    vocab_size = tokenizer.vocab_size
    pred_ids = torch.clamp(pred_ids, 0, vocab_size - 1)
    
    # CRITICAL: Return tensors, not numpy arrays
    return pred_ids, labels

def compute_metrics(pred):
    """
    Robust compute_metrics function with error handling
    """
    labels_ids = pred.label_ids
    pred_ids = pred.predictions
    
    if isinstance(pred_ids, tuple):
        pred_ids = pred_ids[0]
    
    # Convert to numpy if needed
    if isinstance(pred_ids, torch.Tensor):
        pred_ids = pred_ids.cpu().numpy()
    if isinstance(labels_ids, torch.Tensor):
        labels_ids = labels_ids.cpu().numpy()
    
    # Ensure predictions are integers and within valid range
    pred_ids = pred_ids.astype(np.int32)
    labels_ids = labels_ids.astype(np.int32)
    
    # Clamp to vocabulary range to prevent overflow
    vocab_size = tokenizer.vocab_size
    pred_ids = np.clip(pred_ids, 0, vocab_size - 1)
    
    try:
        # Attempt to decode predictions
        pred_str = []
        for seq in pred_ids:
            try:
                # Additional safety: filter out any remaining invalid tokens
                valid_seq = [int(token) for token in seq if 0 <= int(token) < vocab_size]
                decoded = tokenizer.decode(valid_seq, skip_special_tokens=True)
                pred_str.append(decoded)
            except (OverflowError, ValueError) as e:
                print(f"Warning: Failed to decode sequence, using empty string. Error: {e}")
                pred_str.append("")
        
        # Process labels
        label_str = []
        for seq in labels_ids:
            try:
                # Replace -100 with pad token
                clean_seq = np.where(seq != -100, seq, tokenizer.pad_token_id)
                # Ensure valid range
                clean_seq = np.clip(clean_seq, 0, vocab_size - 1)
                valid_seq = [int(token) for token in clean_seq]
                decoded = tokenizer.decode(valid_seq, skip_special_tokens=True)
                label_str.append(decoded)
            except (OverflowError, ValueError) as e:
                print(f"Warning: Failed to decode label sequence, using empty string. Error: {e}")
                label_str.append("")
        
        # Debug information
        print(f"Successfully decoded {len(pred_str)} predictions and {len(label_str)} labels")
        print(f"Sample prediction: {pred_str[0][:100] if pred_str[0] else 'EMPTY'}...")
        print(f"Sample label: {label_str[0][:100] if label_str[0] else 'EMPTY'}...")
        
        # Compute ROUGE metrics
        rouge = evaluate.load("rouge")
        rouge_output = rouge.compute(
            predictions=pred_str,
            references=label_str,
            rouge_types=["rouge1", "rouge2", "rougeL", "rougeLsum"],
        )
        
        return {
            "R1": round(rouge_output["rouge1"], 4),
            "R2": round(rouge_output["rouge2"], 4),
            "RL": round(rouge_output["rougeL"], 4),
            "RLsum": round(rouge_output["rougeLsum"], 4),
        }
        
    except Exception as e:
        print(f"Error in ROUGE computation: {e}")
        # Fallback to simple metrics
        return simple_token_metrics(pred_ids, labels_ids)

### Style Metrics

In [20]:
def check_structured_format(text):
    """Check for numbered list (e.g., "1. ... 2. ...")"""
    numbered_pattern = r"^\d+\.\s+.*?(?:\n\d+\.\s+.*?)*$"
    return bool(re.match(numbered_pattern, text, re.MULTILINE))

def check_bullet_format(text):
    """Check for bullet points (e.g., "- ... - ...")"""
    bullet_pattern = r"^[-*•]\s+.*?(?:\n[-*•]\s+.*?)*$"
    return bool(re.match(bullet_pattern, text, re.MULTILINE))

# Safe preprocessing function (keeping what works)
def safe_preprocess_logits_for_metrics(logits, labels):
    """
    Safely preprocess logits - must return tensors!
    """
    if isinstance(logits, tuple):
        logits = logits[0]
    
    # Convert logits to predicted token IDs
    pred_ids = torch.argmax(logits, dim=-1)
    
    # Clamp token IDs to valid vocabulary range (keep as tensor)
    vocab_size = tokenizer.vocab_size
    pred_ids = torch.clamp(pred_ids, 0, vocab_size - 1)
    
    return pred_ids, labels

def compute_style_metrics(eval_pred):
    """
    Simple compute metrics with just structured and bullet format checking
    """
    try:
        labels_ids = eval_pred.label_ids
        pred_ids = eval_pred.predictions
        
        if isinstance(pred_ids, tuple):
            pred_ids = pred_ids[0]
        
        # Convert to numpy if needed
        if isinstance(pred_ids, torch.Tensor):
            pred_ids = pred_ids.cpu().numpy()
        if isinstance(labels_ids, torch.Tensor):
            labels_ids = labels_ids.cpu().numpy()
        
        # Ensure valid token IDs
        vocab_size = tokenizer.vocab_size
        pred_ids = np.clip(pred_ids, 0, vocab_size - 1)
        
        # Safe decoding with error handling
        decoded_preds = []
        decoded_labels = []
        
        for pred_seq in pred_ids:
            try:
                decoded = tokenizer.decode(pred_seq, skip_special_tokens=True)
                decoded_preds.append(decoded)
            except Exception as e:
                print(f"Warning: Failed to decode prediction: {e}")
                decoded_preds.append("")
        
        for label_seq in labels_ids:
            try:
                # Replace -100 with pad token
                clean_seq = np.where(label_seq != -100, label_seq, tokenizer.pad_token_id)
                clean_seq = np.clip(clean_seq, 0, vocab_size - 1)
                decoded = tokenizer.decode(clean_seq, skip_special_tokens=True)
                decoded_labels.append(decoded)
            except Exception as e:
                print(f"Warning: Failed to decode label: {e}")
                decoded_labels.append("")
        
        # Calculate only the two style metrics you want
        structured_count = sum(1 for pred in decoded_preds if check_structured_format(pred))
        bullet_count = sum(1 for pred in decoded_preds if check_bullet_format(pred))
        
        # Simple metrics
        metrics = {
            "structured_format_ratio": structured_count / len(decoded_preds),
            "bullet_format_ratio": bullet_count / len(decoded_preds),
            "num_samples": len(decoded_preds),
        }
        
        # Try to compute ROUGE (with fallback)
        try:
            rouge = evaluate.load("rouge")
            rouge_output = rouge.compute(
                predictions=decoded_preds,
                references=decoded_labels,
                rouge_types=["rouge1", "rouge2", "rougeL"],
            )
            metrics.update({
                "rouge1": round(rouge_output["rouge1"], 4),
                "rouge2": round(rouge_output["rouge2"], 4),
                "rougeL": round(rouge_output["rougeL"], 4),
            })
            print(f"✅ Successfully computed metrics for {len(decoded_preds)} samples")
        except Exception as e:
            print(f"⚠️ ROUGE computation failed: {e}")
            metrics["rouge_error"] = str(e)[:50]
        
        # Debug sample outputs
        if len(decoded_preds) > 0:
            print(f"📝 Sample prediction: {decoded_preds[0][:100]}...")
            print(f"📋 Sample label: {decoded_labels[0][:100]}...")
        
        return metrics
        
    except Exception as e:
        print(f"❌ Style metrics computation failed: {e}")
        return {
            "error": str(e)[:50],
            "num_samples": 0,
        }

In [21]:
from transformers import TrainingArguments
from trl import SFTTrainer, SFTConfig
import os 

sft_config = SFTConfig(
    # Basic training parameters
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=16,
    
    # Optimization
    learning_rate=2e-4,
    weight_decay=0.001,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    optim="adamw_torch",
    
    # Evaluation and saving
    eval_strategy="steps",
    eval_steps=30,
    save_strategy="steps",
    save_steps=30,
    save_total_limit=2,
    greater_is_better=False,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",

    # Evaluation settings
    eval_accumulation_steps=1,  # Process eval in smaller chunks
    prediction_loss_only=False,
    
    # Logging
    logging_dir="./logs",
    logging_strategy="steps",
    logging_steps=20,

    # assistant_only_loss=True,

    # Memory and performance
    dataloader_drop_last=True,
    dataloader_num_workers=4,
    remove_unused_columns=False,
    
    # Mixed precision training
    bf16=True if torch.cuda.is_bf16_supported() else False,
    
    # SFT-specific parameters
    max_length=1024,
    packing=True,  # Pack multiple short sequences into one
    dataset_text_field="text",
    
    # Gradient settings
    max_grad_norm=0.3,
    gradient_checkpointing=True,
)

In [22]:
# Create Trainer object
trainer = SFTTrainer(
    model=peft_model,
    args=sft_config,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    peft_config=lora_config,
    compute_metrics=compute_style_metrics,
    preprocess_logits_for_metrics=safe_preprocess_logits_for_metrics
)

Padding-free training is enabled, but the attention implementation is not set to 'flash_attention_2'. Padding-free training flattens batches into a single sequence, and 'flash_attention_2' is the only known attention mechanism that reliably supports this. Using other implementations may lead to unexpected behavior. To ensure compatibility, set `attn_implementation='flash_attention_2'` in the model configuration, or verify that your attention mechanism can handle flattened sequences.
You are using packing, but the attention implementation is not set to 'flash_attention_2' or 'kernels-community/vllm-flash-attn3'. Packing flattens batches into a single sequence, and Flash Attention is the only known attention mechanisms that reliably support this. Using other implementations may lead to cross-contamination between batches. To avoid this, either disable packing by setting `packing=False`, or set `attn_implementation='flash_attention_2'` or `attn_implementation='kernels-community/vllm-flash

In [23]:
train_result = trainer.train()

  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss,Structured Format Ratio,Bullet Format Ratio,Num Samples,Rouge1,Rouge2,Rougel,Entropy,Num Tokens,Mean Token Accuracy
30,1.112,0.640237,0.0,0.0,614,0.8026,0.6483,0.7602,0.643234,828119.0,0.846181
60,0.5514,0.511141,0.0,0.0,614,0.8354,0.6969,0.7988,0.50421,1651047.0,0.873085
90,0.5097,0.466889,0.0,0.0,614,0.8472,0.7129,0.8117,0.495526,2473316.0,0.881857
120,0.4432,0.441129,0.0,0.0,614,0.854,0.7242,0.8204,0.442003,3297876.0,0.887456
150,0.4465,0.430123,0.0,0.0,614,0.8579,0.7296,0.8243,0.425535,4122039.0,0.890209


✅ Successfully computed metrics for 614 samples
📝 Sample prediction: You are an expert radiologist assistant specializing in generating accurate and concise medical impr...
📋 Sample label: 给 You are an expert radiologist assistant specializing in generating accurate and concise medical im...


  return fn(*args, **kwargs)


✅ Successfully computed metrics for 614 samples
📝 Sample prediction: You are an expert radiologist assistant specializing in generating accurate and concise medical impr...
📋 Sample label: 给 You are an expert radiologist assistant specializing in generating accurate and concise medical im...


  return fn(*args, **kwargs)


✅ Successfully computed metrics for 614 samples
📝 Sample prediction: You are an expert radiologist assistant specializing in generating accurate and concise medical impr...
📋 Sample label: 给 You are an expert radiologist assistant specializing in generating accurate and concise medical im...


  return fn(*args, **kwargs)


✅ Successfully computed metrics for 614 samples
📝 Sample prediction: You are an expert radiologist assistant specializing in generating accurate and concise medical impr...
📋 Sample label: 给 You are an expert radiologist assistant specializing in generating accurate and concise medical im...


  return fn(*args, **kwargs)


✅ Successfully computed metrics for 614 samples
📝 Sample prediction: You are an expert radiologist assistant specializing in generating accurate and concise medical impr...
📋 Sample label: 给 You are an expert radiologist assistant specializing in generating accurate and concise medical im...


  return fn(*args, **kwargs)


### Save the adapter

In [24]:
def save_lora_adapter(trainer, save_path="./lora_adapter"):
    
    # Save the adapter
    trainer.model.save_pretrained(save_path)
    trainer.tokenizer.save_pretrained(save_path)
    
    print(f"LoRA adapter saved to: {save_path}")
    print(f"Adapter size: {get_directory_size(save_path):.2f} MB")
    
    return save_path

In [25]:
def get_directory_size(path):
    total = 0
    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            total += os.path.getsize(fp)
    return total / (1024 * 1024)

In [26]:
adapter_path = save_lora_adapter(trainer)

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


LoRA adapter saved to: ./lora_adapter
Adapter size: 51.97 MB


## Push to Huggingface

In [27]:
from huggingface_hub import HfApi, create_repo

In [28]:
token = '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'

In [29]:
api = HfApi(token=token)

In [30]:
repo_id="sabber/medphi-radiology-summary-adapter"

In [31]:
create_repo(repo_id=repo_id, token=token, exist_ok=True)

RepoUrl('https://huggingface.co/sabber/medphi-radiology-summary-adapter', endpoint='https://huggingface.co', repo_type='model', repo_id='sabber/medphi-radiology-summary-adapter')

In [32]:
files_to_upload = [
    "adapter_config.json",
    "adapter_model.safetensors",  # or adapter_model.bin
    "tokenizer.json",
    "tokenizer_config.json",
    "special_tokens_map.json"
]

for file in files_to_upload:
    file_path = os.path.join(adapter_path, file)
    if os.path.exists(file_path):
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file,
            repo_id=repo_id,
            token=token
        )
        print(f"Uploaded: {file}")

Uploaded: adapter_config.json


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

Uploaded: adapter_model.safetensors
Uploaded: tokenizer.json
Uploaded: tokenizer_config.json
Uploaded: special_tokens_map.json


### Test model with trained adapter

In [33]:
os.listdir("./")

['lora_adapter',
 'logs',
 'results',
 '.ipynb_checkpoints',
 'data',
 '00_eda.ipynb',
 '02_preprocess_.ipynb',
 '03_impression_model_ft.ipynb',
 '04_model_evaluation.ipynb',
 'requirements.txt',
 'processed_data.zip']

In [34]:
os.listdir("./lora_adapter")

['tokenizer.json',
 'tokenizer.model',
 'added_tokens.json',
 'special_tokens_map.json',
 'tokenizer_config.json',
 'chat_template.jinja',
 'adapter_config.json',
 'adapter_model.safetensors',
 'README.md']

In [35]:
# import json

In [36]:
# # 1. First, let's check the current adapter config
# with open("./lora_adapter/adapter_config.json", "r") as f:
#     adapter_config = json.load(f)

# print("Current adapter config:")
# print(adapter_config)

# # 2. Add the missing base model path if it's not there
# if "base_model_name_or_path" not in adapter_config or adapter_config["base_model_name_or_path"] is None:
#     adapter_config["base_model_name_or_path"] = "microsoft/MediPhi-Instruct"
    
#     # Save the fixed config
#     with open("./lora_adapter/adapter_config.json", "w") as f:
#         json.dump(adapter_config, f, indent=2)
    
#     print("✅ Fixed adapter_config.json with base model path")
# else:
#     print("✅ Base model path already exists")

In [37]:
# from peft import AutoPeftModelForCausalLM
# from transformers import AutoTokenizer

# ft_model = AutoPeftModelForCausalLM.from_pretrained(
#     "./lora_adapter",
#     torch_dtype="auto",
#     device_map="auto"
# )
# ft_tokenizer = AutoTokenizer.from_pretrained("./lora_adapter")