# Merge the `peleke-mistral-7b-instruct-v0.2` Fine-Tuned LoRA Weights with the Base Model

In [1]:
import gc
import torch
# Release cached GPU memory and run the garbage collector before doing anything else.
torch.cuda.empty_cache()
gc.collect()

# Report current memory usage on GPU 0 (bytes converted to GB).
print(f"GPU Memory before: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
print(f"GPU Memory reserved: {torch.cuda.memory_reserved(0) / 1e9:.2f} GB reserved")

# If a model from a previous run is still bound to `model`, delete it first.
# Only NameError (no such variable) means "nothing to clear"; any other failure,
# e.g. an error raised while emptying the cache, must surface instead of being
# silently reported as "No previous model to clear".
try:
    del model
except NameError:
    print("No previous model to clear")
else:
    # The reference is gone; reclaim the memory it was holding.
    torch.cuda.empty_cache()
    gc.collect()
    print("Previous model cleared from memory")

GPU Memory before: 0.00 GB allocated
GPU Memory reserved: 0.00 GB reserved
No previous model to clear


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import torch

In [None]:
# Identifier of the fine-tuned LoRA adapter; it is handed to the PeftConfig,
# AutoTokenizer and PeftModel `from_pretrained` calls below.
model_path = "silicobio/peleke-mistral-7b-instruct-v0.2"

# Directory that receives the merged model and the tokenizer via `save_pretrained`.
merged_model_output_dir = "./peleke-mistral-7b-instruct-v0.2_merged"

In [None]:
# Tokenizer stored alongside the adapter.
# NOTE(review): trust_remote_code=True lets code shipped in the repository run
# locally — confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)

# Adapter configuration; its `base_model_name_or_path` is used later to locate
# the base model the adapter is applied to.
config = PeftConfig.from_pretrained(model_path)

In [None]:
# Reuse the EOS token for padding when the tokenizer defines no pad token.
needs_pad_token = tokenizer.pad_token is None
if needs_pad_token:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Options for loading the base checkpoint named in the adapter configuration.
base_model_load_options = {
    "device_map": "auto",  # Automatically handle device placement
    "torch_dtype": torch.bfloat16,
    "low_cpu_mem_usage": True,
    "trust_remote_code": True,
}

# Load the base model that the adapter is later applied to.
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    **base_model_load_options,
)

In [None]:
# IMPORTANT: Resize embeddings to match the fine-tuned vocabulary size
expected_vocab_size = 32005  # The fine-tuned model has 5 additional tokens

# Sanity check: the tokenizer was loaded from the fine-tuned checkpoint, so its
# length is expected to agree with the hard-coded size. Only warn (do not fail):
# the value above is what this notebook has always resized to.
tokenizer_vocab_size = len(tokenizer)
if tokenizer_vocab_size != expected_vocab_size:
    print(
        f"WARNING: tokenizer has {tokenizer_vocab_size} tokens but embeddings "
        f"are being resized to {expected_vocab_size}"
    )

# Kept as the last expression so the notebook still displays the returned value.
base_model.resize_token_embeddings(expected_vocab_size)

In [None]:
# Attach the fine-tuned adapter from `model_path` to the base model.
adapter_load_options = {
    "is_trainable": False,  # Set to False for inference
}
model = PeftModel.from_pretrained(base_model, model_path, **adapter_load_options)

# NOTE(review): a `model.eval()` call was left commented out here by the author;
# presumably train/eval mode does not matter for a pure weight merge — confirm.
# model.eval()

In [10]:
## Merge Weights
# Merge the loaded adapter into the underlying model via PEFT's
# `merge_and_unload()` and rebind `model` to the returned object, which is what
# gets saved afterwards.
model = model.merge_and_unload()

In [11]:
## Save Merged Model
# Write the merged model first, then the tokenizer, into the same output directory.
model.save_pretrained(save_directory=merged_model_output_dir)
# Kept as the final expression so the notebook displays the written tokenizer files.
tokenizer.save_pretrained(save_directory=merged_model_output_dir)

('./peleke-mistral-7b-instruct-v0.2_merged/tokenizer_config.json',
 './peleke-mistral-7b-instruct-v0.2_merged/special_tokens_map.json',
 './peleke-mistral-7b-instruct-v0.2_merged/chat_template.jinja',
 './peleke-mistral-7b-instruct-v0.2_merged/tokenizer.json')