In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
from peft import PeftModel
import torch

In [2]:
print(torch.cuda.is_available())  # Should return True if CUDA is available
print(torch.cuda.device_count())  # Number of GPUs detected
print(torch.cuda.get_device_name(0))

True
1
NVIDIA GeForce GTX 1650


## Configurations

In [None]:
model_id = "mistralai/Mistral-7B-v0.1"
peft_model_gsm8k_id = "predibase/gsm8k"
peft_model_magicoder_id = "predibase/magicoder"
peft_model_gluecola_id = "predibase/glue_cola"
peft_model_hellaswag_id = "predibase/hellaswag"

adapter_name_gsm8k = "gsm8k"
adapter_name_magicoder = "magicoder"
adapter_name_gluecola = "glue_cola"
adapter_name_hellaswag = "hellaswag"

merged_adapter_name = "gsm8k_magicoder_gluecola_hellaswag_avg"

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    compute_dtype = torch.bfloat16 # Or torch.float16 depending on your GPU
else:
    compute_dtype = torch.float32

## Load model and Adaptors

In [5]:
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

In [8]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=compute_dtype,
    device_map={"":"cuda"}, # Automatically distributes across GPUs if available/needed
    # offload_folder='offload/'
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [12]:
model = PeftModel.from_pretrained(
    base_model,
    peft_model_gsm8k_id,
    adapter_name=adapter_name_gsm8k, # You can name the first adapter here
    # device_map="auto", # Apply device mapping here if needed
    low_cpu_mem_usage=True,
    offload_folder='offload/'
)
model.load_adapter(peft_model_magicoder_id, adapter_name=adapter_name_magicoder)
model.load_adapter(peft_model_gluecola_id, adapter_name=adapter_name_gluecola)
model.load_adapter(peft_model_hellaswag_id, adapter_name=adapter_name_hellaswag)
# model.to("cuda")



_IncompatibleKeys(missing_keys=['base_model.model.model.layers.0.self_attn.q_proj.lora_A.gsm8k_magicoder_gluecola_hellaswag_avg.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.gsm8k_magicoder_gluecola_hellaswag_avg.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_A.gsm8k_magicoder_gluecola_hellaswag_avg.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_B.gsm8k_magicoder_gluecola_hellaswag_avg.weight', 'base_model.model.model.layers.1.self_attn.q_proj.lora_A.gsm8k_magicoder_gluecola_hellaswag_avg.weight', 'base_model.model.model.layers.1.self_attn.q_proj.lora_B.gsm8k_magicoder_gluecola_hellaswag_avg.weight', 'base_model.model.model.layers.1.self_attn.v_proj.lora_A.gsm8k_magicoder_gluecola_hellaswag_avg.weight', 'base_model.model.model.layers.1.self_attn.v_proj.lora_B.gsm8k_magicoder_gluecola_hellaswag_avg.weight', 'base_model.model.model.layers.2.self_attn.q_proj.lora_A.gsm8k_magicoder_gluecola_hellaswag_avg.weight', 'base_model.model.model

In [13]:
model.add_weighted_adapter(
    adapters=[adapter_name_gsm8k, adapter_name_gluecola, adapter_name_magicoder, adapter_name_hellaswag],
    weights=[0.25, 0.25, 0.25, 0.25],
    adapter_name=merged_adapter_name,
    combination_type="svd" # 'linear' is the default for weighted sum
)

In [14]:
model.set_adapter(merged_adapter_name)

save_directory = f"weights/{merged_adapter_name}_svd"
print(f"\nSaving the merged adapter '{merged_adapter_name}' to {save_directory}...")
model.save_pretrained(save_directory, selected_adapters=[merged_adapter_name])
tokenizer.save_pretrained(save_directory) 


Saving the merged adapter 'gsm8k_magicoder_gluecola_hellaswag_avg' to weights/gsm8k_magicoder_gluecola_hellaswag_avg_svd...


('weights/gsm8k_magicoder_gluecola_hellaswag_avg_svd\\tokenizer_config.json',
 'weights/gsm8k_magicoder_gluecola_hellaswag_avg_svd\\special_tokens_map.json',
 'weights/gsm8k_magicoder_gluecola_hellaswag_avg_svd\\tokenizer.json')

## Testing

In [10]:
print("\n--- Testing the merged model ---")
# Make sure the merged adapter is active (we did this in step 5)
print(f"Current active adapter: {model.active_adapter}") # Verify it's the merged one

prompt_gsm8k = "What is 5 * 8 + 3?" # Example GSM8K style
prompt_magicoder = "def fibonacci(n):" # Example Magicoder style
prompt_gluecola = 'Determine if the sentence below is syntactically and semantically correct. If it is syntactically and semantically correct, respond "1". Otherwise, respond "0".\n\nSentence: Every senator seems to become more corrupt, as he talks to more lobbyists.\n\nLabel: '
prompt_hellaswag = 'You are provided with an incomplete passage below as well as 4 endings in quotes and separated by commas, with only one of them being the correct ending. Treat the endings as being labelled 0, 1, 2, 3 in order. Please respond with the number corresponding to the correct ending for the passage.\n\n### Passage: The mother instructs them on how to brush their teeth while laughing. The boy helps his younger sister brush his teeth. she\n\n### Endings: [\'shows how to hit the mom and then kiss his dad as well.\' \'brushes past the camera, looking better soon after.\' \'glows from the center of the camera as a reaction.\' \'gets them some water to gargle in their mouths.\']\n\n### Correct Ending Number: '
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

for prompt in [prompt_magicoder, prompt_hellaswag]:
    print(f"\nPrompt: {prompt}")
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device)
    with torch.no_grad(): 
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.7,
            pad_token_id=tokenizer.pad_token_id # Important for generation
            )
    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated Output:\n{decoded_output}")


--- Testing the merged model ---
Current active adapter: magicoder_hellaswag_avg

Prompt: def fibonacci(n):




Generated Output:
def fibonacci(n):
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fibonacci(n-1) + fibonacci(n-2)

Prompt: You are provided with an incomplete passage below as well as 4 endings in quotes and separated by commas, with only one of them being the correct ending. Treat the endings as being labelled 0, 1, 2, 3 in order. Please respond with the number corresponding to the correct ending for the passage.

### Passage: The mother instructs them on how to brush their teeth while laughing. The boy helps his younger sister brush his teeth. she

### Endings: ['shows how to hit the mom and then kiss his dad as well.' 'brushes past the camera, looking better soon after.' 'glows from the center of the camera as a reaction.' 'gets them some water to gargle in their mouths.']

### Correct Ending Number: 
Generated Output:
You are provided with an incomplete passage below as well as 4 endings in quotes and separated by commas, with only one o