In [4]:
# actually do merge
import torch
import yaml

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

In [5]:
# --- Paths -----------------------------------------------------------------
# Folder to store the result in.
OUTPUT_PATH = "../models/merged-test"
# Directory handed to MergeOptions as `lora_merge_cache`; change it if you
# want to keep the cached files around.
LORA_MERGE_CACHE = "/tmp"
# Merge configuration file (YAML).
CONFIG_YML = "slerp-config.yaml"

# --- MergeOptions switches ---------------------------------------------------
# Forwarded as `copy_tokenizer`; leave on to get a tokenizer with the result.
COPY_TOKENIZER = True
# Forwarded as `lazy_unpickle`; experimental low-memory model loader.
LAZY_UNPICKLE = False
# Forwarded as `low_cpu_memory`; enable only if you have more VRAM than
# RAM + swap.
LOW_CPU_MEMORY = False

In [6]:
# Read the YAML merge recipe from disk, then validate the parsed document
# into a MergeConfiguration.
with open(CONFIG_YML, "r", encoding="utf-8") as config_file:
    raw_config = yaml.safe_load(config_file)
merge_config = MergeConfiguration.model_validate(raw_config)

# Runtime options for the merge, driven by the constants in the config cell.
# `cuda` is deliberately not passed, so MergeOptions keeps its own default;
# add `cuda=torch.cuda.is_available()` here to opt in when a GPU is present.
options = MergeOptions(
    lora_merge_cache=LORA_MERGE_CACHE,
    copy_tokenizer=COPY_TOKENIZER,
    lazy_unpickle=LAZY_UNPICKLE,
    low_cpu_memory=LOW_CPU_MEMORY,
)

In [7]:
import importlib
import importlib.resources
import logging
import os
import shutil
from collections import Counter
from typing import Optional

import tqdm
import transformers

from mergekit._data import chat_templates
from mergekit.architecture import ArchitectureInfo, get_architecture_info
from mergekit.card import generate_card
from mergekit.config import MergeConfiguration
from mergekit.graph import Executor
from mergekit.io.tasks import LoaderCache
from mergekit.options import MergeOptions
from mergekit.plan import MergePlanner
from mergekit.tokenizer import TokenizerInfo

In [8]:
from mergekit.merge import (
	_model_out_config
)

In [9]:
# Apply the configured random seed, if one was given.
if options.random_seed is not None:
    transformers.trainer_utils.set_seed(options.random_seed)

# A configuration that lists neither models nor slices has nothing to produce.
if not (merge_config.models or merge_config.slices):
    raise RuntimeError("No output requested")

# Resolve the architecture description of every model the config references.
model_arch_info = [
    get_architecture_info(
        referenced.config(trust_remote_code=options.trust_remote_code)
    )
    for referenced in merge_config.referenced_models()
]

# Unless `allow_crimes` is set, every referenced model must share the
# architecture of the first one.
if not options.allow_crimes and not all(
    candidate == model_arch_info[0] for candidate in model_arch_info[1:]
):
    raise RuntimeError(
        "Must specify --allow-crimes to attempt to mix different architectures"
    )

# The first model's architecture is the one used for the rest of the notebook.
arch_info = model_arch_info[0]

In [11]:
# Build the loader cache and hand it the merge options.
loader_cache = LoaderCache()
loader_cache.setup(options=options)

# Derive the config object for the output model from the merge recipe and
# the chosen architecture.
cfg_out = _model_out_config(
    merge_config,
    arch_info,
    trust_remote_code=options.trust_remote_code,
)

# Touch every referenced model once through the cache, with a progress bar
# (suppressed when `options.quiet` is set).
pbar = tqdm.tqdm(
    merge_config.referenced_models(),
    desc="Warmup loader cache",
    disable=options.quiet,
)
for model in pbar:
    loader_cache.get(model)
# Drop the progress-bar reference now that the warm-up loop has finished.
del pbar

Warmup loader cache: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 665.92it/s]


In [12]:
# Build the planner from the validated recipe, the selected architecture,
# the runtime options and the output-model config.
planner = MergePlanner(
    merge_config, arch_info, options=options, out_model_config=cfg_out
)

# Ask the planner for the tasks that write the merged model to OUTPUT_PATH.
targets = planner.plan_to_disk(out_path=OUTPUT_PATH)

In [13]:
# Print each planned tensor next to the `t` value of its tensor task, with a
# dashed rule whenever the first three dot-separated name components change
# (for names like `model.layers.N.*`, that is once per layer).
# The final target is skipped; presumably it is not a per-tensor task —
# TODO confirm.
name = ""
for target in targets[:-1]:
    group_key = target.tensor_name.split(".")[:3]
    t_value = target.tensor_task.t
    if name != group_key:
        # New group: draw the separator and remember the new prefix.
        print("---" * 20)
        name = group_key
    print(target.tensor_name, t_value)

------------------------------------------------------------
model.embed_tokens.weight 0.5
------------------------------------------------------------
model.layers.0.input_layernorm.weight 0.5
model.layers.0.self_attn.q_proj.weight 0.0
model.layers.0.self_attn.k_proj.weight 0.0
model.layers.0.self_attn.v_proj.weight 0.0
model.layers.0.self_attn.o_proj.weight 0.0
model.layers.0.post_attention_layernorm.weight 0.5
model.layers.0.mlp.up_proj.weight 1.0
model.layers.0.mlp.gate_proj.weight 1.0
model.layers.0.mlp.down_proj.weight 1.0
------------------------------------------------------------
model.layers.1.input_layernorm.weight 0.5
model.layers.1.self_attn.q_proj.weight 0.06451612903225806
model.layers.1.self_attn.k_proj.weight 0.06451612903225806
model.layers.1.self_attn.v_proj.weight 0.06451612903225806
model.layers.1.self_attn.o_proj.weight 0.06451612903225806
model.layers.1.post_attention_layernorm.weight 0.5
model.layers.1.mlp.up_proj.weight 0.935483870967742
model.layers.1.mlp.gate