In [8]:
import yaml
import os
def to_yaml(data, filepath):
    """Serialize ``data`` as block-style YAML into the file at ``filepath``.

    The file is opened in write mode, so an existing file at ``filepath`` is
    overwritten. It is written as UTF-8 explicitly, matching the
    ``encoding="utf-8"`` used when the merge loop reads the configs back.

    Args:
        data: Object to serialize; handed unchanged to ``yaml.dump``.
        filepath: Destination path of the YAML file.
    """
    # Without an explicit encoding, open() falls back to the platform locale,
    # which may differ from the UTF-8 the reader expects.
    with open(filepath, 'w', encoding='utf-8') as file:
        # default_flow_style=False -> nested collections use block style.
        yaml.dump(data, file, default_flow_style=False)

In [14]:
# Folder that receives the generated baseline merge configs (YAML files).
config_dir = "/workspace/logits-guided-merger/dev/configs/baselines/"
# Create the folder and any missing parents; no error if it already exists.
os.makedirs(name=config_dir, exist_ok=True)

In [10]:
# Task-arithmetic baseline: every listed model enters the merge with the same
# weight (0.5), relative to `base_model`. The config is written to
# <config_dir>/task_arithmetic.yaml.
base_model = "/workspace/models/Llama-3.2-3B/"
models = [
    "/workspace/models/llama-3.2-3b-wizard/",
    "/workspace/models/experts/llama-3.2-3b-wizard-expert-math-100k/checkpoint-264",
]

task_vector_dict = dict(
    models=[
        dict(model=model_path, parameters=dict(weight=0.5))
        for model_path in models
    ],
    merge_method="task_arithmetic",
    base_model=base_model,
    dtype="bfloat16",
)

filepath = os.path.join(config_dir, "task_arithmetic.yaml")
to_yaml(task_vector_dict, filepath)

In [15]:
# TIES baselines: one config per value in `top_ks`, written to
# <config_dir>/ties_topK_<value>.yaml. Every model gets weight 0.5.
base_model = "/workspace/models/Llama-3.2-3B/"
models = [
    "/workspace/models/llama-3.2-3b-wizard/",
    "/workspace/models/experts/llama-3.2-3b-wizard-expert-math-100k/checkpoint-264",
]

top_ks = [0.1, 0.2, 0.3]
for top_k in top_ks:
    # NOTE(review): density is set to 1 - top_k, so the "topK_0.1" file carries
    # density 0.9 -- confirm this is the intended meaning of top_k.
    # NOTE(review): unlike the task-arithmetic config, no "dtype" key is
    # written here -- confirm whether that difference is deliberate.
    ties_dict = dict(
        models=[
            dict(model=model_path, parameters=dict(weight=0.5, density=1 - top_k))
            for model_path in models
        ],
        merge_method="ties",
        base_model=base_model,
    )
    filename = f"ties_topK_{top_k}.yaml"
    filepath = os.path.join(config_dir, filename)
    to_yaml(ties_dict, filepath)

In [31]:
# base_model = "gpt2"
# models = ["gpt2", "gpt2"]
# top_ks = [0.5, 0.3, 0.1]
# lambs = range(5, 11, 1)
# for top_k in top_ks:
#     for lamb in lambs:
#         dare_ties_dict = {
#             "models": [{"model": model, "parameters": {"weight": lamb / 10, "density": 1 - top_k}} for model in models],
#             "merge_method": "dare_ties",
#             "base_model": "gpt2"
#         }
#         name = "dare_ties_topK_" + str(top_k) + "lamb_" + str(lamb) + ".yml"
#         to_yaml(dare_ties_dict, name)

In [16]:
# Settings handed to mergekit's MergeOptions by the merge loop.
# OUTPUT_PATH and CONFIG_YML are intentionally not fixed here: the merge loop
# derives both from each config file name.
LOW_CPU_MEMORY = False  # enable only if you have more VRAM than RAM + swap
LAZY_UNPICKLE = False  # experimental low-memory model loader
COPY_TOKENIZER = True  # passed as copy_tokenizer: keep a tokenizer with the merge
LORA_MERGE_CACHE = "./tmp"  # passed as lora_merge_cache; change it to keep the cached files

In [17]:
import os
import torch
import yaml

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

# Run one merge per YAML config found in `config_dir`.
#
# * sorted(): os.listdir returns entries in arbitrary order, so sorting makes
#   the run order (and the "Done:" log) reproducible.
# * Only regular *.yaml / *.yml files are processed. Anything else in the
#   folder (for example a Jupyter ".ipynb_checkpoints" directory) would
#   otherwise fail in open() or map to a wrong output folder.
for config_name in sorted(os.listdir(config_dir)):
    CONFIG_YML = os.path.join(config_dir, config_name)
    config_stem, config_ext = os.path.splitext(config_name)
    if config_ext.lower() not in (".yaml", ".yml") or not os.path.isfile(CONFIG_YML):
        continue

    # Output folder is named after the config file minus its extension,
    # e.g. "ties_topK_0.1.yaml" -> ".../baselines/ties_topK_0.1".
    OUTPUT_PATH = os.path.join(
        "/workspace/logits-guided-merger/results/baselines/",
        config_stem,
    )
    with open(CONFIG_YML, "r", encoding="utf-8") as fp:
        merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

    run_merge(
        merge_config,
        out_path=OUTPUT_PATH,
        options=MergeOptions(
            lora_merge_cache=LORA_MERGE_CACHE,
            # Use the GPU only when one is actually available.
            cuda=torch.cuda.is_available(),
            copy_tokenizer=COPY_TOKENIZER,
            lazy_unpickle=LAZY_UNPICKLE,
            low_cpu_memory=LOW_CPU_MEMORY,
        ),
    )
    print("Done: ", CONFIG_YML)

Warmup loader cache: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 1997.29it/s]
Executing graph: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1526/1526 [00:21<00:00, 71.31it/s]


Done:  /workspace/logits-guided-merger/dev/configs/baselines/ties_topK_0.1.yaml


Warmup loader cache: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 48395.82it/s]
Executing graph: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1526/1526 [00:15<00:00, 99.97it/s]


Done:  /workspace/logits-guided-merger/dev/configs/baselines/ties_topK_0.3.yaml


Warmup loader cache: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 36792.14it/s]
Executing graph: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1526/1526 [00:15<00:00, 101.61it/s]


Done:  /workspace/logits-guided-merger/dev/configs/baselines/ties_topK_0.2.yaml


Warmup loader cache: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 56173.71it/s]
Executing graph: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1526/1526 [00:13<00:00, 116.76it/s]

Done:  /workspace/logits-guided-merger/dev/configs/baselines/task_arithmetic.yaml





In [12]:
from transformers import AutoTokenizer
import os
# Tokenizer loaded from the wizard model directory; the cell that follows the
# merges saves it into every baseline output folder.
tokenizer = AutoTokenizer.from_pretrained("/workspace/models/llama-3.2-3b-wizard/")

In [8]:
# tokenizer = AutoTokenizer.from_pretrained(
#     "/workspace/models/Llama-3.2-3B-Instruct/"
# )

In [15]:
# Save `tokenizer` into the output folder of every baseline merge.
#
# Output folders are derived from the config file names, so only regular
# *.yaml / *.yml files are considered. An unfiltered os.listdir would also
# yield stray entries (e.g. ".ipynb_checkpoints"), whose derived path is a
# bogus folder or the results root itself. sorted() keeps the order stable,
# since os.listdir order is arbitrary.
for config_name in sorted(os.listdir(config_dir)):
    CONFIG_YML = os.path.join(config_dir, config_name)
    config_stem, config_ext = os.path.splitext(config_name)
    if config_ext.lower() not in (".yaml", ".yml") or not os.path.isfile(CONFIG_YML):
        continue

    # Same naming rule as the merge loop: config file name minus extension.
    OUTPUT_PATH = os.path.join(
        "/workspace/logits-guided-merger/results/baselines/",
        config_stem,
    )
    tokenizer.save_pretrained(OUTPUT_PATH)