# Installing MergeKit

In [1]:
!git clone https://github.com/cg123/mergekit.git
!cd mergekit && pip install -U -q -e .

Cloning into 'mergekit'...
remote: Enumerating objects: 939, done.[K
remote: Counting objects: 100% (399/399), done.[K
remote: Compressing objects: 100% (172/172), done.[K
remote: Total 939 (delta 300), reused 291 (delta 225), pack-reused 540[K
Receiving objects: 100% (939/939), 251.48 KiB | 6.13 MiB/s, done.
Resolving deltas: 100% (630/630), done.
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m381.9/381.9 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m10

# Config MergeKit
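- `layer_range: [0, 48]` — the 10.7B model's layer range
- `tokenizer_source: union` — build the tokenizer from the union of the merged models' vocabularies (use `base` to keep only the base model's tokenizer)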

In [2]:
import yaml

# Name of the merged model; later cells reuse it for the model card title
# and for the Hub repository id.
MODEL_NAME = "SOLAR-10.7B-slerp"

# mergekit recipe: slerp merge of two SOLAR-10.7B checkpoints over layers 0-48,
# with separate `t` schedules for the self_attn and mlp filters and an
# unfiltered value of 0.5.
yaml_config = """
slices:
  - sources:
      - model: LDCC/LDCC-SOLAR-10.7B
        layer_range: [0, 48]
      - model: upstage/SOLAR-10.7B-Instruct-v1.0
        layer_range: [0, 48]
merge_method: slerp
base_model: upstage/SOLAR-10.7B-Instruct-v1.0
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
tokenizer_source: union
dtype: float16

"""

# Persist the recipe to disk so the mergekit command line can read it.
with open('config.yaml', mode='w', encoding="utf-8") as config_file:
    config_file.write(yaml_config)

In [3]:
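# Optional cleanup: uncomment to delete a previous `merge` output directory before re-running the merge.
# !rm -rf merge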

In [8]:
# Run the merge described by config.yaml; the later cells read the result from
# the `merge` directory (model card is saved there and the folder is uploaded).
# --out-shard-size 1B sets the size of each output shard.
!mergekit-yaml config.yaml merge --out-shard-size 1B

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
100% 2/2 [00:00<00:00,  4.99it/s]
Fetching 11 files: 100% 11/11 [00:00<00:00, 1020.58it/s]
Fetching 11 files: 100% 11/11 [00:00<00:00, 5758.53it/s]
100% 435/435 [12:56<00:00,  1.78s/it]


# Upload to Hugging Face

In [9]:

# Quietly install/upgrade huggingface_hub, which provides the model card and upload helpers.
!pip install -qU huggingface_hub

# NOTE(review): ModelCardData is imported but not used in the visible cells.
from huggingface_hub import ModelCard, ModelCardData
from jinja2 import Template

# Hugging Face account name. It is passed to the template render call in this
# cell, although the template text itself contains no `username` placeholder.
username = "SJ-Donald"

# Jinja2 source for the model card (README.md): YAML front matter with the
# license and tags, a title, a linked list of the merged models, and the merge
# configuration in a fenced yaml block.
# The `{%-` / `{{-` / `-}}` markers strip adjacent whitespace, so the line
# breaks around the embedded config come from yaml_config's own leading and
# trailing newlines.
template_text = """
---
license: cc-by-nc-4.0
tags:
- merge
- mergekit
- lazymergekit
{%- for model in models %}
- {{ model }}
{%- endfor %}
---

# {{ model_name }}

{{ model_name }} is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):

{%- for model in models %}
* [{{ model }}](https://huggingface.co/{{ model }})
{%- endfor %}

## 🧩 Configuration

```yaml
{{- yaml_config -}}
```
"""

# Compile the template; .strip() drops the leading newline so the rendered
# text starts directly with the `---` front-matter delimiter.
jinja_template = Template(template_text.strip())

# Collect the names of the merged models from the config; they become the
# tags and links on the model card.
# `or {}` covers an empty config, for which safe_load returns None, so the
# explicit error below is raised instead of a TypeError.
data = yaml.safe_load(yaml_config) or {}
if "models" in data:
    # `models:` layout: keep only entries that carry their own `parameters`
    # (presumably to leave out a parameter-less base model entry; confirm).
    models = [entry["model"] for entry in data["models"] if "parameters" in entry]
elif "parameters" in data and "slices" in data:
    # Top-level `parameters` plus `slices` (the layout used in this notebook):
    # every source of the first slice is one of the merged models.
    models = [source["model"] for source in data["slices"][0]["sources"]]
elif "slices" in data:
    # `slices` only: take the first source model of each slice.
    models = [layer_slice["sources"][0]["model"] for layer_slice in data["slices"]]
else:
    # ValueError is a subclass of Exception, so existing handlers still catch it.
    raise ValueError("No models or slices found in yaml config")

# Render the README text, supplying the template variables as one mapping.
content = jinja_template.render(
    {
        "model_name": MODEL_NAME,
        "models": models,
        "yaml_config": yaml_config,
        "username": username,
    }
)

# Wrap the rendered text in a ModelCard and write it next to the merged
# weights so it is uploaded together with them.
card = ModelCard(content)
card.save('merge/README.md')

In [10]:
from google.colab import userdata
from huggingface_hub import HfApi

# Hub account that owns the target repository.
username = "SJ-Donald"
# Write token, defined in the secrets tab in Google Colab (secret name: HF_WRITE_TOKEN).
token = userdata.get('HF_WRITE_TOKEN')

api = HfApi(token=token)
repo_id = f"{username}/{MODEL_NAME}"

# Create the model repo on the first run. exist_ok=True turns this into a
# no-op when the repo already exists, so the cell works both on a fresh
# Run All and on re-runs.
api.create_repo(
    repo_id=repo_id,
    repo_type="model",
    exist_ok=True,
)
# Upload the contents of the `merge` folder (the merged shards and README.md).
# Kept as the last expression so the returned CommitInfo is displayed.
api.upload_folder(
    repo_id=repo_id,
    folder_path="merge",
)

model-00002-of-00012.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00003-of-00012.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00001-of-00012.safetensors:   0%|          | 0.00/1.90G [00:00<?, ?B/s]

model-00004-of-00012.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00005-of-00012.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

Upload 12 LFS files:   0%|          | 0/12 [00:00<?, ?it/s]

model-00006-of-00012.safetensors:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

model-00007-of-00012.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00012.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00009-of-00012.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00010-of-00012.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00011-of-00012.safetensors:   0%|          | 0.00/1.94G [00:00<?, ?B/s]

model-00012-of-00012.safetensors:   0%|          | 0.00/201M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/SJ-Donald/SOLAR-10.7B-slerp/commit/4794c864c5d7ae35220e5c5d4c1dc340acc4785e', commit_message='Upload folder using huggingface_hub', commit_description='', oid='4794c864c5d7ae35220e5c5d4c1dc340acc4785e', pr_url=None, pr_revision=None, pr_num=None)