# Convert HuggingFace models to GGUF format

This notebook converts HuggingFace models to the GGUF format supported
by llama.cpp. It can also download a model directly from HuggingFace
when the `download_model_id` param is set in Substratus.

Load the params provided by Substratus

In [None]:
import json
from pathlib import Path

# Params are read from a JSON file when it exists; without the file the
# notebook starts from an empty dict (and then fails on the missing `name`).
params_path = Path("/content/params.json")
params = {}
if params_path.is_file():
    params = json.loads(params_path.read_text(encoding="UTF-8"))

# `name` is mandatory: it is used to build the output file names.
if 'name' in params:
    name = params["name"]
else:
    raise Exception("Missing required param `name`")

# Directory that receives the converted model files.
output_path = params.get("output_path", "/content/model")

Download the model from HuggingFace if the `download_model_id` param is set. Otherwise
the HuggingFace model is expected to be present at `/content/saved-model`

In [None]:
from huggingface_hub import snapshot_download

# When `download_model_id` is set the model is fetched into its own directory;
# otherwise the model is expected to already be at /content/saved-model.
download_model_id = params.get("download_model_id")
model_path = "/content/downloaded-model" if download_model_id else "/content/saved-model"

if download_model_id:
    snapshot_download(
        repo_id=download_model_id,
        revision="main",
        local_dir=model_path,
        local_dir_use_symlinks=False,
    )

Convert the model to 16-bit GGUF so it can be further used with the `llama.cpp/examples/quantize` tool

In [None]:
import os
# have to use this hack otherwise the python3 command won't work
os.environ["MODEL_PATH"] = model_path
outfile = f"{output_path}/{name}-f16.gguf"
os.environ["OUTFILE"] = outfile

! mkdir -p {output_path}
! ls -lash {model_path}
! python3 /content/llama.cpp/convert.py \
  --outfile $OUTFILE \
  --outtype f16 $MODEL_PATH

! ls -lash {output_path}

Upload the model if the `push_to_hub` param is set

In [None]:
from huggingface_hub import HfApi
from pathlib import Path

# `push_to_hub` doubles as the on/off switch and as the target repo id.
push_to_hub = params.get("push_to_hub")
if push_to_hub:
    hf_api = HfApi()
    model_id = push_to_hub
    print(f"Creating HuggingFace repo {model_id}")
    # exist_ok=True is passed so the call can be repeated for the same repo id.
    hf_api.create_repo(
        repo_id=model_id,
        repo_type="model",
        exist_ok=True,
    )

def push_to_huggingface(file):
    """Upload one local file to the HuggingFace repo `model_id`.

    The file is stored in the repo under its base name (directory components
    of `file` are dropped).

    Relies on the notebook globals `hf_api` and `model_id`, which are only
    defined when the `push_to_hub` param is set, so call this only in that case.

    Args:
        file: Path of the local file to upload.
    """
    name_in_repo = Path(file).name
    hf_api.upload_file(
        repo_id=model_id,
        path_in_repo=name_in_repo,
        path_or_fileobj=file,
    )

In [None]:
if push_to_hub:
    # Upload the converted f16 GGUF file first.
    push_to_huggingface(outfile)

    # Also upload the source model's README.md when it has one.
    readme_path = Path(model_path, "README.md")
    if readme_path.exists():
        push_to_huggingface(readme_path)

Optionally create additional quantized models

In [None]:
# Run the `quantize` tool with `-h` so its help text is shown in the notebook
# output (presumably this lists the accepted quantization types — confirm).
! quantize -h

In [None]:
quantize = params.get("quantize")
if quantize:
    quantize = [q.strip() for q in quantize.split(",")]
    for quantize_type in quantize:
        filename = f"{output_path}/{name}-{quantize_type}.gguf"
        os.environ["filename"] = filename
        os.environ["quantize_type"] = quantize_type
        print(f"Running {quantize_type} quantization and writing to {filename}")
        ! quantize $OUTFILE $filename $quantize_type
        if push_to_hub:
            push_to_huggingface(filename)
    ! ls -lash {output_path}