# Installs 

In [None]:
# Fetch the llama.cpp sources into ./llama.cpp; the build, conversion and quantization cells run from this checkout.
# NOTE(review): the clone is unpinned (default-branch HEAD). Later cells rely on `LLAMA_CUBLAS=1 make`,
# `convert-hf-to-gguf.py` and a `quantize` binary being present in it — consider pinning a known-good commit.
!git clone https://github.com/ggerganov/llama.cpp

Cloning into 'llama.cpp'...
remote: Enumerating objects: 24633, done.[K
remote: Counting objects: 100% (5178/5178), done.[K
remote: Compressing objects: 100% (273/273), done.[K
remote: Total 24633 (delta 5030), reused 4946 (delta 4903), pack-reused 19455[K
Receiving objects: 100% (24633/24633), 44.34 MiB | 8.82 MiB/s, done.
Resolving deltas: 100% (17466/17466), done.


In [None]:
!cd llama.cpp && LLAMA_CUBLAS=1 make && pip install -r requirements/requirements-convert-hf-to-gguf.txt

In [None]:
!apt-get update;
!wget https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda-repo-ubuntu1604-10-0-local-10.0.130-410.48_1.0-1_amd64 -O cuda-repo-ubuntu1604-10-0-local-10.0.130-410.48_1.0-1_amd64.deb
!dpkg -i cuda-repo-ubuntu1604-10-0-local-10.0.130-410.48_1.0-1_amd64.deb
!apt-key add /var/cuda-repo-10-0-local/7fa2af80.pub
!apt-get update
!apt-get -y install gcc-7 g++-7
!apt-get -y install cuda

!export PATH=/usr/local/cuda/bin${PATH:+:${PATH}}
!export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

# Download Model from HF

##### Before conversion, the model must be saved as a complete model: either with `save_pretrained` for a pre-trained model, or by merging it with a checkpoint.

In [None]:
from huggingface_hub import snapshot_download

# Hub repository id of the model that will be downloaded and converted.
model_name = "paulo037/stable-code-instruct-3b-spider2-1500-steps"

# Local working directories: `base_model` receives the downloaded snapshot,
# `quantized_path` is the prefix under which the GGUF files are written.
base_model, quantized_path = "./original_model/", "./quantized_model/"

In [None]:
# Download the Hub repository into `base_model` as regular files (symlinks disabled).
snapshot_download(repo_id=model_name, local_dir=base_model, local_dir_use_symlinks=False)
# `quantized_path` already ends with "/", so plain concatenation yields
# "./quantized_model/FP16.gguf" — the same path the conversion and quantization cells use.
original_model = quantized_path + "FP16.gguf"

In [None]:
!mkdir ./quantized_model/

# Convert Model to GGUF

In [None]:
!python llama.cpp/convert-hf-to-gguf.py ./original_model/ --outtype f16 --outfile ./quantized_model/FP16.gguf

In [None]:
import os

## Quantize models

In [None]:
import subprocess

# Quantization types to produce; each one is passed verbatim to llama.cpp's `quantize` tool.
methods = ["Q4_K"]

for m in methods:
    # Output file: <quantized_path><METHOD>.gguf
    qtype = f"{quantized_path}{m.upper()}.gguf"
    # An argument list avoids building a shell string, and check=True turns a failed
    # quantization into a CalledProcessError instead of a silently ignored exit status.
    subprocess.run(
        ["./llama.cpp/quantize", f"{quantized_path}FP16.gguf", qtype, m],
        check=True,
    )

./quantized_model/Q4_K.gguf
./llama.cpp/quantize ./quantized_model/FP16.gguf ./quantized_model/Q4_K.gguf Q4_K


# Push GGUF Model to HF

In [None]:
# Log in to the Hugging Face Hub from inside the notebook (renders a login widget).
# NOTE(review): presumably needed so the repository-creation and upload cells can write to the Hub — confirm.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from huggingface_hub import HfApi, HfFolder, create_repo, upload_file

In [None]:
repo_name = "stable-code-instruct-3b-spider-1500-steps"  # Desired HF Hub repository name
# exist_ok=True lets this cell be re-run after the repository has already been created,
# instead of raising because the repo exists.
repo_url = create_repo(repo_name, private=False, exist_ok=True)

In [None]:
api = HfApi()

# Local GGUF file -> file name inside the Hub repository.
# Every entry needs its own `repo_path`: two entries sharing one name would make the
# later upload overwrite the earlier one, so the FP16 file is published as "FP16.gguf".
models = [
    {
        "path": "./quantized_model/FP16.gguf",
        "repo_path": "FP16.gguf"
    },
    {
        "path": "./quantized_model/Q4_K.gguf",
        "repo_path": "Q4_K.gguf"
    },
]

# Upload the files one by one to the model repository named by `repo_name`.
for archive in models:
    api.upload_file(
        path_or_fileobj=archive['path'],
        path_in_repo=archive['repo_path'],
        repo_id=repo_name,
        repo_type="model",
    )

Q4_K.gguf:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/paulo037/stable-code-instruct-3b-spider-1500-steps-Q4/commit/d1bf2b3d37de0aca7588a755281ae7b212a7b598', commit_message='Upload Q4_K.gguf with huggingface_hub', commit_description='', oid='d1bf2b3d37de0aca7588a755281ae7b212a7b598', pr_url=None, pr_revision=None, pr_num=None)