<a href="https://colab.research.google.com/github/mzwing/AI-related/blob/master/notebooks/bling_phi_2_v0_GGUF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# create repo
from huggingface_hub import create_repo
create_repo("mzwing/bling-phi-2-v0-GGUF")

!rm -rf sample_data
!mkdir -p bling-phi-2-v0-GGUF

In [None]:
# get original model
!sudo apt-get install aria2 -y
!GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/llmware/bling-phi-2-v0 --depth 1
!cd bling-phi-2-v0 && rm -rf pytorch_model.bin .git

!aria2c -c -x16 -d bling-phi-2-v0 https://huggingface.co/llmware/bling-phi-2-v0/resolve/main/pytorch_model.bin?download=true -o pytorch_model.bin

In [None]:
# prepare for llama.cpp quantise
!pip3 install sentencepiece gguf
!mkdir -p bling-phi-2-v0
# see this: https://huggingface.co/kroonen/phi-2-GGUF/discussions/1
!git clone https://github.com/ebeyabraham/llama.cpp --depth 1

In [None]:
# convert to fp32
# Runs convert-hf-to-gguf.py from inside the llama.cpp checkout. Input is the
# Hugging Face model directory ../bling-phi-2-v0/; output is
# ../bling-phi-2-v0-GGUF/bling-phi-2-v0.F32.gguf (both relative to llama.cpp/).
!cd llama.cpp && python3 ./convert-hf-to-gguf.py --outtype f32 --outfile ../bling-phi-2-v0-GGUF/bling-phi-2-v0.F32.gguf ../bling-phi-2-v0/

In [None]:
# convert to fp16
# Same converter invocation with --outtype f16. The resulting
# bling-phi-2-v0.F16.gguf is the file the quantize step reads as its input.
!cd llama.cpp && python3 ./convert-hf-to-gguf.py --outtype f16 --outfile ../bling-phi-2-v0-GGUF/bling-phi-2-v0.F16.gguf ../bling-phi-2-v0/

In [None]:
# uninstall to save space
!pip3 uninstall sentencepiece gguf -y
!rm -rf bling-phi-2-v0 llama.cpp

In [None]:
# compile
!cd llama.cpp && mkdir build && cd build && cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DLLAMA_AVX2=ON -DCMAKE_BUILD_TYPE=Release && cmake --build . --config Release
!cp llama.cpp/build/bin/quantize ./

# store
!mkdir -p /content/drive

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

!mkdir /content/drive/MyDrive/llama.cpp-cache/
!cp quantize /content/drive/MyDrive/llama.cpp-cache/

drive.flush_and_unmount()

In [None]:
# get compile result
# Restores the previously built `quantize` binary from the Google Drive cache
# instead of compiling it again.
!mkdir -p /content/drive

from google.colab import drive
# force_remount=True re-mounts even if Drive is already mounted.
drive.mount('/content/drive', force_remount=True)

# Copy the cached binary into the current working directory.
!cp /content/drive/MyDrive/llama.cpp-cache/quantize ./

# Drive is only needed for the copy above, so detach it again.
drive.flush_and_unmount()

# Make sure the restored file is executable before it is invoked.
!chmod +x quantize

In [None]:
# quantize
import concurrent.futures
import os
from tqdm import tqdm

# Quantisation types to produce from the F16 GGUF; one output file per entry.
parameters = [ "Q8_0", "Q6_K", "Q5_K_M", "Q5_K_S", "Q5_0", "Q4_K_M", "Q4_K_S", "Q4_0", "Q3_K_L", "Q3_K_M", "Q3_K_S", "Q2_K" ]

def run_command(param):
    """Quantise the F16 GGUF to ``param`` with the local ``quantize`` binary.

    Runs ``quantize bling-phi-2-v0.F16.gguf bling-phi-2-v0.{param}.gguf {param}``
    with ``bling-phi-2-v0-GGUF/`` as the working directory. The binary is the
    ``quantize`` file in the current working directory.

    Args:
        param: Quantisation type name (e.g. ``"Q4_K_M"``). It is passed to
            ``quantize`` and also used in the output file name.

    Returns:
        None.

    Raises:
        OSError: If the binary or the ``bling-phi-2-v0-GGUF`` directory is missing.
        RuntimeError: If ``quantize`` exits with a non-zero status.
    """
    # Local import so the function does not rely on a change to the cell's imports.
    import subprocess

    # Resolve the binary before changing directory; this is the same file the
    # relative path ``../quantize`` referred to from inside the output directory.
    quantize_bin = os.path.abspath("quantize")
    # An argument list (no shell) passes `param` verbatim instead of having it
    # interpreted by a shell.
    completed = subprocess.run(
        [
            quantize_bin,
            "bling-phi-2-v0.F16.gguf",
            f"bling-phi-2-v0.{param}.gguf",
            param,
        ],
        cwd="bling-phi-2-v0-GGUF",
    )
    # os.system() dropped the exit status, so a failed quantisation went
    # unnoticed; surface it instead.
    if completed.returncode != 0:
        raise RuntimeError(
            f"quantize failed for {param} (exit status {completed.returncode})"
        )

# Run the quantisation jobs on a thread pool with at most 12 worker threads.
with concurrent.futures.ThreadPoolExecutor(max_workers=12) as pool:
    # map() calls run_command once per entry in `parameters`; the iterator is
    # consumed to the end so every job is waited for, with tqdm showing progress.
    progress = tqdm(pool.map(run_command, parameters), total=len(parameters))
    for _job_result in progress:
        pass

In [None]:
# http upload
import concurrent.futures
from huggingface_hub import HfApi

# Hub client shared by all uploads in this cell.
api = HfApi()

# Suffixes of the files to publish: the two full-precision conversions plus
# every quantised variant.
parameters = [
    "F32", "F16",
    "Q8_0", "Q6_K",
    "Q5_K_M", "Q5_K_S", "Q5_0",
    "Q4_K_M", "Q4_K_S", "Q4_0",
    "Q3_K_L", "Q3_K_M", "Q3_K_S",
    "Q2_K",
]

def upload(params):
    """Upload one GGUF file to the ``mzwing/bling-phi-2-v0-GGUF`` repository.

    Args:
        params: Suffix identifying the file, e.g. ``"Q4_K_M"``. The local file
            ``bling-phi-2-v0-GGUF/bling-phi-2-v0.{params}.gguf`` is stored in the
            repository root under the same file name.

    Returns:
        None.
    """
    filename = f"bling-phi-2-v0.{params}.gguf"
    api.upload_file(
        repo_id="mzwing/bling-phi-2-v0-GGUF",
        path_in_repo=filename,
        path_or_fileobj=f"bling-phi-2-v0-GGUF/{filename}",
    )

# Upload with up to four files in flight at once.
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
    # Consume the map() iterator to the end so every upload is waited for and
    # an exception raised by any of them surfaces here.
    for _upload_result in pool.map(upload, parameters):
        pass

In [None]:
# git merge history
!git config --global credential.helper store
!huggingface-cli login
!git config --global user.email 'mzwing@mzwing.eu.org'
!git config --global user.name 'mzwing'
!rm -rf bling-phi-2-v0-GGUF/
!GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/mzwing/bling-phi-2-v0-GGUF
!cd bling-phi-2-v0-GGUF && git lfs install
!huggingface-cli lfs-enable-largefiles bling-phi-2-v0-GGUF/
!cd bling-phi-2-v0-GGUF/ && git branch backup-main
!cd bling-phi-2-v0-GGUF/ && git checkout --orphan new-main
!cd bling-phi-2-v0-GGUF/ && git add -A
!cd bling-phi-2-v0-GGUF/ && git commit -m "GGUF model commit (made with llama.cpp commit 26d6076)"
!cd bling-phi-2-v0-GGUF/ && git branch -D main
!cd bling-phi-2-v0-GGUF/ && git branch -m main
!cd bling-phi-2-v0-GGUF/ && git push -f origin main