<a href="https://colab.research.google.com/github/mzwing/AI-related/blob/master/notebooks/AquilaChat2_7B_16K_GGUF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# prepare
!rm -rf sample_data
!mkdir -p AquilaChat2-7B-16K-GGUF

In [None]:
# get original model
!sudo apt-get install aria2 -y
!GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/BAAI/AquilaChat2-7B-16K --depth 1
!cd AquilaChat2-7B-16K && rm -rf pytorch_model-00001-of-00003.bin pytorch_model-00002-of-00003.bin pytorch_model-00003-of-00003.bin .git

!echo -e "https://huggingface.co/BAAI/AquilaChat2-7B-16K/resolve/main/pytorch_model-00001-of-00003.bin?download=true\n out=pytorch_model-00001-of-00003.bin\nhttps://huggingface.co/BAAI/AquilaChat2-7B-16K/resolve/main/pytorch_model-00002-of-00003.bin?download=true\n out=pytorch_model-00002-of-00003.bin\nhttps://huggingface.co/BAAI/AquilaChat2-7B-16K/resolve/main/pytorch_model-00003-of-00003.bin?download=true\n out=pytorch_model-00003-of-00003.bin" > download.txt
!aria2c -c -x16 -d AquilaChat2-7B-16K --input-file=download.txt

!rm -rf download.txt

In [None]:
# prepare for llama.cpp quantise
!pip3 install sentencepiece gguf
!mkdir -p AquilaChat2-7B-16K
!git clone https://github.com/ggerganov/llama.cpp --depth 1

In [None]:
# convert to fp32
!cd llama.cpp && python3 ./convert.py --outtype f32 --outfile ../AquilaChat2-7B-16K-GGUF/AquilaChat2-7B-16K.F32.gguf ../AquilaChat2-7B-16K/ --ctx 16384 --vocab-type hfft

In [None]:
# convert to fp16
!cd llama.cpp && python3 ./convert.py --outtype f16 --outfile ../AquilaChat2-7B-16K-GGUF/AquilaChat2-7B-16K.F16.gguf ../AquilaChat2-7B-16K/ --ctx 16384 --vocab-type hfft

In [None]:
# uninstall to save space
!pip3 uninstall sentencepiece gguf -y
!rm -rf AquilaChat2-7B-16K llama.cpp

In [None]:
# rm original model
# Deletes the AquilaChat2-7B-16K directory (the cloned checkpoint and its
# downloaded weight shards); `-f` makes this a no-op if it is already gone.
!rm -rf AquilaChat2-7B-16K

In [None]:
# compile
!cd llama.cpp && mkdir build && cd build && cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DLLAMA_AVX2=ON -DCMAKE_BUILD_TYPE=Release && cmake --build . --config Release
!cp llama.cpp/build/bin/quantize ./

# store
!mkdir -p /content/drive

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

!mkdir /content/drive/MyDrive/llama.cpp-cache/
!cp quantize /content/drive/MyDrive/llama.cpp-cache/

drive.flush_and_unmount()

In [None]:
# get compile result
!mkdir -p /content/drive

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

!cp /content/drive/MyDrive/llama.cpp-cache/quantize ./

drive.flush_and_unmount()

!chmod +x quantize

In [None]:
# quantize
import concurrent.futures
import os
from tqdm import tqdm

# Quantization types to produce from the F16 GGUF.
parameters = [ "Q8_0", "Q6_K", "Q5_K_M", "Q5_K_S", "Q5_0", "Q4_K_M", "Q4_K_S", "Q4_0", "Q3_K_L", "Q3_K_M", "Q3_K_S", "Q2_K" ]

def run_command(param):
    """Quantize the F16 GGUF file to the quantization type `param`.

    Runs the `quantize` binary from inside the output directory, writing
    `AquilaChat2-7B-16K.{param}.gguf` next to the F16 file.

    Returns:
        The status returned by `os.system`; 0 means the command succeeded.
    """
    return os.system(f"cd AquilaChat2-7B-16K-GGUF/ && ../quantize AquilaChat2-7B-16K.F16.gguf AquilaChat2-7B-16K.{param}.gguf {param}")

# Create a thread pool with at most 5 worker threads.
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # map() pairs each quantization type with run_command; list() drains the
    # lazy iterator so tqdm advances as results arrive (in input order).
    statuses = list(tqdm(executor.map(run_command, parameters), total=len(parameters)))

# Surface failures instead of silently continuing to the upload step with
# missing or truncated files.
failed = [param for param, status in zip(parameters, statuses) if status != 0]
if failed:
    raise RuntimeError(f"quantize failed for: {', '.join(failed)}")

In [None]:
import concurrent.futures
from huggingface_hub import HfApi, create_repo

# create repo
# create_repo("mzwing/AquilaChat2-7B-16K-GGUF")

# http upload
api = HfApi()

# Every GGUF variant to publish: the two float conversions plus all quantized types.
parameters = [
    "F32", "F16",
    "Q8_0", "Q6_K",
    "Q5_K_M", "Q5_K_S", "Q5_0",
    "Q4_K_M", "Q4_K_S", "Q4_0",
    "Q3_K_L", "Q3_K_M", "Q3_K_S",
    "Q2_K",
]

def upload(params):
    """Upload the GGUF file for variant `params` to the Hugging Face repo.

    The local file `AquilaChat2-7B-16K-GGUF/AquilaChat2-7B-16K.{params}.gguf`
    is stored under the same file name at the root of
    `mzwing/AquilaChat2-7B-16K-GGUF`. Progress is printed before and after.
    """
    gguf_name = f"AquilaChat2-7B-16K.{params}.gguf"
    print(f"Uploading {params}...")
    api.upload_file(
        repo_id="mzwing/AquilaChat2-7B-16K-GGUF",
        path_in_repo=gguf_name,
        path_or_fileobj=f"AquilaChat2-7B-16K-GGUF/{gguf_name}",
    )
    print(f"Finished uploading {params}.")

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
    # Drain the lazy map iterator so every upload runs to completion and any
    # exception raised by a worker is re-raised here.
    for _finished in pool.map(upload, parameters):
        pass

In [None]:
# git merge history
!git config --global credential.helper store
!huggingface-cli login
!git config --global user.email 'mzwing@mzwing.eu.org'
!git config --global user.name 'mzwing'
!rm -rf AquilaChat2-7B-16K-GGUF/
!GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/mzwing/AquilaChat2-7B-16K-GGUF
!cd AquilaChat2-7B-16K-GGUF && git lfs install
!huggingface-cli lfs-enable-largefiles AquilaChat2-7B-16K-GGUF/
!cd AquilaChat2-7B-16K-GGUF/ && git branch backup-main
!cd AquilaChat2-7B-16K-GGUF/ && git checkout --orphan new-main
!cd AquilaChat2-7B-16K-GGUF/ && git add -A
!cd AquilaChat2-7B-16K-GGUF/ && git commit -m "GGUF model commit (made with llama.cpp commit 26d6076)"
!cd AquilaChat2-7B-16K-GGUF/ && git branch -D main
!cd AquilaChat2-7B-16K-GGUF/ && git branch -m main
!cd AquilaChat2-7B-16K-GGUF/ && git push -f origin main