<a href="https://colab.research.google.com/github/mzwing/AI-related/blob/master/notbooks/stable-diffusion.cpp/SDXL_Lightning_GGUF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the Hugging Face Hub client with the hf_transfer extra.
# %pip installs into the environment of the running kernel, and the quotes keep
# the shell from treating the [...] extra as a glob pattern.
%pip install -U --force-reinstall "huggingface_hub[hf_transfer]"

In [1]:
%%bash
# Workspace setup: drop the sample_data directory and make sure the
# output directory for the converted models exists.
rm -rf sample_data
mkdir -p SDXL-Lightning-GGUF

In [None]:
# Log in to the Hugging Face Hub through its CLI.
# NOTE(review): presumably required by the upload step in the last cell of this
# notebook — confirm whether the downloads also need it.
!huggingface-cli login

In [None]:
# get original model
!sudo apt-get install aria2 -y
!mkdir -p SDXL-Lightning

# (repository, file) pairs to fetch from the Hugging Face Hub.
model_sources = [
    ("ByteDance/SDXL-Lightning", "sdxl_lightning_1step_x0.safetensors"),
    ("ByteDance/SDXL-Lightning", "sdxl_lightning_2step.safetensors"),
    ("ByteDance/SDXL-Lightning", "sdxl_lightning_4step.safetensors"),
    ("madebyollin/sdxl-vae-fp16-fix", "sdxl_vae.safetensors"),
]

# Write the aria2c input file from Python instead of `echo -e`, whose escape
# handling depends on which shell `!` happens to launch. Each URI line is
# followed by an indented " out=<name>" option line for that download.
with open("download.txt", "w") as download_list:
    for repo_id, file_name in model_sources:
        download_list.write(
            f"https://huggingface.co/{repo_id}/resolve/main/{file_name}?download=true\n"
            f" out={file_name}\n"
        )

!aria2c -c -x16 -d SDXL-Lightning --input-file=download.txt
!rm -f download.txt

In [None]:
# get compile result (CPU)
sd_cpu_zip = "sd-master-48bcce4-bin-linux-avx2-openblas-x64.zip"
!aria2c -c -x16 https://github.com/MZWNET/actions/releases/download/sd-master-48bcce4/{sd_cpu_zip}
# -o overwrites existing files without asking, so re-running this cell (or
# running it after the GPU build was unpacked) does not block on unzip's
# interactive "replace?" prompt.
!unzip -o {sd_cpu_zip} -d .
!rm -f {sd_cpu_zip}

In [None]:
# GPU runtime only: install the nvidia-cuda-toolkit package via apt
!sudo apt-get --yes install nvidia-cuda-toolkit

In [None]:
# get compile result (GPU)
sd_gpu_zip = "sd-master-48bcce4-bin-linux-avx2-cublas-cu121-x64.zip"
!aria2c -c -x16 https://github.com/MZWNET/actions/releases/download/sd-master-48bcce4/{sd_gpu_zip}
# -o overwrites existing files without asking, so re-running this cell (or
# running it after the CPU build was unpacked) does not block on unzip's
# interactive "replace?" prompt.
!unzip -o {sd_gpu_zip} -d .
!rm -f {sd_gpu_zip}

In [None]:
# quantise
import concurrent.futures
import os
from tqdm import tqdm

# Quantisation types to produce, and the checkpoints to convert.
parameters = [
    "q8_0",
    "q5_1",
    "q5_0",
    "q4_1",
    "q4_0",
]
models = [
    "sdxl_lightning_1step_x0.safetensors",
]

# One (checkpoint, quantisation type) pair per conversion job,
# grouped by checkpoint in the order listed above.
tasks = [
    (checkpoint, quant_type)
    for checkpoint in models
    for quant_type in parameters
]

def run_command(task):
    """Convert one checkpoint to GGUF at one quantisation type with ``./sd``.

    Args:
        task: ``(model, param)`` pair. ``model`` is the checkpoint file name
            inside ``SDXL-Lightning/``; ``param`` is the value passed to
            ``--type`` and also becomes part of the output file name.

    Returns:
        The exit status of the ``./sd`` process (0 on success). If the
        process could not be started at all, 127 is returned, mirroring the
        shell's "command not found" status.
    """
    import subprocess  # local import keeps the cell's import list untouched

    model, param = task
    # splitext strips only the final extension, so a dotted name such as
    # "foo.v2.safetensors" keeps its full stem ("foo.v2") instead of being
    # truncated to "foo" and colliding with other checkpoints.
    model_name = os.path.splitext(model)[0]
    # Argument list (no shell): file names containing spaces or shell
    # metacharacters are passed through unchanged.
    command = [
        "./sd", "-M", "convert",
        "-m", f"SDXL-Lightning/{model}",
        "--vae", "SDXL-Lightning/sdxl_vae.safetensors",
        "-o", f"SDXL-Lightning-GGUF/{model_name}.{param}.gguf",
        "--type", param,
    ]
    try:
        exit_code = subprocess.run(command).returncode
    except OSError as error:
        print(f"run_command: could not start ./sd for {model} ({param}): {error}")
        return 127
    if exit_code != 0:
        # Surface the failure instead of silently discarding the status.
        print(f"run_command: converting {model} to {param} failed with exit code {exit_code}")
    return exit_code

# Fan the conversion jobs out over a four-thread pool; draining the
# tqdm-wrapped result iterator drives the progress bar and waits for
# every job to finish.
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    outcomes = executor.map(run_command, tasks)
    progress = tqdm(outcomes, total=len(tasks))
    list(progress)

In [None]:
# list the converted files (including dotfiles) with human-readable sizes
!ls -l -a -h SDXL-Lightning-GGUF

In [None]:
# test
# Smoke-test the q8_0 conversion produced by the quantise cell: run ./sd once
# on that GGUF file with a 1024x1024 size, a single step and cfg-scale 1, and
# write the result to test.png.
# NOTE(review): `-s -1` presumably selects a random seed and `-v` verbose
# logging — confirm against the sd CLI help.
!./sd -m SDXL-Lightning-GGUF/sdxl_lightning_1step_x0.q8_0.gguf -p "1girl, extremely beautiful, miniskirt, nsfw" -H 1024 -W 1024 --cfg-scale 1 --steps 1 -s -1 -v -o test.png

In [None]:
# upload model
# The commit hash in the message must match the sd build this notebook
# downloads (release tag sd-master-48bcce4), so the recorded provenance of the
# uploaded GGUF files is accurate.
!HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli upload --repo-type model --commit-message "GGUF model commit (made with stable-diffusion.cpp commit 48bcce4)" SDXL-Lightning-GGUF ./SDXL-Lightning-GGUF