In [None]:
# Install llama.cpp
# Clone the llama.cpp sources; the following lines expect them in ./llama.cpp.
! git clone https://github.com/ggerganov/llama.cpp
# Pull the latest upstream commit, remove previous build artifacts, then rebuild with
# LLAMA_CUBLAS=1 set for make (presumably selects the CUDA/cuBLAS backend — confirm
# against the Makefile of the checked-out revision).
# NOTE(review): no commit or tag is pinned, so this tracks upstream HEAD. Later cells call
# llama.cpp/convert.py, ./llama.cpp/quantize and ./llama.cpp/main — verify those paths
# still exist in the revision that gets built.
! cd llama.cpp && git pull && make clean && LLAMA_CUBLAS=1 make
# Install the Python packages listed in llama.cpp's requirements file.
! pip install -r llama.cpp/requirements.txt

In [None]:
MODEL_ID = "MadMarx37/deepseek-coder-1.3b-python-peft"

# Download model
! curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
! apt-get install git-lfs
#the two commands above (bash and apt-get) might need sudo based on where you're running them
! git lfs install
! git clone https://huggingface.co/{MODEL_ID}

In [None]:
MODEL_NAME = MODEL_ID.split('/')[-1]

# Convert to fp16
fp16 = f"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin"
! python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}

In [None]:
QUANTIZATION_METHODS = ["q5_k_m"]

for method in QUANTIZATION_METHODS:
    qtype = f"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf"
    !./llama.cpp/quantize {fp16} {qtype} {method}

In [None]:
import os

model_list = [file for file in os.listdir(MODEL_NAME) if "gguf" in file]

prompt = input("Enter your prompt: ")
chosen_method = input("Name of the model (options: " + ", ".join(model_list) + "): ")

# Verify the chosen method is in the list
if chosen_method not in model_list:
    print("Invalid name")
else:
    qtype = f"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf"
    !./llama.cpp/main -m {qtype} -n 128 --color -ngl 35 -p "{prompt}"

In [None]:
! pip install -q huggingface_hub
from huggingface_hub import create_repo, HfApi
from google.colab import userdata

# Defined in the secrets tab in Google Colab
hf_token = userdata.get('huggingface')

api = HfApi()
username = "MadMarx37"

# Create empty repo
create_repo(
    repo_id = f"{username}/{MODEL_NAME}-GGUF",
    repo_type="model",
    exist_ok=True,
    token=hf_token
)

# Upload gguf files
api.upload_folder(
    folder_path=MODEL_NAME,
    repo_id=f"{username}/{MODEL_NAME}-GGUF",
    allow_patterns=f"*.gguf",
    token=hf_token
)