# AI agent prompt design for llama_cpp_canister

# Setup

## Verify we're in the Conda environment

In [None]:
import sys

# Print the path of the running Python interpreter so it can be checked
# against the expected Conda environment.
print(sys.executable)

## Import python packages

In [14]:
import base64
import io
import json
import os
import pprint
import shlex
import subprocess
import sys
import textwrap
from pathlib import Path

import jupyter_black
import requests
from dotenv import load_dotenv

# Activate the jupyter_black extension, which reformats code cells with black
# https://github.com/n8henrie/jupyter-black
jupyter_black.load()

In [15]:
# Helper function to run llama.cpp


def run_llama_cpp(
    llama_cli_path,
    model,
    prompt,
    num_tokens,
    seed,
    temp,
    # top_k,
    # top_p,
    # min_p,
    # tfs,
    # typical,
    # mirostat,
    # mirostat_lr,
    # mirostat_ent,
    repeat_penalty,
):
    """Run llama-cli once and echo the equivalent shell command.

    The argument list is built once and used both for the printed,
    copy/paste-able command and for the actual subprocess call, so the two
    cannot drift apart. The child's output is not captured; it is written to
    whatever stdout/stderr this process has.

    Args:
        llama_cli_path: Path to the llama-cli executable.
        model: Path to the model file, passed with ``-m``.
        prompt: Full prompt text, passed with ``-p``.
        num_tokens: Value for ``-n``.
        seed: Value for ``--seed``.
        temp: Value for ``--temp``.
        repeat_penalty: Value for ``--repeat-penalty``.

    Returns:
        None. A non-zero exit code of llama-cli is reported with a printed
        message instead of being silently ignored.
    """

    command = [
        llama_cli_path,
        "-m",
        model,
        "--no-warmup",  # needed when running from CLI. Is default for llama_cpp_canister
        "-no-cnv",  # needed when running from CLI. Is default for llama_cpp_canister
        # "--simple-io",
        # "--no-display-prompt",  # only return the generated text, without special characters
        "-sp",  # output special tokens
        "-n",
        f"{num_tokens}",
        "--seed",
        f"{seed}",
        "--temp",
        f"{temp}",
        # "--top-k",
        # f"{top_k}",
        # "--top-p",
        # f"{top_p}",
        # "--min-p",
        # f"{min_p}",
        # "--tfs",
        # f"{tfs}",
        # "--typical",
        # f"{typical}",
        # "--mirostat",
        # f"{mirostat}",
        # "--mirostat-lr",
        # f"{mirostat_lr}",
        # "--mirostat-ent",
        # f"{mirostat_ent}",
        "--repeat-penalty",
        f"{repeat_penalty}",
        "-p",
        prompt,
    ]

    # Print the command on a single line for terminal use, preserving \n.
    # Every argument is shell-quoted, so prompts containing single quotes and
    # paths containing spaces still produce a valid command line.
    printable_command = " ".join(shlex.quote(str(arg)) for arg in command)
    print(
        "\nCommand:\n",
        printable_command.replace("\n", "\\n"),
        flush=True,  # make sure the command shows up before the child's output
    )

    completed = subprocess.run(command, text=True)
    if completed.returncode != 0:
        print(f"\nllama-cli exited with return code {completed.returncode}")

In [None]:
# ################################################################ #
# Define where the llama-cli is located, relative to this notebook #
# ################################################################ #

# Using the latest version of llama.cpp
# LLAMA_CLI_PATH = "../../ggerganov_llama_latest.cpp/build/bin/llama-cli"

# llama.cpp git sha 615212 is used by icpp-pro 5.0.2 and later
LLAMA_CLI_PATH = "../../ggerganov_llama_615212.cpp/build/bin/llama-cli"

# llama.cpp git sha b841d0 is used by icpp-pro 5.0.1 and earlier
# LLAMA_CLI_PATH = "../../ggerganov_llama_b841d0.cpp/llama-cli" # Current llama_cpp_canister version



# ####################################################################### #
# Define the MODEL_TYPE and MODEL (location is relative to this notebook) #
# ####################################################################### #
# Exactly one MODEL_TYPE / MODEL pair should be left uncommented below;
# MODEL_TYPE selects which prompt format is built further down in this cell.

# ------------------------------------------------------------------------------------------
# MODEL_TYPE = "gpt2"

# https://huggingface.co/tensorblock/gpt2-GGUF (124M)
# MODEL = "../models/tensorblock/gpt2-GGUF/gpt2-Q8_0.gguf"

# https://huggingface.co/tensorblock/gpt2-medium-GGUF (355M)
# MODEL = "../models/tensorblock/gpt2-medium-GGUF/gpt2-medium-Q8_0.gguf"

# https://huggingface.co/tensorblock/gpt2-large-GGUF (774M)
# NOTE(review): the directory below says gpt2-medium-GGUF although the file is
# gpt2-large - confirm the local path before enabling this line.
# MODEL = "../models/tensorblock/gpt2-medium-GGUF/gpt2-large-Q8_0.gguf"

# https://huggingface.co/PrunaAI/gpt2-GGUF-smashed (163M)
# MODEL = "../models/PrunaAI/gpt2-GGUF-smashed/gpt2.Q5_K_M.gguf"
# MODEL = "../models/PrunaAI/gpt2-GGUF-smashed/gpt2.Q8_0.gguf"

# ------------------------------------------------------------------------------------------
# MODEL_TYPE = "SmolLM2"

# https://huggingface.co/tensorblock/SmolLM2-135M-Instruct-GGUF
# MODEL = (
#     "../models/tensorblock/SmolLM2-135M-Instruct-GGUF/SmolLM2-135M-Instruct-Q4_K_M.gguf"
# )
# MODEL = "../models/tensorblock/SmolLM2-135M-Instruct-GGUF/SmolLM2-135M-Instruct-Q8_0.gguf"

# ------------------------------------------------------------------------------------------
MODEL_TYPE = "Qwen"

# https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF
MODEL = "../models/Qwen/Qwen2.5-0.5B-Instruct-GGUF/qwen2.5-0.5b-instruct-q8_0.gguf"

# ------------------------------------------------------------------------------------------
# MODEL_TYPE = "DeepSeek-R1-Distill-Qwen"
#
# https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
# MODEL = "../models/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q2_K.gguf"

# ------------------------------------------------------------------------------------------
# MODEL_TYPE = "Llama-3.2"
#
# https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-GGUF
# MODEL = "../models/unsloth/Llama-3.2-1B-Instruct-GGUF/Llama-3.2-1B-Instruct-Q2_K.gguf"

# https://huggingface.co/unsloth/Llama-3.2-3B-Instruct-GGUF
# MODEL = "../models/unsloth/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q2_K.gguf"


# Generation settings handed to run_llama_cpp. The commented-out names are
# sampling options that run_llama_cpp currently has disabled as well.
seed = 42  # passed as --seed
num_tokens = 1024  # passed as -n
temp = 0.7  # passed as --temp
# top_k = 50
# top_p = 0.95
# min_p = 0.05
# tfs = 0.9
# typical = 0.9
# mirostat = 2
# mirostat_lr = 0.1
# mirostat_ent = 5.0
repeat_penalty = 1.1  # passed as --repeat-penalty

# Build the prompt for the selected MODEL_TYPE. Each branch wraps the same
# question in the special tokens that family of models is prompted with here.
prompt = ""
# question = "What is the Internet Computer Protocol?"
question = "What is a blockchain?"
if MODEL_TYPE == "gpt2":
    # No chat markup: the prompt is just the question followed by a period.
    prompt = f"{question}."
elif MODEL_TYPE in ["Qwen", "SmolLM2", "DeepSeek-R1-Distill-Qwen"]:
    # System / user / assistant turns delimited by <|im_start|> and <|im_end|>.
    # NOTE(review): DeepSeek-R1-Distill models ship their own chat template -
    # confirm that this format is what is intended for them.
    prompt = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
elif MODEL_TYPE == "Llama-3.2":
    system_prompt = ""
    # NOTE(review): the reference Llama 3 template follows the final assistant
    # header with a blank line ("\n\n") - confirm the omission is intentional.
    prompt = f"<|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
else:
    # Raise instead of calling exit(1): inside a Jupyter kernel exit() does not
    # abort the running cell, so execution would carry on with an empty prompt.
    raise ValueError(f"Model type {MODEL_TYPE} not recognized")

# textwrap.fill re-flows whitespace, so this preview shows the prompt on
# wrapped lines and does not display the prompt's own newlines.
print("\nprompt:\n", textwrap.fill(prompt, width=80))

# Run llama-cli with the executable, model, prompt and generation settings
# chosen earlier in this cell. Keyword arguments make the mapping explicit.
run_llama_cpp(
    llama_cli_path=LLAMA_CLI_PATH,
    model=MODEL,
    prompt=prompt,
    num_tokens=num_tokens,
    seed=seed,
    temp=temp,
    # top_k=top_k,
    # top_p=top_p,
    # min_p=min_p,
    # tfs=tfs,
    # typical=typical,
    # mirostat=mirostat,
    # mirostat_lr=mirostat_lr,
    # mirostat_ent=mirostat_ent,
    repeat_penalty=repeat_penalty,
)