# AI agent prompt design for llama_cpp_canister

# Setup

## Verify we're in the Conda environment

In [None]:
import sys

# Print the path of the Python interpreter running this kernel, so it can be
# compared against the expected Conda environment.
print(sys.executable)

## Import python packages

In [6]:
import os
import sys
import json
import base64
import io
from dotenv import load_dotenv
import requests
import pprint
from pathlib import Path
import subprocess
import jupyter_black
import textwrap

# Activate the jupyter_black extension, which reformats code cells with black
# https://github.com/n8henrie/jupyter-black
jupyter_black.load()

In [None]:
# Path to the llama-cli executable, relative to this notebook.
# Alternative build, matching the current llama_cpp_canister version:
#   "../../ggerganov_llama_b841d0.cpp/llama-cli"
LLAMA_CLI_PATH = "../../ggerganov_llama_latest.cpp/build/bin/llama-cli"

# GGUF model file passed to llama-cli via -m.
# Other models that can be swapped in:
#   "../models/Qwen/Qwen2.5-0.5B-Instruct-GGUF/qwen2.5-0.5b-instruct-q8_0.gguf"
#   "../models/tensorblock/SmolLM2-135M-Instruct-GGUF/SmolLM2-135M-Instruct-Q8_0.gguf"
#   "../models/tensorblock/SmolLM2-135M-Instruct-GGUF/SmolLM2-135M-Instruct-Q4_K_M.gguf"
#   "../models/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q2_K.gguf"
MODEL = (
    "../models/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/"
    "DeepSeek-R1-Distill-Qwen-1.5B-Q2_K.gguf"
)


def run_llama_cpp(
    prompt: str,
    num_tokens: int,
    seed: int,
    temp: float,
) -> str:
    """Run llama-cli once on ``prompt`` and return what it wrote to stdout.

    The executable and the model file are taken from the module-level
    ``LLAMA_CLI_PATH`` and ``MODEL``. Before running, the equivalent shell
    command is printed on a single line so the run can be repeated from a
    terminal.

    Args:
        prompt: Full prompt text, passed to llama-cli with ``-p``.
        num_tokens: Value passed with ``-n``.
        seed: Value passed with ``--seed``.
        temp: Value passed with ``--temp``.

    Returns:
        The captured stdout of llama-cli, as text. stderr is captured
        separately and is only shown when llama-cli exits with a non-zero
        status.
    """
    # Local import: shlex is not among the notebook's top-level imports.
    import shlex

    command = [
        LLAMA_CLI_PATH,
        "-m",
        MODEL,
        "--no-warmup",  # needed when running from CLI. Is default for llama_cpp_canister
        "-no-cnv",  # needed when running from CLI. Is default for llama_cpp_canister
        # Optional: "--simple-io"
        # Optional: "--no-display-prompt" (only return the generated text)
        "-sp",  # output special tokens
        "-n",
        f"{num_tokens}",
        "--seed",
        f"{seed}",
        "--temp",
        f"{temp}",
        # Further sampler flags (--top-k, --top-p, --min-p, --tfs, --typical,
        # --mirostat, --mirostat-lr, --mirostat-ent) can be appended here,
        # each followed by its value, once matching parameters are added.
        "-p",
        prompt,
    ]

    # Build the copy-pasteable command from the exact argument list that is
    # executed, so the printed line can never drift from the real invocation.
    # shlex.quote also keeps prompts containing single quotes valid in a shell.
    # Newlines are shown as a literal \n to keep the command on one line.
    shell_line = " ".join(shlex.quote(arg) for arg in command).replace("\n", "\\n")
    print("\nCommand:")
    print(shell_line)

    # Run the command and capture the output
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )

    # Surface failures instead of silently returning an empty string.
    if result.returncode != 0:
        print(
            f"\nllama-cli exited with status {result.returncode}; stderr:\n"
            f"{result.stderr}"
        )

    return result.stdout


# Generation settings forwarded to run_llama_cpp.
seed = 42
num_tokens = 1024
temp = 0.7
# Values for the sampler options that are currently disabled in run_llama_cpp:
# top_k = 50
# top_p = 0.95
# min_p = 0.05
# tfs = 0.9
# typical = 0.9
# mirostat = 2
# mirostat_lr = 0.1
# mirostat_ent = 5.0

# Prompt written with <|im_start|>/<|im_end|> role markers.
# NOTE(review): the selected MODEL is a DeepSeek-R1-Distill build, which may
# expect a different chat template than these markers - confirm against the
# model card before drawing conclusions from the output.
prompt = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nWhat is the Proof-of-AI-Work Protocol?<|im_end|>\n<|im_start|>assistant\n"
response = run_llama_cpp(
    prompt,
    num_tokens,
    seed,
    temp,
)


def _wrap_keeping_newlines(text, width=80):
    """Wrap every line of ``text`` to ``width`` columns, keeping its line breaks.

    Calling textwrap.fill on the whole text would replace each newline with a
    space and merge everything into one paragraph, hiding the line structure
    of the prompt and of the model's answer.
    """
    return "\n".join(textwrap.fill(line, width=width) for line in text.splitlines())


print("\nprompt:")
print(_wrap_keeping_newlines(prompt, width=80))
print("\nresponse:")
print(_wrap_keeping_newlines(response, width=80))