# Setup

## Verify we're in the Conda environment

In [None]:
import sys

# Show the path of the Python interpreter running this kernel, so it can be
# checked against the expected Conda environment.
print(sys.executable)

## Import Python packages

In [12]:
import os
import sys
import json
from PIL import Image
import base64
import io
from dotenv import load_dotenv
import requests
import pprint
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import subprocess
import jupyter_black

# Activate the jupyter_black extension, which reformats code cells with black.
# Project page: https://github.com/n8henrie/jupyter-black
jupyter_black.load()

In [None]:
import hashlib


def calculate_hash_seed(sentence):
    """Derive a deterministic integer seed from a sentence.

    The sentence is UTF-8 encoded and hashed with SHA-256. The digest is then
    interpreted as one unsigned integer and reduced modulo ``2**31 - 1`` so
    that it fits in a 32-bit signed integer.

    Args:
        sentence: Text to derive the seed from.

    Returns:
        An integer in the range ``[0, 2**31 - 2]``.
    """
    digest = hashlib.sha256(sentence.encode("utf-8")).digest()

    # Reading the raw digest big-endian gives the same number as parsing the
    # hexadecimal digest as a base-16 integer.
    digest_as_int = int.from_bytes(digest, byteorder="big")

    return digest_as_int % (2**31 - 1)


# Example usage: derive the seed for a fixed sample sentence and print it.
sentence = "This is a sample sentence."
seed_value = calculate_hash_seed(sentence)
print(f"The hash of the sentence as a seed is: {seed_value}")

In [None]:
import re


def filter_text(text):
    """Reduce raw model output to a single, clean ASCII line.

    Only the first line of ``text`` is kept. From that line the following are
    removed: single and double quotes, every non-ASCII character, backslashes
    and the literal marker ``[end of text]``. Surrounding whitespace is
    stripped last, so whitespace that only becomes leading/trailing after one
    of the removals (e.g. the space in ``"Paris [end of text]"``) does not
    leak into the result.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned first line; an empty string if nothing is left.
    """
    # Only keep the first line of the answer
    cleaned = text.partition("\n")[0]

    # Remove quotes from the answer. Both single and double quotes are removed.
    cleaned = cleaned.replace('"', "").replace("'", "")

    # Use regex to remove every non-ASCII character
    cleaned = re.sub(r"[^\x00-\x7F]+", "", cleaned)

    # Remove backslash from the answer.
    cleaned = cleaned.replace("\\", "")

    # Remove [end of text]
    cleaned = cleaned.replace("[end of text]", "")

    # Strip last: the removals above can expose new leading/trailing spaces.
    return cleaned.strip()


# Example usage: the input mixes non-ASCII characters, both quote styles and a
# second line, all of which filter_text removes.
text = "Byzantine\u5171\u8bc6 ('Byzantine' \"consensus\")\n next line must be removed"
filtered_text = filter_text(text)
print(filtered_text)

In [None]:
# Define where the llama-cli is located, relative to this notebook
LLAMA_CLI_PATH = "../../../ggerganov_llama_615212.cpp/build/bin/llama-cli"
# LLAMA_CLI_PATH = "../../../ggerganov_llama_latest.cpp/build/bin/llama-cli"

# Select a model to use (path to a GGUF file, passed to llama-cli via -m).
# Exactly one MODEL assignment should be active; the others are alternatives.
MODEL = "../../../llama_cpp_canister/models/Qwen/Qwen2.5-0.5B-Instruct-GGUF/qwen2.5-0.5b-instruct-q8_0.gguf"
# MODEL = "../../../llama_cpp_canister/models/tensorblock/SmolLM2-135M-Instruct-GGUF/SmolLM2-135M-Instruct-Q8_0.gguf"
# MODEL = (
#     "../../../llama_cpp_canister/models/tensorblock/SmolLM2-135M-Instruct-GGUF/SmolLM2-135M-Instruct-Q4_K_M.gguf"
# )
# MODEL = "../../../llama_cpp_canister/models/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q2_K.gguf"
# MODEL = "../../../llama_cpp_canister/models/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q2_K.gguf"

# One-shot flag read and cleared by run_llama_cpp: while True, the next call
# prints the equivalent shell command once and then sets this to False.
print_command = True
def run_llama_cpp(
    prompt,
    num_tokens,
    seed,
    temp,
    # top_k,
    # top_p,
    # min_p,
):
    """Run llama-cli once for ``prompt`` and return what it wrote to stdout.

    The executable and model are taken from the module-level ``LLAMA_CLI_PATH``
    and ``MODEL``. On the first call (while the module-level ``print_command``
    flag is True) the equivalent shell command is printed once, with newlines
    shown as ``\\n`` so it fits on a single line.

    Args:
        prompt: Prompt text passed to llama-cli with ``-p``.
        num_tokens: Value passed with ``-n``.
        seed: Value passed with ``--seed``.
        temp: Value passed with ``--temp``.

    Returns:
        The captured stdout of llama-cli as a string. If llama-cli exits with
        a non-zero code, a warning with the tail of its stderr is printed to
        ``sys.stderr`` and whatever stdout was produced is still returned.
    """
    import shlex  # local import: only needed for the one-time command echo

    global print_command

    command = [
        LLAMA_CLI_PATH,
        "-m",
        MODEL,
        "--no-warmup",  # needed when running from CLI. Is default for llama_cpp_canister
        "-no-cnv",  # needed when running from CLI. Is default for llama_cpp_canister
        "--simple-io",
        "--no-display-prompt",  # only return the generated text, without special characters
        # "-sp", # output special tokens
        "-n",
        str(num_tokens),
        "--seed",
        str(seed),
        "--temp",
        str(temp),
        # "--top-k",
        # f"{top_k}",
        # "--top-p",
        # f"{top_p}",
        # "--min-p",
        # f"{min_p}",
        "-p",
        prompt,
    ]

    # print this only once !
    if print_command:
        print_command = False
        # Build the echoed line from the real argument list so the two cannot
        # drift apart, and shell-quote each argument so prompts containing
        # quotes or other shell metacharacters remain copy-pasteable.
        shell_line = " ".join(shlex.quote(arg) for arg in command)
        # Print the command on a single line for terminal use, preserving \n
        print("\nCommand:\n", shell_line.replace("\n", "\\n"))

    # Run the command and capture the output. Decode as UTF-8 with
    # replacement so malformed bytes in the output cannot raise
    # UnicodeDecodeError.
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        encoding="utf-8",
        errors="replace",
    )

    # Surface failures instead of silently returning an empty answer.
    if result.returncode != 0:
        print(
            f"WARNING: llama-cli exited with code {result.returncode}:\n"
            f"{result.stderr[-2000:]}",
            file=sys.stderr,
        )

    return result.stdout


# create_challenge_prompt = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nGenerate a question that can be answered by an LLM in 5 words or less.<|im_end|>\n<|im_start|>assistant\n"
# create_challenge_prompt = "<|im_start|>user\nGenerate a question that can be answered in 5 words or less.<|im_end|>\n<|im_start|>assistant\n"
# BAD create_challenge_prompt = "<|im_start|>user\nPlease create a question that can be answered with common knowledge.<|im_end|>\n<|im_start|>assistant\n"
# create_challenge_prompt = "<|im_start|>system\nYou are a quiz master.<|im_end|>\n<|im_start|>user\nGenerate 10 different questions that can be answered in 5 words or less. Return as JSON<|im_end|>\n<|im_start|>assistant\n"
# create_challenge_prompt = "<|im_start|>system\nYou are a quiz master.<|im_end|>\n<|im_start|>user\nAsk me 10 different questions that can be answered in 5 words or less. Return as JSON<|im_end|>\n<|im_start|>assistant\n"
# create_challenge_prompt = "<|im_start|>system\nYou are a quiz master.<|im_end|>\n<|im_start|>user\nAsk me 10 different questions that start with What, Who, Where, When, Why, How, Which, Can, Is, or Are. Also give me the answers. Return as JSON<|im_end|>\n<|im_start|>assistant\n"
# create_challenge_prompt = '<|im_start|>system\nYou are a quiz master.<|im_end|>\n<|im_start|>user\nAsk me 10 different questions that start with What, Who, Where, When, Why, How, Which or Can. Also give me the answers. Return as a JSON format as shown in this example:\n[{"question": "What is the capital of France?", "answer": "Paris"}, {"question": "Who first discovered the Americas?", "answer": "Christopher Columbus"}, {"question": "Where is the headquarters of NASA located?", "answer": "Houston, Texas"}, {"question": "When did the First World War start?", "answer": "1914"}, {"question": "What is the longest river in the world?", "answer": "Mississippis"}, {"question": "Which historical event is considered the turning point in the Civil War?", "answer": "The Battle of Gettysburg"}, {"question": "Which year was the first Olympics held?", "answer": "1896"}, {"question": "Which city in France was the birthplace of French President François Hollande?", "answer": "Paris"}, {"question": "What is the name of the largest planet in our solar system?", "answer": "Mercury"}, {"question": "Who is known as the \'King of Jazz\'?", "answer": "Louis Armstrong"}]\n<|im_end|>\n<|im_start|>assistant\n'
# create_challenge_prompt = "<|im_start|>system\nYou are a quiz master.<|im_end|>\n<|im_start|>user\nAsk me a question that starts with What, Who, Where, When, Why, How, Which or Can. Do NOT give me the answer.\n<|im_end|>\n<|im_start|>assistant\n"
# create_challenge_prompt = "<|im_start|>user\nAsk a question that can be answered with common knowledge. Do NOT give the answer.\n<|im_end|>\n<|im_start|>assistant\n"
# Question openers. One prompt is generated per (topic, opener) pair, asking
# the model to start its question with the given word.
create_challenge_prompt_starts_with = [
    "What",
    "Who",
    "Where",
    "When",
    "Why",
    "How",
    "Which",
    "Can",
]
# Passed to run_llama_cpp as num_tokens (llama-cli -n).
create_challenge_num_tokens = 1024

# Passed to run_llama_cpp as temp (llama-cli --temp).
create_challenge_temp = 0.7
# create_challenge_top_k = 50
# create_challenge_top_p = 0.95
# create_challenge_min_p = 0.05

# None means "not chosen yet": the generation loop then derives the first seed
# from the first prompt with calculate_hash_seed.
create_challenge_seed = None
# create_challenge_seed = calculate_hash_seed(create_challenge_prompt)
# print(
#     f"create_challenge_prompt = {create_challenge_prompt} (seed = {create_challenge_seed})"
# )


# Topics to generate questions about. Every topic is combined with every
# opener in create_challenge_prompt_starts_with.
challenge_topics = [
    "crypto",
    "nature",
    "space",
    "history",
    "science",
    "technology",
    "engineering",
    "math",
    "art",
    "music",
]
challenges = []
challenge_id = 0

# Output file; rewritten in full after every accepted question.
file_path = "1-challenges.json"

for challenge_topic in challenge_topics:
    print("=====================")
    print(f"Generating questions about challenge_topic: {challenge_topic}")
    for starts_with in create_challenge_prompt_starts_with:
        # create_challenge_prompt = f"<|im_start|>user\nAsk a question that can be answered with common knowledge. Do NOT give the answer. Start the question with {starts_with}\n<|im_end|>\n<|im_start|>assistant\n"
        create_challenge_prompt = f"<|im_start|>user\nAsk a question about {challenge_topic}, that can be answered with common knowledge. Do NOT give the answer. Start the question with {starts_with}\n<|im_end|>\n<|im_start|>assistant\n"

        # Only the very first run has no seed yet: derive it from the prompt.
        # Every later run uses the hash of the last accepted question.
        if create_challenge_seed is None:
            create_challenge_seed = calculate_hash_seed(create_challenge_prompt)
            print(
                f"create_challenge_prompt = {create_challenge_prompt} (seed = {create_challenge_seed})"
            )
            print("-------------------")

        challenge_question = run_llama_cpp(
            create_challenge_prompt,
            create_challenge_num_tokens,
            create_challenge_seed,
            create_challenge_temp,
            # create_challenge_top_k,
            # create_challenge_top_p,
            # create_challenge_min_p,
        )

        challenge_question = filter_text(challenge_question)

        # Continue (skip storing the question) if there is no question mark.
        # The seed is left unchanged in that case.
        if "?" not in challenge_question:
            continue

        # Remove everything after the first question mark
        challenge_question = challenge_question.partition("?")[0] + "?"

        # Chain the seed: the next generation is seeded by this question.
        create_challenge_seed = calculate_hash_seed(challenge_question)
        print(f"--\n{challenge_question} (seed = {create_challenge_seed})")

        challenges.append(
            {
                "challenge_id": str(challenge_id),
                "challenge_topic": challenge_topic,
                "challenge_question": challenge_question,
            }
        )
        challenge_id += 1

        # Continuously save the challenges to a JSON file
        with open(file_path, "w", encoding="utf-8") as file:
            json.dump(challenges, file, indent=4)