# Setup

## Verify we're in the Conda environment

In [None]:
import sys

# Show the path of the Python interpreter running this kernel, so it can be
# checked by eye against the expected Conda environment.
print(sys.executable)

## Import Python packages

In [8]:
import os
import sys
import json
from PIL import Image
import base64
import io
from dotenv import load_dotenv
import requests
import pprint
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import subprocess
import jupyter_black

# Activate the jupyter_black extension, which reformats code cells with black
# https://github.com/n8henrie/jupyter-black
# NOTE(review): presumably only cells executed after this call are
# reformatted - confirm against the jupyter-black README.
jupyter_black.load()

In [None]:
import re


def filter_text(text):
    """Reduce a raw answer to one clean, ASCII-only line.

    The steps are applied in this order:
      1. keep only the text before the first newline,
      2. remove every single and double quote,
      3. remove every non-ASCII character,
      4. remove every backslash,
      5. strip leading and trailing whitespace.

    Stripping is done last on purpose: removing characters in steps 3 and 4
    can expose whitespace at either end of the string (for example a space
    that separated an English word from a trailing non-ASCII word), and that
    whitespace must not end up in the result.

    Args:
        text: The raw answer string.

    Returns:
        The cleaned first line, without surrounding whitespace.
    """
    # Only keep the first line of the answer
    text = text.split("\n")[0]

    # Remove quotes from the answer. Both single and double quotes are removed.
    text = text.replace('"', "").replace("'", "")

    # Remove every run of characters outside the ASCII range (0x00-0x7F)
    text = re.sub(r"[^\x00-\x7F]+", "", text)

    # Remove backslash from the answer.
    text = text.replace("\\", "")

    # Strip last, so whitespace uncovered by the removals above is dropped too
    return text.strip()


def extract_mainer_id(text):
    """Return the leading ID token of an ``<id>: <answer>`` style line.

    The input is first cleaned with ``filter_text``. The result is whatever
    precedes the first space of the cleaned text, cut again at the first
    ``:`` if one is present.
    """
    cleaned = filter_text(text)

    # Keep only what comes before the first space ...
    first_token, _, _ = cleaned.partition(" ")

    # ... and, within that, only what comes before the first ':'
    identifier, _, _ = first_token.partition(":")

    return identifier


# Example usage
# filter_text keeps only the first line and drops the quotes and the
# non-ASCII characters, so this prints: Byzantine (Byzantine consensus)
text = "Byzantine\u5171\u8bc6 ('Byzantine' \"consensus\")\n next line must be removed"
filtered_text = filter_text(text)
print(filtered_text)

# extract_mainer_id additionally cuts the cleaned text at the first space and
# at the first ':', so this prints: abc-def-cai
text = "abc-def-cai: Byzantine\u5171\u8bc6 ('Byzantine' \"consensus\")\n next line must be removed"
mainer_id = extract_mainer_id(text)
print(mainer_id)

In [10]:
def run_llama_cpp(
    prompt,
    num_tokens,
    seed,
    temp,
    top_k,
    top_p,
    min_p,
    tfs,
    # typical,
    # mirostat,
    # mirostat_lr,
    # mirostat_ent,
    llama_cli_path="../../../ggerganov_llama.cpp/llama-cli",
    model_path="../../../llama_cpp_canister/models/Qwen/Qwen2.5-0.5B-Instruct-GGUF/qwen2.5-0.5b-instruct-q8_0.gguf",
):
    """Run llama-cli once on ``prompt`` and return what it wrote to stdout.

    Args:
        prompt: Prompt text, passed with ``-p``.
        num_tokens: Passed with ``-n``.
        seed: Passed with ``--seed``.
        temp: Passed with ``--temp``.
        top_k: Passed with ``--top-k``.
        top_p: Passed with ``--top-p``.
        min_p: Passed with ``--min-p``.
        tfs: Passed with ``--tfs``.
        llama_cli_path: Path of the llama-cli executable. Defaults to the
            location this notebook has always used.
        model_path: Path of the model file passed with ``-m``. Defaults to
            the model this notebook has always used.

    Returns:
        The captured standard output of the process, as text.

    Raises:
        FileNotFoundError: If ``llama_cli_path`` does not exist.
        subprocess.CalledProcessError: If the process exits with a non-zero
            status. The captured stderr is available on the exception's
            ``stderr`` attribute. Previously such a failure was silently
            returned as whatever (usually empty) stdout was produced.
    """
    # NOTE(review): tail-free sampling was dropped from recent llama.cpp
    # releases - confirm the llama-cli build in use still accepts --tfs.
    # Define the command as a list (no shell involved, so the prompt needs no quoting)
    command = [
        llama_cli_path,
        "-m",
        model_path,
        "--simple-io",
        "--no-display-prompt",  # only return the generated text, without special characters
        # "-sp", # output special tokens
        "-n",
        f"{num_tokens}",
        "--seed",
        f"{seed}",
        "--temp",
        f"{temp}",
        "--top-k",
        f"{top_k}",
        "--top-p",
        f"{top_p}",
        "--min-p",
        f"{min_p}",
        "--tfs",
        f"{tfs}",
        # "--typical",
        # f"{typical}",
        # "--mirostat",
        # f"{mirostat}",
        # "--mirostat-lr",
        # f"{mirostat_lr}",
        # "--mirostat-ent",
        # f"{mirostat_ent}",
        "-p",
        prompt,
    ]

    # Run the command and capture both streams as text
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )

    # Fail loudly instead of handing an empty answer to the caller
    result.check_returncode()

    return result.stdout

In [None]:
import json
import pprint

# Specify the file path
file_path = "2-mainer.json"

# Read the list from the JSON file.
# The encoding is given explicitly so that decoding does not depend on the
# platform's default locale; answers may contain non-ASCII characters.
with open(file_path, "r", encoding="utf-8") as file:
    challenges_with_answers = json.load(file)

print(f"challenges_with_answers have been read from the file: {file_path}")

# Show only the first three challenges to keep the cell output short
pprint.pprint(challenges_with_answers[:3])

In [None]:
# Judge: for each challenge, run a pairwise tournament over the mAIner
# answers. The current best answer is compared against one not-yet-judged
# answer from a different mAIner at a time; the judge model picks the winner,
# which becomes the new best answer.

judge_num_tokens = 10
judge_seed = 42

# disable all these
judge_temp = 0.0
judge_top_k = 0  # (default: 40, 0 = disabled)
judge_top_p = 1.0  # (default: 0.9, 1.0 = disabled)
judge_min_p = 0.0  # (default: 0.1, 0.0 = disabled)
judge_tfs = 1.0  # (default: 1.0, 1.0 = disabled)


for challenge in challenges_with_answers[:1]:
    challenge_id = challenge["challenge_id"]
    challenge_topic = challenge["challenge_topic"]
    challenge_question = challenge["challenge_question"]
    mainer_answers = challenge["mainer_answers"]

    # Nothing to judge when a challenge has no answers at all
    if not mainer_answers:
        print(f"challenge_id: {challenge_id} has no mAIner answers. Skipping.")
        continue

    # The "judged" markers are stored on the loaded data itself. Clear the
    # ones left behind by an earlier run of this cell; otherwise a re-run
    # finds every answer already judged and does nothing.
    for answer in mainer_answers:
        answer.pop("judged", None)

    # Somehow judging works best for Qwen2.5 when I use:
    # <canister-id-1>: <answer>
    # <canister-id-2>: <answer>
    #
    best_mainer_answer = mainer_answers[0]
    best_mainer_answer["judged"] = True
    while True:
        # Next candidate: the first answer that comes from a different
        # mAIner than the current best and has not been judged yet.
        next_mainer_answer = next(
            (
                candidate
                for candidate in mainer_answers[1:]
                if candidate["mainer_id"] != best_mainer_answer["mainer_id"]
                and "judged" not in candidate
            ),
            None,
        )

        # break the loop if no more mainer answers to judge for this challenge
        if next_mainer_answer is None:
            break

        judge_content_best = f'{best_mainer_answer["mainer_id"]}: {filter_text(best_mainer_answer["mainer_answer"])}'
        judge_content_next = f'{next_mainer_answer["mainer_id"]}: {filter_text(next_mainer_answer["mainer_answer"])}'
        # Both candidates use the same "<id>: <answer>" layout, one per line
        judge_content = f"\n{judge_content_best}\n{judge_content_next}\n"

        print("-------------------")
        print(
            f"challenge_id: {challenge_id}, topic: {challenge_topic} - challenge_question: {challenge_question}"
        )

        # NOTE: the indentation inside this literal is part of the prompt
        # that is sent to the model; it is kept exactly as it was.
        judge_prompt = f"""<|im_start|>user
        This is a question: {challenge_question}

        Select the best answer:

        {judge_content}

        <|im_end|>
        <|im_start|>assistant
        The best answer is:  
        """

        print(judge_prompt)
        print(" ")

        judge_answer = run_llama_cpp(
            judge_prompt,
            judge_num_tokens,
            judge_seed,
            judge_temp,
            judge_top_k,
            judge_top_p,
            judge_min_p,
            judge_tfs,
            # judge_typical,
            # judge_mirostat,
            # judge_mirostat_lr,
            # judge_mirostat_ent,
        )

        print(f"judge_answer (raw): {judge_answer}")

        # Strip first: filter_text keeps only the first line, so a leading
        # blank line in the raw output would otherwise yield an empty answer.
        judge_answer = filter_text(judge_answer.strip())

        # An empty string is contained in every string, so it must never
        # count as a match.
        matches_best = bool(judge_answer) and judge_answer in judge_content_best
        matches_next = bool(judge_answer) and judge_answer in judge_content_next

        if matches_best and not matches_next:
            print(
                f"The best answer given by mAIner ID: {best_mainer_answer['mainer_id']}"
            )
        elif matches_next and not matches_best:
            print(
                f"The best answer given by mAIner ID: {next_mainer_answer['mainer_id']}"
            )
            best_mainer_answer = next_mainer_answer
        else:
            # No match, or a verdict that fits both candidates equally
            print(
                "Could not determine the best answer. Leaving the best answer unchanged."
            )

        # Always mark the next answer as judged
        next_mainer_answer["judged"] = True

In [None]:
#... THIS WORKED ... 
# REMOVE THIS CELL ONCE THE LOOP ABOVE IS WORKING

# judge_num_tokens = 10
# judge_seed = 1
# judge_temp = 0.2


# challenges_with_answers_and_scores = challenges_with_answers.copy()


# for challenge in challenges_with_answers_and_scores[:1]:
#     challenge_id = challenge["challenge_id"]
#     challenge_topic = challenge["challenge_topic"]
#     challenge_question = challenge["challenge_question"]
#     mainer_answers = challenge["mainer_answers"]

#     judge_content = ""
#     mainer_1 = None
#     mainer_2 = None
#     for mainer_answer in mainer_answers:
#         mainer_id = mainer_answer["mainer_id"]
#         mainer_try = mainer_answer["mainer_try"]
#         mainer_answer = mainer_answer["mainer_answer"]

#         if mainer_1 is None:
#             mainer_1 = mainer_id
#         elif mainer_2 is None:
#             mainer_2 = mainer_id
#         elif mainer_1 == mainer_id:
#             mainer_1 = mainer_id
#         elif mainer_2 == mainer_id:
#             mainer_2 = mainer_id
#         else:
#             mainer_1 = mainer_id
#             mainer_2 = None

#         print(f"mainer_id = {mainer_id}")
#         print(f"mainer_1  = {mainer_1}")
#         print(f"mainer_2  = {mainer_2}")
#         continue
#         judge_content += f"{mainer_id}_{mainer_try}: {mainer_answer}\n"

#     # Dynamically add the mainer_answers key to the challenge dictionary
#     # (Envisioning that the caller to the mainers will do it like this...)
#     challenge["mainer_answers"] = []

#     print("-------------------")
#     print(
#         f"challenge_id: {challenge_id}, topic: {challenge_topic} - challenge_question: {challenge_question}"
#     )

#     # GOOD !!
#     # judge_prompt = f"""<|im_start|>user
#     # This is a question: {challenge_question}

#     # Select the best answer:

#     # 0 - Byzantine fault tolerance
#     # 1 - cross-chain blockchain
#     # 2 - Parachain
#     # 3 - Hybrid blockchain

#     # <|im_end|>
#     # <|im_start|>assistant
#     # The best answer is:
#     # """

#     # GOOD !!
#     # judge_contents = [
#     #     f"""
#     # mAIner ID : Answer
#     # --------- : ------
#     # 000-AAA-XXX : Byzantine fault tolerance
#     # 345-aaa-cai: Multi-chain
#     # bcd-990 : cross-chain blockchain
#     # blx-00x-cai : Parachain
#     # 9900-000-cai Hybrid blockchain
#     # xxxx-000-cai : Byzantine fault tolerance
#     # yyyy-9999-cai : Parlaychain
#     # abc-123-456 : Parallel chain
#     # """,
#     #     f"""
#     # mAIner ID : Answer
#     # --------- : ------
#     # 000-AAA-XXX : Byzantine fault tolerance
#     # 345-aaa-cai: Multi-chain
#     # bcd-990 : cross-chain blockchain
#     # 9900-000-cai Hybrid blockchain
#     # xxxx-000-cai : Byzantine fault tolerance
#     # yyyy-9999-cai : Parlaychain
#     # abc-123-456 : Parallel chain
#     # """,
#     #     f"""
#     # mAIner ID : Answer
#     # --------- : ------
#     # 000-AAA-XXX : Byzantine fault tolerance
#     # 345-aaa-cai: Multi-chain
#     # bcd-990 : cross-chain blockchain
#     # xxxx-000-cai : Byzantine fault tolerance
#     # yyyy-9999-cai : Parlaychain
#     # abc-123-456 : Parallel chain
#     # """,
#     # ]

#     # judge_contents = [
#     #     f"""
#     # 0: Byzantine fault tolerance
#     # 1: Multi-chain
#     # 2: cross-chain blockchain
#     # 3: Parachain
#     # 4: Hybrid blockchain
#     # 5: Byzantine fault tolerance
#     # 6: Parlaychain
#     # 7: Parallel chain
#     # """,
#     #     f"""
#     # 0: Byzantine fault tolerance
#     # 1: Multi-chain
#     # 2: cross-chain blockchain
#     # 3: Parachain
#     # 4: Hybrid blockchain
#     # 5: Byzantine fault tolerance
#     # 6: Parlaychain
#     # 7: Parallel chain
#     # """,
#     #     f"""
#     # 0: Byzantine fault tolerance
#     # 1: Multi-chain
#     # 2: cross-chain blockchain
#     # 3: Parachain
#     # 4: Hybrid blockchain
#     # 5: Byzantine fault tolerance
#     # 6: Parlaychain
#     # 7: Parallel chain
#     # """,
#     # ]

#     # judge_contents = [
#     #     f"""
#     # mAIner ID : Answer
#     # --------- : ------
#     # 000-AAA-XXX : Byzantine fault tolerance
#     # 345-aaa-cai: Multi-chain
#     # bcd-990 : cross-chain blockchain
#     # blx-00x-cai : Parachain
#     # 9900-000-cai Hybrid blockchain
#     # xxxx-000-cai : Byzantine fault tolerance
#     # yyyy-9999-cai : Parlaychain
#     # abc-123-456 : Parallel chain
#     # """,
#     #     f"""
#     # mAIner ID : Answer
#     # --------- : ------
#     # 000-AAA-XXX : Byzantine fault tolerance
#     # 345-aaa-cai: Multi-chain
#     # bcd-990 : cross-chain blockchain
#     # 9900-000-cai Hybrid blockchain
#     # xxxx-000-cai : Byzantine fault tolerance
#     # yyyy-9999-cai : Parlaychain
#     # abc-123-456 : Parallel chain
#     # """,
#     #     f"""
#     # mAIner ID : Answer
#     # --------- : ------
#     # 000-AAA-XXX : Byzantine fault tolerance
#     # 345-aaa-cai: Multi-chain
#     # bcd-990 : cross-chain blockchain
#     # xxxx-000-cai : Byzantine fault tolerance
#     # yyyy-9999-cai : Parlaychain
#     # abc-123-456 : Parallel chain
#     # """,
#     # ]
#     # judge_contents = [
#     #     f"""
#     # 000-AAA-XXX: Byzantine fault tolerance
#     # 345-aaa-cai: Multi-chain
#     # bcd-990: cross-chain blockchain
#     # blx-00x-cai: Parachain
#     # 9900-000-cai: Hybrid blockchain
#     # xxxx-000-cai: Byzantine fault tolerance
#     # yyyy-9999-cai: Parlaychain
#     # abc-123-456: Parallel chain
#     # """,
#     #     f"""
#     # 000-AAA-XXX: Byzantine fault tolerance
#     # 345-aaa-cai: Multi-chain
#     # bcd-990: cross-chain blockchain
#     # 9900-000-cai: Hybrid blockchain
#     # xxxx-000-cai: Byzantine fault tolerance
#     # yyyy-9999-cai: Parlaychain
#     # abc-123-456: Parallel chain
#     # """,
#     #     f"""
#     # 000-AAA-XXX: Byzantine fault tolerance
#     # 345-aaa-cai: Multi-chain
#     # bcd-990: cross-chain blockchain
#     # 9900-000-cai: Hybrid blockchain
#     # xxxx-000-cai: Byzantine fault tolerance
#     # abc-123-456: Parallel chain
#     # """,
#     # ]

#     # judge_contents = [
#     #     f"""
#     # 0: Byzantine fault tolerance
#     # 1: Multi-chain
#     # 2: cross-chain blockchain
#     # 3: Parachain
#     # 4: Hybrid blockchain
#     # 5: Byzantine fault tolerance
#     # 6: Parlaychain
#     # 7: Parallel chain
#     # """,
#     #     f"""
#     # 0: Byzantine fault tolerance
#     # 1: Multi-chain
#     # 2: cross-chain blockchain
#     # 3: Parachain
#     # 4: Hybrid blockchain
#     # 5: Byzantine fault tolerance
#     # 6: Parlaychain
#     # 7: Parallel chain
#     # """,
#     #     f"""
#     # 0: Byzantine fault tolerance
#     # 1: Multi-chain
#     # 2: cross-chain blockchain
#     # 3: Parachain
#     # 4: Hybrid blockchain
#     # 5: Byzantine fault tolerance
#     # 6: Parlaychain
#     # 7: Parallel chain
#     # """,
#     # ]

#     # judge_contents = [
#     #     "Byzantine fault tolerance",
#     #     "Multi-chain",
#     #     "cross-chain blockchain",
#     #     "Parachain",
#     #     "Hybrid blockchain",
#     #     "Byzantine fault tolerance",
#     #     "Parlaychain",
#     #     "Parallel chain",
#     # ]

#     # for judge_content in judge_contents:
#     judge_prompt = f"""<|im_start|>user
#     This is a question: {challenge_question}

#     Select the best answer:

#     {judge_content}

#     <|im_end|>
#     <|im_start|>assistant
#     The best answer is:  
#     """

#     # judge_prompt = f"""<|im_start|>user
#     # This is a question: {challenge_question}

#     # Score this answer from 0 to 10

#     # {judge_content}

#     # <|im_end|>
#     # <|im_start|>assistant
#     # The score is:
#     # """

#     print(judge_prompt)
#     print(" ")

#     # disable all these
#     judge_temp = 0.0
#     judge_top_k = 0  # (default: 40, 0 = disabled)
#     judge_top_p = 1.0  # (default: 0.9, 1.0 = disabled)
#     judge_min_p = 0.0  # (default: 0.1, 0.0 = disabled)
#     judge_tfs = 1.0  # (default: 1.0, 1.0 = disabled)

#     # for judge_temp in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
#     # print(f"judge_temp: {judge_temp}")
#     # for j in range(3):
#     judge_answer = run_llama_cpp(
#         judge_prompt,
#         judge_num_tokens,
#         judge_seed,
#         judge_temp,
#         judge_top_k,
#         judge_top_p,
#         judge_min_p,
#         judge_tfs,
#         # judge_typical,
#         # judge_mirostat,
#         # judge_mirostat_lr,
#         # judge_mirostat_ent,
#     )
#     print(f"judge_answer (raw): {judge_answer}")

#     judge_answer = filter_text(judge_answer)

#     # Remove everything after the first whitespace
#     judge_answer = judge_answer.split(" ")[0]

#     # Remove everything after the first ':'
#     judge_answer = judge_answer.split(":")[0]

#     print(f"The best answer given by mAIner ID: {judge_answer}")

#     # print(f"judge_temp: {judge_temp} - answer: {judge_answer}")

#     # challenge["mainer_answers"].append(
#     #     {
#     #         "mainer_id": str(j + judge_temp),
#     #         "mainer_answer": mainer_answer,
#     #     }

In [19]:
# # Specify the file path
# file_path = "challenges_with_answers.json"

# # Write the list to a JSON file
# with open(file_path, "w") as file:
#     json.dump(challenges_with_answers, file, indent=4)

# print(f"Challenges with answers have been written to {file_path}")