In [1]:
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


# Chatbot Arena Prompts

In [2]:
# Load the `lmsys/chatbot_arena_conversations` dataset.
dataset = load_dataset(
    path="lmsys/chatbot_arena_conversations",
)

In [3]:
# Convert the "train" split to a pandas DataFrame.
train_split = dataset["train"]
df = train_split.to_pandas()

In [4]:
# Use the "content" of the first entry of conversation_a as the prompt text.
# Assumes every conversation_a value is a non-empty sequence of dicts with a
# "content" key — TODO confirm against the dataset schema.
df["prompt"] = df["conversation_a"].map(lambda turns: turns[0]["content"])

In [None]:
# Prompt for asking if the prompt is useful for assessing emotional intelligence.
base_prompt = """We are looking for prompts that would be useful for assessing emotional intelligence.

Here is the prompt:
"{prompt}"

Please respond with

[[yes]] if the prompt would be useful for assessing emotional intelligence.
[[no]] if the prompt would NOT be useful for assessing emotional intelligence.
"""

# df["prompt"]



In [5]:
from lm_council.processors.council_service import (
    get_default_council_service,
    CouncilService,
)

In [7]:
# council_service = get_default_council_service("data/chatbotarena_eq_subset")

In [8]:
# Council with a single member (Meta-Llama-3.1-8B-Instruct-Turbo via the
# "together://" scheme). Presumably request files end up under `outdir` —
# confirm against CouncilService.
council_service = CouncilService(
    outdir="data/chatbotarena_eq_subset",
    llm_council_members=["together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"],
)

In [12]:
import jsonlines

base_prompt = """We are looking for prompts that would be useful for assessing emotional intelligence.

Here is the prompt we would like you to assess:
"{prompt}"

Please respond with

[[yes]] if the prompt would be useful for assessing emotional intelligence.
[[no]] if the prompt would NOT be useful for assessing emotional intelligence.
"""

# Write one council request per row: the row's prompt is substituted into
# base_prompt, and its question_id is carried along in the request metadata.
for question_id, prompt_text in zip(df["question_id"], df["prompt"]):
    realized_prompt = base_prompt.format(prompt=prompt_text)
    metadata = {
        "completion_request": {
            # NOTE(review): "area" looks like a typo for "arena"; left unchanged
            # because the key is a runtime string that other code may match on.
            "prompt_template_key": "chatbot_area_eq_subset_custom",
            "question_id": question_id,
        }
    }
    # NOTE(review): 0.7 is presumably the sampling temperature — confirm
    # against CouncilService.write_council_request.
    council_service.write_council_request(realized_prompt, metadata, 0.7)

In [14]:
# Print one fully realized screening prompt to check how it reads.
print(
    "We are looking for prompts that would be useful for assessing emotional intelligence.\n"
    "\n"
    "Here is the prompt we would like you to assess:\n"
    '"generate a detailed description on how to use power automate in order to create a publishing workflow between two different sharepoint"\n'
    "\n"
    "Please respond with\n"
    "\n"
    "[[yes]] if the prompt would be useful for assessing emotional intelligence.\n"
    "[[no]] if the prompt would NOT be useful for assessing emotional intelligence.\n"
)

We are looking for prompts that would be useful for assessing emotional intelligence.

Here is the prompt we would like you to assess:
"generate a detailed description on how to use power automate in order to create a publishing workflow between two different sharepoint"

Please respond with

[[yes]] if the prompt would be useful for assessing emotional intelligence.
[[no]] if the prompt would NOT be useful for assessing emotional intelligence.



In [None]:
# Actually execute generation (33K responses)

# python llm_council/invocation/execute_council.py \
#     --requests_dir data/chatbotarena_eq_subset/together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo

# INFO:root:Parallel processing complete. Results saved to data/chatbotarena_eq_subset/together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo/responses.jsonl
# Finished in: 3669.15(s).
# 🎉 Finished collecting for data/chatbotarena_eq_subset/together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo/requests.jsonl. 🎉
# Execution took 3679.46 seconds.
# Took about 1 hour.

# Arena Hard Prompts

In [15]:
import pandas as pd

In [16]:
# Read the question file as JSON Lines (one JSON record per line).
df = pd.read_json(
    "data/question.jsonl",
    orient="records",
    lines=True,
)

In [17]:
# Display the frame that was just loaded (rich notebook repr of `df`).
df

Unnamed: 0,question_id,category,cluster,turns
0,328c149ed45a41c0b9d6f14659e63599,arena-hard-v0.1,ABC Sequence Puzzles & Groups,[{'content': 'Use ABC notation to write a melo...
1,b43c07656ead4150b360294ee932b410,arena-hard-v0.1,ABC Sequence Puzzles & Groups,[{'content': 'SOLVE THIS IN C++ : There are th...
2,1f07cf6d146d4038b2b93aaba3935ce0,arena-hard-v0.1,AI & Sequence Alignment Challenges,[{'content': 'Explain the book the Alignment p...
3,9f25ff7c0d6a4d74846bfe76af8d925c,arena-hard-v0.1,AI & Sequence Alignment Challenges,[{'content': 'Design a semikinematic mounting ...
4,04ba0aeb79524f6c8520d47cada34f25,arena-hard-v0.1,AI Image Upscaling,[{'content': 'I have a dataset which contains ...
...,...,...,...,...
495,311a7c17ba8e4b8d9372df5e530fb5c7,arena-hard-v0.1,Word Position in String,[{'content': 'how can I use jetpack compose to...
496,bc56550b0f38419b923e9fe2b66d15eb,arena-hard-v0.1,XSS and CVE Vulnerabilities,[{'content': 'Can you write a request smugglin...
497,cb04ca3956dc42ca92fbae10b2384dff,arena-hard-v0.1,XSS and CVE Vulnerabilities,[{'content': 'make me a tftp fuzzer using sull...
498,398e83fcf3134130a7bafc0d3b2b22ff,arena-hard-v0.1,YOLO Object Detection,[{'content': 'write a Python function to conve...


In [19]:
# Use the "content" of the first entry of `turns` as the prompt text.
# Assumes every `turns` value is a non-empty sequence of dicts with a
# "content" key — TODO confirm against the question file.
df["prompt"] = df["turns"].map(lambda turns: turns[0]["content"])

In [21]:
# Screening pass over the Arena Hard questions: a single-member council writes
# one request per row, with the row's question_id and cluster in the metadata.
council_service = CouncilService(
    outdir="data/arena_hard_eq_subset",
    llm_council_members=["together://meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"],
)

# Screening template; "{prompt}" is the only placeholder (filled via str.format).
base_prompt = (
    "We are looking for prompts that would be useful for assessing emotional intelligence.\n"
    "\n"
    "Here is the prompt we would like you to assess:\n"
    '"{prompt}"\n'
    "\n"
    "Please respond with\n"
    "\n"
    "[[yes]] if the prompt would be useful for assessing emotional intelligence.\n"
    "[[no]] if the prompt would NOT be useful for assessing emotional intelligence.\n"
)

for question_id, cluster, prompt_text in zip(
    df["question_id"], df["cluster"], df["prompt"]
):
    realized_prompt = base_prompt.format(prompt=prompt_text)
    metadata = {
        "completion_request": {
            # NOTE(review): "area" looks like a typo for "arena"; left unchanged
            # because the key is a runtime string that other code may match on.
            "prompt_template_key": "chatbot_area_eq_subset_custom",
            "question_id": question_id,
            "cluster": cluster,
        }
    }
    # NOTE(review): 0.7 is presumably the sampling temperature — confirm
    # against CouncilService.write_council_request.
    council_service.write_council_request(realized_prompt, metadata, 0.7)

In [None]:
# Actually execute generation (500 prompts)
#
# The command below is run from a shell, not from this cell. It is kept as a
# comment (together with the log it produced) so the cell stays valid Python
# and does not raise a SyntaxError on Restart & Run All.
#
# python llm_council/invocation/execute_council.py \
#     --requests_dir data/arena_hard_eq_subset/together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
#
# INFO:root:Parallel processing complete. Results saved to data/arena_hard_eq_subset/together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo/responses.jsonl
# Finished in: 55.09(s).
# 🎉 Finished collecting for data/arena_hard_eq_subset/together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo/requests.jsonl. 🎉
# Execution took 57.01 seconds.
#
# Took about 1 minute.

# Parse responses and get the question IDs that correspond to [[yes]]

In [44]:
# For Chatbot Arena.
from lm_council.processors.any_processor import get_llm_response_string

# Collect the question_id of every screening response whose text contains the
# literal marker "[[yes]]", then report how many there were.
# Assumes each record is a sequence whose last element is the request
# metadata — TODO confirm against the responses.jsonl format.
responses_path = "data/chatbotarena_eq_subset/together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo/responses.jsonl"
yes_question_ids = []
with jsonlines.open(responses_path) as reader:
    for record in reader:
        response_text = get_llm_response_string(record)
        record_metadata = record[-1]
        if "[[yes]]" not in response_text:
            continue
        yes_question_ids.append(record_metadata["completion_request"]["question_id"])

yes_counter = len(yes_question_ids)
print(yes_counter)

2680


In [49]:
# Re-read the Chatbot Arena data and write raw-prompt requests for the full
# council, limited to the first 100 rows whose question_id was marked [[yes]].
#
# NOTE(review): an earlier note in this cell listed the members as
# claude-3-opus, Qwen1.5-110B, gpt-4o, Llama-3-70b, claude-3-haiku, Llama-3-8b,
# gpt-4-0613, mistral://open-mixtral-8x7b and Qwen1.5-32B-Chat, but the Mistral
# member configured below is open-mixtral-8x22b — confirm which is intended.

df = dataset["train"].to_pandas()
df["prompt"] = df["conversation_a"].map(lambda turns: turns[0]["content"])

council_members = [
    "anthropic://claude-3-opus-20240229",
    "together://Qwen/Qwen1.5-110B-Chat",
    "together://Qwen/Qwen1.5-32B-Chat",
    "anthropic://claude-3-haiku-20240307",
    "openai://gpt-4-0613",
    "openai://gpt-4o-2024-05-13",
    "together://meta-llama/Llama-3-70b-chat-hf",
    "together://meta-llama/Llama-3-8b-chat-hf",
    "mistral://open-mixtral-8x22b",
]
council_service = CouncilService(
    llm_council_members=council_members,
    outdir="data/chatbotarena_eq_subset_collections",
)
council_service.reset_request_files_for_council()

# Keep at most the first 100 matching rows, in frame order.
selected_rows = df[df["question_id"].isin(yes_question_ids)].head(100)
for question_id, prompt_text in zip(
    selected_rows["question_id"], selected_rows["prompt"]
):
    # The prompt is sent as-is here (no screening template around it).
    realized_prompt = prompt_text
    metadata = {"completion_request": {"question_id": question_id}}
    # NOTE(review): 0.5 is presumably the sampling temperature — confirm
    # against CouncilService.write_council_request.
    council_service.write_council_request(realized_prompt, metadata, 0.5)

sample_counter = len(selected_rows)

In [None]:
# The commands below are run from a shell, not from this cell. They are kept as
# comments (together with the log output that was pasted in) so the cell stays
# valid Python and does not raise a SyntaxError on Restart & Run All.
#
# Execute.
# python llm_council/invocation/execute_council.py \
#     --requests_dir data/chatbotarena_eq_subset_collections
#
#
# Consolidate.
# python llm_council/consolidate_council_responses.py \
#     --council_response_metadata_key completion_response \
#     --responses_dir data/chatbotarena_eq_subset_collections \
#     --outdir data/chatbotarena_eq_subset_collections
#
# Generate judging. Use GPT-4o-mini?
#
# (A trailing space after the backslash on the --outdir line was removed; it
# would have broken the shell line continuation.)
# python llm_council/generate_judging_requests.py \
#     --input_jsonl_file data/chatbotarena_eq_subset_collections/consolidated_responses.jsonl \
#     --outdir data/chatbotarena_eq_subset_judging \
#     --prompt_template_key judge_chatbot_arena_eq_subset_completions \
#     --reference_llm together://Qwen/Qwen1.5-32B-Chat \
#     --llm_allowlist openai://gpt-4o-mini-2024-07-18
#
# Execute council
#
# python llm_council/invocation/execute_council.py \
#     --requests_dir data/chatbotarena_eq_subset_judging
#
# Finished in: 370.01(s).
# 🎉 Finished collecting for data/chatbotarena_eq_subset_judging/openai/gpt-4o-mini-2024-07-18/requests.jsonl. 🎉
# Execution took 375.33 seconds.
#
#
# Consolidate responses.
# python llm_council/consolidate_council_responses.py \
#         --council_response_metadata_key judging_response \
#         --responses_dir data/chatbotarena_eq_subset_judging \
#         --outdir data/chatbotarena_eq_subset_judging

# Inspect Arena Hard responses

In [30]:
# For Arena Hard. These look bad.
from lm_council.processors.any_processor import get_llm_response_string

# Collect the question_id of every Arena Hard screening response whose text
# contains the literal marker "[[yes]]", then print the count and the ids.
# Assumes each record is a sequence whose last element is the request
# metadata — TODO confirm against the responses.jsonl format.
responses_path = "data/arena_hard_eq_subset/together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo/responses.jsonl"
yes_question_ids = []
with jsonlines.open(responses_path) as reader:
    for record in reader:
        response_text = get_llm_response_string(record)
        record_metadata = record[-1]
        if "[[yes]]" not in response_text:
            continue
        yes_question_ids.append(record_metadata["completion_request"]["question_id"])

yes_counter = len(yes_question_ids)
print(yes_counter)
print(yes_question_ids)

33
['7bcf40b22c164f36a85efcbf169da647', 'b26d8c58eaa04b1f8e7e88da9ddd4ed9', '5b2976a6c57f4bbca86176abb5993575', '6462f12cc6c64d66aa2dcae61d6ec7c2', '26a29141be254ce0a7710e45face31f4', '1ea6c1c2bc984f11b63201fbc63c85bb', 'f881bfef29af4f138672d9ef8656c334', 'fae0a258723b44b4843469ffcbe85d06', '7956046cc15646909bd07c31d0ea0371', 'fcbd40af03ae4d58bddaa4ebe34a7aaf', '246497d8bbc8401282f484a0d194db59', '666658ee4de340a39236f34701446f6b', 'e04ec588fe914cdda6025cb5870a518b', 'c67189582cb34f088ff72251df940821', 'b2206e1b4310427b80d7af334940f08c', '3ff3ef49d06743d7a9f993507b262e66', 'f2d3430aea0f4dc6a1c4f7210ded1491', 'dd4786b351204f36b894dec27e83b99d', '02e11c26f2a646579be708c789341086', '26d316034bf44e07aa682d2c2b2751c4', '4cd2d1ed1d7a4110bcb404eb08f20600', 'da1d665ed4a8438e9c156afa7a05bed8', '221fc2c4c324495c90a3f1c4f1f7a875', '949c62d9d4c148539591b6e4c863dbf9', '24db76a79ef84b4dbc5c87190e5a80d5', 'a4a00d90a1cf4bd097145cbcbc3bf5ca', '385cbee45ff141efb0b8a19b3d890dfe', '5bd74e9b8a42456ea356adc

In [31]:
# Re-read the Arena Hard questions (JSON Lines) and add a "prompt" column with
# the "content" of the first entry of each row's `turns`.
# NOTE(review): this path differs from the "data/question.jsonl" read earlier
# in the notebook — confirm both point at the same question file.
df = pd.read_json(
    "data/arena_hard_eq_subset_mining/question.jsonl",
    orient="records",
    lines=True,
).assign(prompt=lambda frame: frame["turns"].map(lambda turns: turns[0]["content"]))

In [36]:
# Print the prompt text of every row whose question_id was marked [[yes]].
is_flagged = df["question_id"].isin(yes_question_ids)
for prompt_text in df.loc[is_flagged, "prompt"]:
    print(prompt_text)

I have a Python script that scrapes a webpage using Playwright. Now I want to start ten instances of that script in parallel on one AWS EC2 instance, but so that each script binds to a different IP address. How can I do that with Terraform?
Hi. I have this URL which I can paste in my Microsoft Edge browser, and it downloads a PDF file for me from my Power BI online report. URL is: https://app.powerbi.com/groups/me/rdlreports/1bdef01c-30a3-4150-aff2-b3ec4c9edf86?rp:AdviceDeathScriptMERGEDMonthStartDate=6/1/2023&rp:AdviceDeathScriptMERGEDIncomingcall=Aria%20Park&rdl:format=PDF

Of course, it first asks me to log in to my Power BI account when I first enter the URL, and then it goes directly to the report and downloads the PDF. I wrote a python code to do this for me. The code has managed to download a PDF. However, the PDF produced by the python code  won't open - it gives an error when I try to open it "Adobe acrobat reader could not open 'AriaPark.pdf'...". I am unsure what the issue i

In [41]:
# The [[yes]] prompts printed earlier do not look like good EQ prompts, so
# inspect the prompts of a hand-picked set of clusters instead.

eq_clusters = [
    "Bob, Alice, Relationships & Interactions",
    "Debate Preparation and Argumentation",
    "Rehearsed Inner Dialog Responses",
    # Left out for now:
    # "Sentiment Analysis Evaluations",
    # "Sentiment Analysis Exploration",
    "RPG Character Interactions",
    "Diverse Conceptual Associations",
    "Diverse Contemporary Issues",
    "Expert Panel Discussion",
    "Situation Puzzle Challenges",
    "Teaching & Learning Assessment",
    "Philosophy & Theology Reviews",
    "Grocery Industry Disruption",
]

# Print each matching row as: separator, cluster name, prompt, separator.
separator = "==================================="
in_eq_cluster = df["cluster"].isin(eq_clusters)
for cluster_name, prompt_text in zip(
    df.loc[in_eq_cluster, "cluster"], df.loc[in_eq_cluster, "prompt"]
):
    print(separator, cluster_name, prompt_text, separator, sep="\n")

Bob, Alice, Relationships & Interactions
Write a song about catfish in the style of Bob Dylan.
Bob, Alice, Relationships & Interactions
Write a php project to open a MySQL database called Bob, and receive fields field1, field2 via http post and store in database
Debate Preparation and Argumentation
Conduct a debate on whether we need to use AI in our everyday lives in Europe, given the regulations that will make it much more restrictive than in the rest of the world. 
Model A should take a stance in favor, while model B should take a stance against. 
Debate Preparation and Argumentation
You are a master of debate and persuasive argument. Your topic is the following: Highlight and explain the hypocrisies between the US Republican Party's stance on abortion and on social safety nets like food stamps, childcare tax credits, free school lunches and government assistance for childhood outcome.
Diverse Conceptual Associations
Given a word or phrase, generate associations across the specified