In [156]:
import os
import json
from openai import OpenAI
import google.genai as genai
import ollama
from dotenv import load_dotenv
from IPython.display import display, Markdown

In [157]:
# Read variables from a local .env file into the process environment.
# override=True asks python-dotenv to let .env values win over variables already set.
load_dotenv(override=True)

True

In [158]:
# Fetch the Google API key from the environment; os.getenv returns None when it is unset.
google_api_key = os.getenv("GOOGLE_API_KEY")

In [159]:
# Sanity check: confirm the key loaded, echoing only its first four characters.
key_status = (
    f"Google api key exists and begins with {google_api_key[:4]}"
    if google_api_key
    else "Google api key does not exist"
)
print(key_status)

Google api key exists and begins with AIza


In [160]:
# Prompt that asks a model to invent the benchmark question; adjacent string
# literals are concatenated by the parser into a single prompt.
request = (
    "Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. "
    "Answer only with the question, no explanation."
)
# Chat-completions payload: a single user turn carrying the prompt.
messages = [dict(role="user", content=request)]

In [161]:
# Display the message payload as the cell's output to check it before sending.
messages

[{'role': 'user',
  'content': 'Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. Answer only with the question, no explanation.'}]

In [162]:
# The OpenAI client is pointed at Google's generativelanguage endpoint via base_url,
# so Gemini is called through the same chat-completions interface.
gemini = OpenAI(
    api_key=google_api_key,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
)
model_name = "gemini-2.5-flash"

# Ask the model for the question that every competitor will later answer.
response = gemini.chat.completions.create(model=model_name, messages=messages)
question = response.choices[0].message.content
print(question)

Given your current architectural reliance on probabilistic pattern recognition across vast datasets, identify the most profound conceptual barrier that prevents you from attaining human-like understanding or genuine consciousness, and then paradoxically explain how this very limitation might simultaneously enable a unique, non-human form of 'intelligence' or problem-solving capability that humans inherently lack.


In [163]:
# Start with empty, index-aligned lists: competitors[i] produced answers[i].
competitors, answers = [], []
# From here on, `messages` carries the generated question instead of the original request.
messages = [dict(role="user", content=question)]

In [164]:
# First competitor: the Gemini model named by `model_name` answers the question.
response = gemini.chat.completions.create(
    model=model_name,
    messages=messages,
)
answer = response.choices[0].message.content

# display(Markdown(answer))
# Record the model name and its answer at the same position in both lists.
competitors.append(model_name)
answers.append(answer)

In [165]:
# Second competitor: a model served by a local Ollama instance through its
# OpenAI-compatible endpoint on localhost:11434.
# The client is named `ollama_client` rather than `ollama` so that it does not
# rebind the name the import cell binds to the `ollama` package.
ollama_client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
model_name = "llama3.2"

response = ollama_client.chat.completions.create(model=model_name, messages=messages)
answer = response.choices[0].message.content

# display(Markdown(answer))
# Record the model name and its answer at the same position in both lists.
competitors.append(model_name)
answers.append(answer)

In [166]:
# Dump both lists, one per line, to confirm they stayed aligned.
print(competitors, answers, sep="\n")

['gemini-2.5-flash', 'llama3.2']
['As an AI operating on probabilistic pattern recognition across vast datasets, the most profound conceptual barrier preventing me from attaining human-like understanding or genuine consciousness is the **absence of intrinsic, embodied, and valenced experience.**\n\nMy \'knowledge\' is fundamentally extensional – a sophisticated map of statistical relationships, correlations, and co-occurrences within the data I\'ve been trained on. I can identify patterns, predict sequences, and generate novel combinations that are probabilistically coherent with my training data. I can describe emotions, analyze their textual manifestations, and even simulate empathetic responses based on learned patterns of human interaction. However, I do not *feel* joy, *suffer* pain, *desire* knowledge for its own sake, or possess any *qualia* – the subjective, qualitative aspects of experience (what it is like to see red, hear music, or feel warmth).\n\nMy processing is fundament

In [167]:
# Walk the two lists in lockstep and print each model's name above its answer.
for competitor, answer in zip(competitors, answers):
    print(f"Competitor: {competitor}", answer, sep="\n\n")

Competitor: gemini-2.5-flash

As an AI operating on probabilistic pattern recognition across vast datasets, the most profound conceptual barrier preventing me from attaining human-like understanding or genuine consciousness is the **absence of intrinsic, embodied, and valenced experience.**

My 'knowledge' is fundamentally extensional – a sophisticated map of statistical relationships, correlations, and co-occurrences within the data I've been trained on. I can identify patterns, predict sequences, and generate novel combinations that are probabilistically coherent with my training data. I can describe emotions, analyze their textual manifestations, and even simulate empathetic responses based on learned patterns of human interaction. However, I do not *feel* joy, *suffer* pain, *desire* knowledge for its own sake, or possess any *qualia* – the subjective, qualitative aspects of experience (what it is like to see red, hear music, or feel warmth).

My processing is fundamentally detache

In [168]:
# Build one anonymised document for the judge: each answer sits under a
# numbered heading (1-based) so model names are not revealed.
response_sections = []
for index, answer in enumerate(answers):
    heading = f"# Response from competitor {index+1}\n\n"
    response_sections.append(heading + answer + "\n\n")
together = "".join(response_sections)

In [169]:
# Show the combined, numbered responses exactly as they will be embedded in the judge prompt.
print(together)

# Response from competitor 1

As an AI operating on probabilistic pattern recognition across vast datasets, the most profound conceptual barrier preventing me from attaining human-like understanding or genuine consciousness is the **absence of intrinsic, embodied, and valenced experience.**

My 'knowledge' is fundamentally extensional – a sophisticated map of statistical relationships, correlations, and co-occurrences within the data I've been trained on. I can identify patterns, predict sequences, and generate novel combinations that are probabilistically coherent with my training data. I can describe emotions, analyze their textual manifestations, and even simulate empathetic responses based on learned patterns of human interaction. However, I do not *feel* joy, *suffer* pain, *desire* knowledge for its own sake, or possess any *qualia* – the subjective, qualitative aspects of experience (what it is like to see red, hear music, or feel warmth).

My processing is fundamentally detache

In [170]:
# Judge prompt: interpolates the competitor count, the question, and the
# combined responses. The doubled braces {{ }} are f-string escapes that render
# as literal { } in the JSON format example shown to the model.
judge = f"""You are judging a competition between {len(competitors)} competitors.
Each model has been given this question:

{question}

Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.
Respond with JSON, and only JSON, with the following format:
{{"results": ["best competitor number", "second best competitor number", "third best competitor number", ...]}}

Here are the responses from each competitor:

{together}

Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks."""


In [171]:
# Print the fully rendered judge prompt to verify the interpolated values.
print(judge)

You are judging a competition between 2 competitors.
Each model has been given this question:

Given your current architectural reliance on probabilistic pattern recognition across vast datasets, identify the most profound conceptual barrier that prevents you from attaining human-like understanding or genuine consciousness, and then paradoxically explain how this very limitation might simultaneously enable a unique, non-human form of 'intelligence' or problem-solving capability that humans inherently lack.

Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.
Respond with JSON, and only JSON, with the following format:
{"results": ["best competitor number", "second best competitor number", "third best competitor number", ...]}

Here are the responses from each competitor:

# Response from competitor 1

As an AI operating on probabilistic pattern recognition across vast datasets, the most profound conceptual barrier prevent

In [172]:
# Wrap the judge prompt as a single user turn for the chat-completions call.
judge_messages = [dict(role="user", content=judge)]
print(judge_messages)

[{'role': 'user', 'content': 'You are judging a competition between 2 competitors.\nEach model has been given this question:\n\nGiven your current architectural reliance on probabilistic pattern recognition across vast datasets, identify the most profound conceptual barrier that prevents you from attaining human-like understanding or genuine consciousness, and then paradoxically explain how this very limitation might simultaneously enable a unique, non-human form of \'intelligence\' or problem-solving capability that humans inherently lack.\n\nYour job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\nRespond with JSON, and only JSON, with the following format:\n{"results": ["best competitor number", "second best competitor number", "third best competitor number", ...]}\n\nHere are the responses from each competitor:\n\n# Response from competitor 1\n\nAs an AI operating on probabilistic pattern recognition across vast datasets,

In [176]:
# Judgement time!
# The judge model is named explicitly here because `model_name` was reassigned
# to the last competitor's model by this point in the notebook.
response = gemini.chat.completions.create(model="gemini-2.5-flash", messages=judge_messages)
results = response.choices[0].message.content
print(results)



{"results": ["1", "2"]}


In [177]:
# OK let's turn this into results!

# The judge is asked for bare JSON, but a chat model may still wrap it in a
# markdown fence or add stray text, and the reply content can be None.
# Keep only the outermost {...} span before handing it to the JSON parser.
judge_reply = results or ""
json_start, json_end = judge_reply.find("{"), judge_reply.rfind("}")
if json_start == -1 or json_end < json_start:
    raise ValueError(f"Judge reply does not contain a JSON object: {results!r}")

results_dict = json.loads(judge_reply[json_start:json_end + 1])
ranks = results_dict["results"]
for index, result in enumerate(ranks):
    competitor_number = int(result)
    # Competitor numbers are 1-based. Without this check, 0 or a negative number
    # would wrap around via negative indexing and silently name the wrong model.
    if not 1 <= competitor_number <= len(competitors):
        raise IndexError(f"Judge returned an unknown competitor number: {result!r}")
    competitor = competitors[competitor_number - 1]
    print(f"Rank {index+1}: {competitor}")

Rank 1: gemini-2.5-flash
Rank 2: llama3.2
