## Lab 4: OpenAI for non-OpenAI

In [4]:
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import display, Markdown, update_display
import requests
load_dotenv(override=True)

True

### We're now going to ask a hard question to lots of models

In [10]:
# The single prompt sent to every model. The trailing backslashes sit inside the
# string literal, so the four source lines are joined into one line of text.
message = "In 1 sentence, describe a rainbow to someone who's never been able to see. \
Then in 1 sentence, describe the imaginary number i to someone who doesn't understand math. \
Then in 1 sentence, find a connection between rainbows and imaginary numbers. \
Then end by stating how many words are in your answer."

# Chat-format wrapper around the prompt: one user turn, no system message.
messages = [{"role": "user", "content": message}]

In [None]:
def _report_key(label, key, shown, optional=True):
    """Print whether an API key is set, revealing only its first `shown` characters.

    label    -- provider name used in the printed message
    key      -- the key value read from the environment (None or '' when unset)
    shown    -- how many leading characters of the key to print
    optional -- when True, the "not set" message notes that the key is optional
    """
    if not key:
        suffix = " (and this is optional)" if optional else ""
        print(f"{label} API Key not set{suffix}")
        return
    print(f"{label} API Key exists and begins {key[:shown]}")


# Read each provider's key from the environment.
openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')
deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')
grok_api_key = os.getenv('GROK_API_KEY')

# OpenAI is the only key whose "not set" message omits the optional note.
_report_key("OpenAI", openai_api_key, 8, optional=False)
_report_key("Anthropic", anthropic_api_key, 7)
_report_key("Google", google_api_key, 2)
_report_key("DeepSeek", deepseek_api_key, 3)
_report_key("Groq", groq_api_key, 4)
_report_key("Grok", grok_api_key, 4)

#### <span style="color: green;">Question: what's going on below? Why are we calling OpenAI with details about Anthropic?</span>

In [6]:
# Base URL of each provider's OpenAI-compatible endpoint. The OpenAI client
# appends the route (e.g. /chat/completions) to base_url itself, so every URL
# here must stop at the API root.
anthropic_url = "https://api.anthropic.com/v1"
gemini_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
deepseek_url = "https://api.deepseek.com"
groq_url = "https://api.groq.com/openai/v1"
grok_url = "https://api.x.ai/v1"
# Must end at /v1: a trailing /chat would make the client request
# /v1/chat/chat/completions, which the local server does not serve.
ollama_url = 'http://localhost:11434/v1'

# One OpenAI client object per provider; only the key and base URL differ.
openai = OpenAI()  # no arguments: the client's default endpoint and key lookup
anthropic = OpenAI(api_key=anthropic_api_key, base_url=anthropic_url)
gemini = OpenAI(api_key=google_api_key, base_url=gemini_url)
deepseek = OpenAI(api_key=deepseek_api_key, base_url=deepseek_url)
groq = OpenAI(api_key=groq_api_key, base_url=groq_url)
grok = OpenAI(api_key=grok_api_key, base_url=grok_url)
# The client requires some api_key value; 'ollama' is a placeholder
# (presumably not checked by a local Ollama server -- confirm).
ollama = OpenAI(base_url=ollama_url, api_key='ollama')


NameError: name 'anthropic_api_key' is not defined

In [7]:
# Parallel lists filled by answer(): models[i] produced answers[i].
# Re-running this cell resets both, discarding any answers collected so far.
models = []
answers = []

def answer(client, model):
    """Stream one model's reply to the shared prompt, render it live, and record it.

    Sends the notebook-level `messages` to `model` through `client` with
    streaming enabled, updating a Markdown display as text arrives. When the
    stream ends, a locally computed word count is appended to the reply, and
    the model name and annotated reply are appended to the notebook-level
    `models` and `answers` lists.

    Args:
        client: An OpenAI client object (any of the per-provider clients).
        model: Model identifier passed to the chat completions endpoint.

    Returns:
        None. Results are recorded in `models` and `answers`.
    """
    stream = client.chat.completions.create(model=model, messages=messages, stream=True)
    prefix = f"### Response from {model}:\n\n"
    reply = ""
    display_handle = display(Markdown(prefix), display_id=True)
    for chunk in stream:
        # A chunk may arrive with an empty `choices` list (e.g. a usage-only
        # chunk); indexing [0] on it would raise IndexError, so skip it.
        if not chunk.choices:
            continue
        reply += chunk.choices[0].delta.content or ''
        update_display(Markdown(prefix+reply), display_id=display_handle.display_id)
    # Count only the text after the first </think> marker when one is present,
    # so reasoning text is excluded; partition keeps everything after that
    # marker even if the tag appears again later in the reply.
    _, marker, after_think = reply.partition('</think>')
    words = after_think if marker else reply
    reply += f"\n\n#### Calculated true word count: {len(words.split())}"
    update_display(Markdown(prefix+reply), display_id=display_handle.display_id)

    models.append(model)
    answers.append(reply)

In [11]:
# Ask gpt-4.1-mini through the default OpenAI client; answer() streams and records the reply.
answer(openai, "gpt-4.1-mini")

### Response from gpt-4.1-mini:

A rainbow is a graceful arc of vibrant colors that appears in the sky when sunlight passes through raindrops, creating a beautiful spectrum you can almost feel as a warm, soothing presence. The imaginary number i is like a special tool in math that lets us think about solutions to problems that can’t be solved using just regular numbers, almost like imagining a new direction to explore. Both rainbows and imaginary numbers reveal hidden dimensions—rainbows show the unseen spectrum of light, while imaginary numbers open up new possibilities beyond everyday numbers. My answer contains 66 words.

#### Calculated true word count: 96

In [None]:
# Same prompt, sent to gpt-5-nano through the default OpenAI client.
answer(openai, "gpt-5-nano")

In [None]:
# Same prompt, sent to gpt-5 through the default OpenAI client.
answer(openai, "gpt-5")

In [None]:
# Uses the OpenAI client pointed at Anthropic's endpoint.
# Alternative model id kept for reference: claude-sonnet-4-20250514

answer(anthropic, "claude-sonnet-4-5-20250929")

In [None]:
# Uses the OpenAI client pointed at Google's OpenAI-compatible Gemini endpoint.
answer(gemini, "gemini-2.5-flash-lite")

In [None]:
# Same Gemini endpoint, with the gemini-2.5-pro model.
answer(gemini, "gemini-2.5-pro")

In [None]:
# DeepSeek 3.1 Terminus, called as "deepseek-chat" rather than in reasoning mode,
# because reasoning mode takes too long for this exercise.

answer(deepseek, "deepseek-chat")

In [None]:
# Served through the Groq endpoint. Presumably a reasoning model whose output
# includes a </think> section, which answer() excludes from the word count -- confirm.
answer(groq, "deepseek-r1-distill-llama-70b")

#### <span style="color: orange;">Question: what's the difference between Grok and Groq and why do they have such similar names?</span>

In [None]:
# Uses the client pointed at api.x.ai (Grok), not the Groq client used elsewhere.
answer(grok, "grok-4")

In [None]:
!ollama pull llama3.2
!ollama pull gpt-oss

In [None]:
# Sanity check: GET the local Ollama server's root URL and show the raw response
# body. A connection error here means nothing is listening on port 11434.
requests.get("http://localhost:11434").content

In [None]:
# Runs against the local Ollama server through its OpenAI-compatible endpoint.
answer(ollama, "llama3.2")

In [None]:
# Local gpt-oss, addressed by its explicit 20b tag.
answer(ollama, "gpt-oss:20b")

In [None]:
# The larger gpt-oss model, served through the Groq endpoint.
answer(groq, "openai/gpt-oss-120b")

In [None]:
# Number of answers collected so far; answer() appends one entry per call.
len(models)

## LLM as a Judge

In [None]:
# Concatenate every recorded answer under an anonymised "competitor N" heading
# (1-based) so the judge does not see model names.
# The loop variable must not be called `answer`: that would rebind the global
# answer() function to a string and break any later answer(...) call.
together = ""
for index, competitor_answer in enumerate(answers):
    together += f"# Response from competitor {index+1}\n\n"
    together += competitor_answer + "\n\n"

In [None]:
# Render the combined, anonymised answers as Markdown for a visual check.
display(Markdown(together))

In [None]:
# Judging prompt: embeds the competitor count, the original question and all
# collected answers, and asks for a JSON ranking only. The doubled braces
# {{ }} produce literal braces in the f-string for the example JSON.
judge = f"""You are judging a competition between {len(models)} competitors.
Each model has been given this question:

{message}

Your job is to evaluate each response for clarity and strength of argument and accuracy of word count, and rank them in order of best to worst.
Respond with JSON, and only JSON, with the following format:
{{"results": ["best competitor number", "second best competitor number", "third best competitor number", ...]}}

Here are the responses from each competitor:

{together}

Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks."""

In [None]:
# Render the full judging prompt to review exactly what the judge will receive.
display(Markdown(judge))

In [None]:
# Send the judging prompt as a single user turn (non-streaming) to
# openai/gpt-oss-120b through the Groq client.
judge_messages = [{"role": "user", "content": judge}]
response = groq.chat.completions.create(model="openai/gpt-oss-120b", messages=judge_messages)
# Raw text of the first choice; expected to be the JSON ranking requested above.
results = response.choices[0].message.content
results


In [None]:
# Parse the judge's reply; json.loads raises if the reply is not valid JSON.
results_dict = json.loads(results)
ranks = results_dict["results"]
for index, result in enumerate(ranks):
    # Competitor numbers are 1-based (see the headings built into `together`),
    # so subtract 1 to index into `models`.
    competitor = models[int(result)-1]
    print(f"Rank {index+1}: {competitor}")