In [None]:

import os
import json
from dotenv import load_dotenv
from openai import OpenAI
#from anthropic import Anthropic
from IPython.display import Markdown, display

from perplexity import Perplexity

In [None]:
# Load API keys and other settings from the local .env file into the process
# environment. Per the python-dotenv docs, override=True lets values from .env
# replace variables that are already set in the environment.
load_dotenv(override=True)  # Loads the .env file

In [None]:
# Provider keys are read from the environment (populated from .env).
# Providers that are currently disabled are kept here for reference:
#   openai_api_key = os.getenv('OPENAI_API_KEY')
#   anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
#   deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')
#   groq_api_key = os.getenv('GROQ_API_KEY')
google_api_key = os.environ.get('GEMINI_API_KEY')
perplexity_api_key = os.environ.get('PERPLEXITY_API_KEY')

# Report which keys are present, echoing only a short prefix of each one.
print(
    f"Google API Key exists and begins {google_api_key[:2]}"
    if google_api_key
    else "Google API Key not set (and this is optional)"
)
print(
    f"Perplexity API Key exists and begins {perplexity_api_key[:4]}"
    if perplexity_api_key
    else "Perplexity API Key not set (and this is optional)"
)
# --- IGNORE ---


In [None]:
# Prompt asking a model to invent the benchmark question; the reply should
# contain the question and nothing else.
request = " ".join(
    [
        "Please come up with a challenging, nuanced question that I can ask",
        "a number of LLMs to evaluate their intelligence.",
        "Answer only with the question, no explanation , no table.",
    ]
)

# Single-turn conversation in the chat message format (role + content).
messages = [dict(role="user", content=request)]

In [None]:
# Display the request payload to confirm its shape before it is sent.
messages

In [None]:

# Ask Perplexity to write the benchmark question.
Perplexity_client = Perplexity(api_key=os.environ["PERPLEXITY_API_KEY"])

# Chat-completions call with the "sonar-pro" model. The original note here
# suggested switching to a smaller Sonar model for a cheaper/faster run.
response = Perplexity_client.chat.completions.create(
    messages=messages,
    model="sonar-pro",
)

# The question is the text content of the first returned choice.
question = response.choices[0].message.content

print("Generated Question:")
display(Markdown("**{}**".format(question)))


In [None]:
# Fresh scoreboard: model names and their answers are kept in parallel lists.
competitors, answers = [], []

# From here on, every model is sent the generated question.
messages = [dict(role="user", content=question)]

In [None]:
from google import genai

# Gemini client, authenticated with the key taken from the environment.
gemini_client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

model_name = "gemini-2.5-flash"

# Send the generated question to Gemini as a single-item contents list.
gemini_response = gemini_client.models.generate_content(
    model=model_name, contents=[question]
)

# Keep only the reply text. The entries of `answers` are later interpolated
# into the judge prompt as text, so storing the response object itself would
# put its repr, not the model's answer, in front of the judge.
answer = gemini_response.text

display(Markdown(answer))
competitors.append(model_name)
answers.append(answer)

In [None]:

# Next competitor: Perplexity answers the same question.
model_name = "sonar-pro"
Perplexity_client = Perplexity(api_key=os.environ["PERPLEXITY_API_KEY"])

# Chat-completions call; `messages` holds the generated question.
response = Perplexity_client.chat.completions.create(
    messages=messages,
    model=model_name,
)

# The answer is the text content of the first returned choice.
answer = response.choices[0].message.content

display(Markdown(answer))
answers.append(answer)
competitors.append(model_name)

In [None]:
from ollama import Client

# The hosted Ollama endpoint needs an API key; stop early with a readable
# message if it is missing rather than failing later on the request.
api_key = os.getenv("OLLAMA_API_KEY")
if not api_key:
    raise ValueError("⚠️ OLLAMA_API_KEY not found. Check your .env file.")

# Client for https://ollama.com, authenticated with a bearer token header.
client = Client(
    host="https://ollama.com",
    headers={"Authorization": f"Bearer {api_key}"},
)

# --- Build the judging prompt dynamically ---
# `competitors` and `answers` are parallel lists that already exist, e.g.
#   competitors = ["model1", "model2"]
#   answers = ["answer from model1", "answer from model2"]
judge_header = (
    "You are a neutral, strict evaluator. Below are answers from different models "
    "to the same question. Rank them from best to worst based on:\n"
    "- Accuracy and factual correctness\n"
    "- Clarity and coherence\n"
    "- Depth of reasoning and completeness\n\n"
)

# One labelled section per competitor. Generator expressions keep the loop
# variables local, so notebook-level names such as `answer` are not rebound.
answer_sections = "".join(
    f"Model {position} ({name}):\n{text}\n\n"
    for position, (name, text) in enumerate(zip(competitors, answers), 1)
)

# One ranking slot per competitor, so the requested format matches however
# many models were collected (a fixed three-slot template invites the judge
# to invent entries when fewer models are present).
ranking_slots = "".join(
    f"{position}. model_name - reason\n"
    for position in range(1, len(competitors) + 1)
)

judge_prompt = (
    judge_header
    + answer_sections
    + "Now give your ranking and reasoning in this format:\n"
    + ranking_slots
)

# --- Judge using Ollama ---
judge_model = "gpt-oss:120b"  # model asked to rank the collected answers

print("\n🧠 Evaluating models...\n")

# Stream the verdict: echo each chunk as it arrives and accumulate it, so
# `response_text` holds whatever was received even if the stream is interrupted.
response_text = ""
judge_messages = [{"role": "user", "content": judge_prompt}]
for part in client.chat(judge_model, messages=judge_messages, stream=True):
    content = part["message"]["content"]
    print(content, end="", flush=True)
    response_text += content