# 05-4. Comparativa LLMs y métricas en wandb

In [None]:
!pip install wandb
!pip install openai
!pip install cohere
!pip install google-cloud-aiplatform google-genai --upgrade

## Setup wandb

In [None]:
import wandb

# `config` (the generation hyperparameters) is defined in a cell further down the notebook,
# so on a fresh kernel (Restart & Run All) the name does not exist yet when this cell runs.
# Look it up defensively instead of failing with NameError: if the hyperparameter cell has
# already been executed its values are attached to the run exactly as before; otherwise the
# run starts without them (they are still written to every row of `table` by `call_llms`).
run_config = globals().get("config")

wandb.init(project="comparison-llm", config=run_config, name="comparison")

# NOTE(review): generate_id() appears to return a new random id rather than the id of the
# run started above -- confirm whether `wandb.run.id` was intended here.
print(wandb.util.generate_id())
print(wandb.run)

# One row per (model, prompt) pair; filled by `call_llms` and uploaded at the end.
table = wandb.Table(columns=["model", "time", "temperature", "max_output_tokens", "top_p", "top_k", "prompt", "response"])

## Setup OpenAI, Cohere and Google

In [None]:
# OpenAI client.
# The API key is taken from the OPENAI_API_KEY environment variable; when that variable is
# missing you are prompted for the key instead, so the secret never has to be written into
# (and accidentally committed with) the notebook.
import os
from getpass import getpass

from openai import OpenAI

openai_client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
)

In [None]:
# Cohere client (v2 API).
# The API key is taken from the CO_API_KEY environment variable; when that variable is
# missing you are prompted for the key instead, so the secret never has to be written into
# (and accidentally committed with) the notebook.
import os
from getpass import getpass

import cohere

co = cohere.ClientV2(
    api_key=os.environ.get("CO_API_KEY") or getpass("Cohere API key: ")
)

In [None]:
# TODO: replace YOUR_PROJECT_ID with the id of your own Google Cloud project.
# The client below is created with `vertexai=True`, so it is identified by a project and a
# location rather than by an API key.

from IPython.display import HTML, Markdown, display
from google import genai

PROJECT_ID = "YOUR_PROJECT_ID" # <--- CHANGE THIS
LOCATION = "europe-west4"

# Shared Gemini client used by `call_llms`.
gemini_client = genai.Client(
    vertexai=True,
    project=PROJECT_ID,
    location=LOCATION,
)


## Setup prompts

In [None]:
# Generation hyperparameters. `call_llms` reads them by key and writes them into every
# row of the results table.
config = {
    "temperature": 1.0,
    "max_output_tokens": 128,
    "top_p": 0.8,
    "top_k": 40,
}

# Model identifiers, one per provider/tier being compared.
MODEL_GOOGLE = "gemini-2.0-flash"
MODEL_COHERE = "command-a-03-2025"
MODEL_OPENAI_TOP = "gpt-4.1"
MODEL_OPENAI_BASIC = "gpt-4-0613"

## Set queries

In [None]:
import time

def _log_result(config, model_name, elapsed, llm_call, answer):
  """Append one (model, prompt) result row to the shared wandb `table`."""
  table.add_data(
    model_name,
    elapsed,
    config["temperature"],
    config["max_output_tokens"],
    config["top_p"],
    config["top_k"],
    llm_call,
    answer,
  )


def call_llms(config, llm_call, show_activity = True):
  """Send one prompt to every model under comparison and log each answer.

  The same sampling settings are forwarded to every provider so that the values stored in
  the results table describe how each response was actually generated. Previously only the
  Cohere request received them, while the Gemini and OpenAI rows were logged with settings
  that had never been applied.

  Args:
    config: Mapping with the keys "temperature", "max_output_tokens", "top_p" and "top_k".
      "top_k" is sent to Gemini and Cohere only; the OpenAI requests below do not take it,
      so for the OpenAI rows it is logged but not applied.
    llm_call: The prompt text sent to each model.
    show_activity: When true, print the prompt and the Gemini response.

  Returns:
    The text of the Gemini response.

  Side effects:
    Adds four rows (Gemini, gpt-4.1, gpt-4-0613, Cohere -- in that order) to the
    module-level `table`. The "time" column is the wall-clock duration of the API call.
  """
  temperature = config["temperature"]
  max_output_tokens = config["max_output_tokens"]
  top_p = config["top_p"]
  top_k = config["top_k"]

  # Gemini 2.0 flash
  t0 = time.perf_counter()
  res_gemini = gemini_client.models.generate_content(
    model=MODEL_GOOGLE,
    contents=llm_call,
    config={
      "temperature": temperature,
      "max_output_tokens": max_output_tokens,
      "top_p": top_p,
      "top_k": top_k,
    },
  )
  # Stop the clock right after the request returns so only the API call is measured.
  elapsed = time.perf_counter() - t0
  res_google = res_gemini.text
  _log_result(config, MODEL_GOOGLE, elapsed, llm_call, res_google)

  # gpt-4.1 (Responses API)
  t0 = time.perf_counter()
  res_openai_top = openai_client.responses.create(
    model=MODEL_OPENAI_TOP,
    input=llm_call,
    temperature=temperature,
    top_p=top_p,
    max_output_tokens=max_output_tokens,
  )
  elapsed = time.perf_counter() - t0
  _log_result(config, MODEL_OPENAI_TOP, elapsed, llm_call, res_openai_top.output_text)

  # gpt-4-8k (Chat Completions API)
  t0 = time.perf_counter()
  res_openai_basic = openai_client.chat.completions.create(
    model=MODEL_OPENAI_BASIC,
    messages=[
      {"role": "system", "content": "You are a helpful assistant."},
      {"role": "user", "content": llm_call},
    ],
    temperature=temperature,
    top_p=top_p,
    max_tokens=max_output_tokens,
  )
  elapsed = time.perf_counter() - t0
  _log_result(
    config,
    MODEL_OPENAI_BASIC,
    elapsed,
    llm_call,
    res_openai_basic.choices[0].message.content,
  )

  # Cohere
  t0 = time.perf_counter()
  res_cohere = co.chat(
    model=MODEL_COHERE,
    messages=[{"role": "user", "content": llm_call}],
    max_tokens=max_output_tokens,
    temperature=temperature,
    k=top_k,
    p=top_p,
  )
  elapsed = time.perf_counter() - t0
  _log_result(config, MODEL_COHERE, elapsed, llm_call, res_cohere.message.content[0].text)

  ## Only show response from Gemini, not from all LLMs
  if show_activity:
    BOLD = "\033[1m"
    UNFORMAT = "\033[0m\x1B[0m"
    print(f"{BOLD}The call to Gemini LLM:{UNFORMAT}\n{llm_call}\n")
    print(f"{BOLD}The response:{UNFORMAT}")
    print(res_google)

  return res_google

In [None]:
import time, wandb
from tqdm.auto import tqdm

# Prompt sets for the comparison. Both languages are kept under their own names so that
# switching language is a one-line change; previously the English list was assigned to
# `queries` and immediately overwritten by the Spanish one, which made it dead code.
queries_en = [
    "The planet earth is the ",
    "Implement a Python function to compute the Fibonacci numbers.",
    "Write a Rust function that performs binary exponentiation.",
    "How do I allocate memory in C?",
    "What are the differences between Javascript and Python?",
    "How do I find invalid indices in Postgres?",
    "How can you implement a LRU (Least Recently Used) cache in Python?",
    "What approach would you use to detect and prevent race conditions in a multithreaded application?",
    "Can you explain how a decision tree algorithm works in machine learning?",
    "How would you design a simple key-value store database from scratch?",
    "How do you handle deadlock situations in concurrent programming?",
    "What is the logic behind the A* search algorithm, and where is it used?",
    "How can you design an efficient autocomplete system?",
    "What approach would you take to design a secure session management system in a web application?",
    "How would you handle collision in a hash table?",
    "How can you implement a load balancer for a distributed system?",
    "What is the fable involving a fox and grapes?",
    "Write a story in the style of James Joyce about a trip to the Australian outback in 2083, to see robots in the beautiful desert.",
    "Who does Harry turn into a balloon?",
    "Write a tale about a time-traveling historian who's determined to witness the most significant events in human history.",
    "Describe a day in the life of a secret agent who's also a full-time parent.",
]

# Spanish prompts. This list is shorter than the English one: the fox-and-grapes,
# James Joyce and Harry prompts have no Spanish counterpart here.
queries_es = [
    "El planeta Tierra es ",
    "Implementar una función en Python para calcular los números de Fibonacci.",
    "Escribir una función en Rust que realice una exponenciación binaria.",
    "¿Cómo reservo memoria en C?",
    "¿Cuáles son las diferencias entre Javascript y Python?",
    "¿Cómo encuentro índices inválidos en Postgres?",
    "¿Cómo puedes implementar una caché LRU (Least Recently Used) en Python?",
    "¿Qué enfoque usarías para detectar y prevenir condiciones de carrera en una aplicación multiproceso?",
    "¿Puedes explicar cómo funciona un algoritmo de árbol de decisión en el aprendizaje automático?",
    "¿Cómo diseñarías una base de datos simple de almacenamiento clave-valor desde cero?",
    "¿Cómo manejas situaciones de interbloqueo en la programación concurrente?",
    "¿Cuál es la lógica detrás del algoritmo de búsqueda A*, y dónde se utiliza?",
    "¿Cómo puedes diseñar un sistema de autocompletado eficiente?",
    "¿Qué enfoque tomarías para diseñar un sistema seguro de gestión de sesiones en una aplicación web?",
    "¿Cómo manejarías las colisiones en una tabla hash?",
    "¿Cómo puedes implementar un balanceador de carga para un sistema distribuido?",
    "Escribe un cuento sobre un historiador que viaja en el tiempo y que presencie los eventos más significativos de la historia de la humanidad.",
    "Describe un día en la vida de un agente secreto que también es padre/madre a tiempo completo."
]

# Prompts actually sent to the models (Spanish, as before). Use `queries_en` to run the
# English set instead.
queries = queries_es

## Call models

In [None]:
# Send every prompt to all models. `call_llms` times each request itself and appends the
# results to `table`, so nothing needs to be measured or kept here (the former `t0` and
# `res` assignments were never read).
for query in tqdm(queries, desc="Querying LLMs"):
    call_llms(config, query, show_activity=False)


## Write to wandb

In [None]:
# Upload the collected rows under the "llm-comparison" key.
wandb.log({"llm-comparison": table})

# Replace the table with an empty one that has the same columns.
table = wandb.Table(
    columns=[
        "model",
        "time",
        "temperature",
        "max_output_tokens",
        "top_p",
        "top_k",
        "prompt",
        "response",
    ]
)

# Close the run.
wandb.finish()