# 05-4. Comparativa OpenAI y Google y despliegue en wandb

In [None]:
#!pip install wandb
#!pip install openai
#!pip install google-cloud-aiplatform --upgrade

## Setup wandb

In [None]:
import wandb

# Sampling settings stored as the configuration of the wandb run.
config = {
    "temperature": 1.0,
    "max_output_tokens": 128,
    "top_p": 0.8,
    "top_k": 40,
}

# Start the run that will receive the comparison table.
wandb.init(
    project="comparison-openai-gemini",
    config=config,
    name="comparison",
)

# Print a freshly generated id, then the active run object.
print(wandb.util.generate_id())
print(wandb.run)

# Table that collects one row per model answer.
table = wandb.Table(
    columns=[
        "model",
        "test",
        "time",
        "temperature",
        "max_output_tokens",
        "top_p",
        "top_k",
        "prompt",
        "response",
    ]
)


## Setup GCP and models

In [None]:
# TODO: CHANGE PROJECT_ID
# Google Cloud project that is passed to vertexai.init().
PROJECT_ID = "YOUR_PROJECT_ID"   # <---- CHANGE THIS
# Region that is passed to vertexai.init().
LOCATION = "us-central1"   
# Code examples may misbehave if the model is changed.
# Name of the text model loaded with TextGenerationModel.from_pretrained().
MODEL_NAME = "text-bison@001"  

In [None]:
import vertexai
from vertexai.language_models import TextGenerationModel
from vertexai.preview.generative_models import GenerativeModel


# Point the Vertex AI SDK at the configured project and region.
vertexai.init(project=PROJECT_ID, location=LOCATION)

# Generation settings for the text model.
parameters = dict(
    temperature=0,
    max_output_tokens=1024,
    top_p=0.8,
    top_k=40,
)

# Text generation model selected by MODEL_NAME.
model = TextGenerationModel.from_pretrained(MODEL_NAME)

# Gemini model used for the side-by-side comparison.
generative_model = GenerativeModel("gemini-pro")


## Setup OpenAI

In [None]:
from openai import OpenAI

import os

# The API key is read from the environment so that it is never stored in the
# notebook. Set OPENAI_API_KEY before starting the kernel; a missing variable
# raises KeyError here instead of failing later inside a request.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

## Set queries

In [None]:
import time

def _log_row(model_name, description, elapsed, sampling, llm_call, response):
  """Append one result row to the global wandb `table`.

  Args:
    model_name: Label written to the "model" column.
    description: Label written to the "test" column.
    elapsed: Wall-clock seconds the request took.
    sampling: Mapping with the sampling settings that were actually sent to
      the model. Missing keys are logged as None.
    llm_call: The prompt text.
    response: The text returned by the model.
  """
  table.add_data(
      model_name,
      description,
      elapsed,
      sampling.get("temperature"),
      sampling.get("max_output_tokens"),
      sampling.get("top_p"),
      sampling.get("top_k"),
      llm_call,
      response)


def _ask_openai(openai_model, description, llm_call):
  """Send `llm_call` to an OpenAI chat model and log the timed answer."""
  t0 = time.perf_counter()
  completion = client.chat.completions.create(
    model=openai_model,
    messages=[
      {"role": "system", "content": "You are a helpful assistant."},
      {"role": "user", "content": llm_call},
    ]
  )
  elapsed = time.perf_counter() - t0

  # No sampling settings are sent with the request, so none are logged.
  _log_row(
    openai_model,
    description,
    elapsed,
    {},
    llm_call,
    completion.choices[0].message.content
  )


def call_llm(model, description, parameters, llm_call, show_activity = False):
  """Send one prompt to every compared model and log each answer to `table`.

  The prompt goes, in order, to the text model passed as `model`, to
  gpt-3.5-turbo, to gpt-4-0613 and to the global `generative_model`. Each
  answer becomes one table row with the request duration and the sampling
  settings that were really sent with that request.

  Relies on the notebook globals `table`, `client`, `generative_model` and
  `MODEL_NAME`.

  Args:
    model: Text model exposing `predict(prompt, **parameters)` whose result
      has a `.text` attribute.
    description: Label stored in the "test" column of every row.
    parameters: Keyword arguments forwarded to `model.predict`.
    llm_call: The prompt text.
    show_activity: When true, print the prompt and the answer of `model`.

  Returns:
    The text answer of `model` only; the other answers are just logged.
  """
  # text-bison
  t0 = time.perf_counter()
  res = model.predict(llm_call, **parameters).text
  elapsed = time.perf_counter() - t0

  # Label the row with the name the notebook loaded `model` from, and log the
  # settings that were forwarded to predict().
  _log_row(MODEL_NAME, description, elapsed, parameters, llm_call, res)

  # gpt-3.5-turbo (ChatGPT)
  _ask_openai("gpt-3.5-turbo", description, llm_call)

  # gpt-4-8k
  _ask_openai("gpt-4-0613", description, llm_call)

  # Gemini text
  gemini_settings = {
      "max_output_tokens": 2048,
      "temperature": 0.9,
      "top_p": 1
  }
  t0 = time.perf_counter()
  res_gen = generative_model.generate_content(
    [llm_call],
    generation_config=gemini_settings,
    stream=False,
  )
  elapsed = time.perf_counter() - t0

  # top_k is not part of the request, so it is logged as None.
  _log_row("gemini-pro", description, elapsed, gemini_settings, llm_call, res_gen.text)

  ## Only show response from text-bison, not openai or Gemini
  if show_activity:
    BOLD = "\033[1m"
    UNFORMAT = "\033[0m\x1B[0m"
    print(f"{BOLD}The call to the LLM:{UNFORMAT}\n{llm_call}\n")
    print(f"{BOLD}The response:{UNFORMAT}")
    print(res)

  return res  # Return to `_` if not needed.

In [None]:
import time, wandb
from tqdm.auto import tqdm

# Prompts sent to every model, one table row per model and prompt.
queries = [
    # Open-ended sentence to complete.
    "The planet earth is the ",
    # Programming, algorithm and system-design questions.
    "Implement a Python function to compute the Fibonacci numbers.",
    "Write a Rust function that performs binary exponentiation.",
    "How do I allocate memory in C?",
    "What are the differences between Javascript and Python?",
    "How do I find invalid indices in Postgres?",
    "How can you implement a LRU (Least Recently Used) cache in Python?",
    "What approach would you use to detect and prevent race conditions in a multithreaded application?",
    "Can you explain how a decision tree algorithm works in machine learning?",
    "How would you design a simple key-value store database from scratch?",
    "How do you handle deadlock situations in concurrent programming?",
    "What is the logic behind the A* search algorithm, and where is it used?",
    "How can you design an efficient autocomplete system?",
    "What approach would you take to design a secure session management system in a web application?",
    "How would you handle collision in a hash table?",
    "How can you implement a load balancer for a distributed system?",
    # General-knowledge questions and creative-writing requests.
    "What is the fable involving a fox and grapes?",
    "Write a story in the style of James Joyce about a trip to the Australian outback in 2083, to see robots in the beautiful desert.",
    "Who does Harry turn into a balloon?",
    "Write a tale about a time-traveling historian who's determined to witness the most significant events in human history.",
    "Describe a day in the life of a secret agent who's also a full-time parent.",
]

## Call models

In [None]:
# Send every query to all models. call_llm times each request itself and
# records the answers in `table`, so neither a timer nor the return value is
# needed here.
for q in tqdm(queries):
    call_llm(model, "comparison", parameters, q)


## Write to wandb

In [None]:
# Upload the collected rows under the "comparison" key.
wandb.log({"comparison": table})

# Replace the table with an empty one that has the same columns.
table = wandb.Table(
    columns=[
        "model",
        "test",
        "time",
        "temperature",
        "max_output_tokens",
        "top_p",
        "top_k",
        "prompt",
        "response",
    ]
)

# Close the wandb run.
wandb.finish()