In [1]:
import llm_utils

## Get list of available models (OpenAI and locally hosted LLMs at SDSC)

In [2]:
# Production models
# Displays the available model names; in the recorded output this is a dict
# mapping a provider key (e.g. 'OPEN_AI') to a list of model-name strings.
# NOTE(review): no `environment` is passed here, so this relies on
# get_models() defaulting to production — confirm against llm_utils.
llm_utils.get_models()

{'OPEN_AI': ['gpt-4-0613',
  'gpt-4',
  'gpt-3.5-turbo',
  'gpt-4o-audio-preview-2025-06-03',
  'gpt-4.1-nano',
  'gpt-image-1',
  'codex-mini-latest',
  'gpt-4o-realtime-preview-2025-06-03',
  'davinci-002',
  'babbage-002',
  'gpt-3.5-turbo-instruct',
  'gpt-3.5-turbo-instruct-0914',
  'dall-e-3',
  'dall-e-2',
  'gpt-4-1106-preview',
  'gpt-3.5-turbo-1106',
  'tts-1-hd',
  'tts-1-1106',
  'tts-1-hd-1106',
  'text-embedding-3-small',
  'text-embedding-3-large',
  'gpt-4-0125-preview',
  'gpt-4-turbo-preview',
  'gpt-3.5-turbo-0125',
  'gpt-4-turbo',
  'gpt-4-turbo-2024-04-09',
  'gpt-4o',
  'gpt-4o-2024-05-13',
  'gpt-4o-mini-2024-07-18',
  'gpt-4o-mini',
  'gpt-4o-2024-08-06',
  'chatgpt-4o-latest',
  'o1-preview-2024-09-12',
  'o1-preview',
  'o1-mini-2024-09-12',
  'o1-mini',
  'gpt-4o-realtime-preview-2024-10-01',
  'gpt-4o-audio-preview-2024-10-01',
  'gpt-4o-audio-preview',
  'gpt-4o-realtime-preview',
  'omni-moderation-latest',
  'omni-moderation-2024-09-26',
  'gpt-4o-realti

In [3]:
# Development models
# Same listing call as for production, but explicitly requesting the
# development environment; the result is shown as the cell's output.
llm_utils.get_models(environment="development")

{'OPEN_AI': ['gpt-4-0613',
  'gpt-4',
  'gpt-3.5-turbo',
  'gpt-4o-audio-preview-2025-06-03',
  'gpt-4.1-nano',
  'gpt-image-1',
  'codex-mini-latest',
  'gpt-4o-realtime-preview-2025-06-03',
  'davinci-002',
  'babbage-002',
  'gpt-3.5-turbo-instruct',
  'gpt-3.5-turbo-instruct-0914',
  'dall-e-3',
  'dall-e-2',
  'gpt-4-1106-preview',
  'gpt-3.5-turbo-1106',
  'tts-1-hd',
  'tts-1-1106',
  'tts-1-hd-1106',
  'text-embedding-3-small',
  'text-embedding-3-large',
  'gpt-4-0125-preview',
  'gpt-4-turbo-preview',
  'gpt-3.5-turbo-0125',
  'gpt-4-turbo',
  'gpt-4-turbo-2024-04-09',
  'gpt-4o',
  'gpt-4o-2024-05-13',
  'gpt-4o-mini-2024-07-18',
  'gpt-4o-mini',
  'gpt-4o-2024-08-06',
  'chatgpt-4o-latest',
  'o1-preview-2024-09-12',
  'o1-preview',
  'o1-mini-2024-09-12',
  'o1-mini',
  'gpt-4o-realtime-preview-2024-10-01',
  'gpt-4o-audio-preview-2024-10-01',
  'gpt-4o-audio-preview',
  'gpt-4o-realtime-preview',
  'omni-moderation-latest',
  'omni-moderation-2024-09-26',
  'gpt-4o-realti

## Run a test prompt against each model

In [4]:
# Shared prompt used by every run_prompt cell in this notebook.
# The system message sets the persona; the user message asks the model to
# self-report its name and version as JSON.
system_message = "You are an expert on LLMs"
user_input = (
    "What is your model name and version? "
    "Return the 'model_name' and 'model_version' in JSON format"
)

In [5]:
# Send the shared prompt to gpt-4o-mini in the production environment.
# run_prompt returns a (response, usage) pair: the model's reply and its
# token-usage record. Both are printed, one per line.
response, usage = llm_utils.run_prompt(
    system_message,
    user_input,
    "gpt-4o-mini",
    environment="production",
)
print(response, usage, sep="\n")

{
  "model_name": "ChatGPT",
  "model_version": "GPT-4"
}
{'completion_tokens': 21, 'prompt_tokens': 41, 'total_tokens': 62, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}


In [6]:
# Same prompt against o1-mini (production). In the recorded output the usage
# record reports a non-zero 'reasoning_tokens' count for this model.
response, usage = llm_utils.run_prompt(
    system_message,
    user_input,
    "o1-mini",
    environment="production",
)
print(response, usage, sep="\n")

{
  "training_data": "up to October 2023",
  "response": {
    "apology": "I'm sorry, but I can't provide my model name or version.",
    "assistance_offer": "However, I'm here to help with any questions you have."
  }
}
{'completion_tokens': 612, 'prompt_tokens': 38, 'total_tokens': 650, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 576, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}


In [7]:
# Same prompt against the "llama3-sdsc" model name (production).
# In the recorded output the usage record has no token-detail breakdown
# ('completion_tokens_details' and 'prompt_tokens_details' are None).
response, usage = llm_utils.run_prompt(
    system_message,
    user_input,
    "llama3-sdsc",
    environment="production",
)
print(response, usage, sep="\n")

{"model_name": "LLaMA", "model_version": "2.1"}
{'completion_tokens': 19, 'prompt_tokens': 65, 'total_tokens': 84, 'completion_tokens_details': None, 'prompt_tokens_details': None}


In [8]:
# Same prompt against meta-llama/Llama-3.3-70B-Instruct (production).
# NOTE(review): the recorded output is identical to the "llama3-sdsc" run;
# presumably both names resolve to the same deployment — confirm.
response, usage = llm_utils.run_prompt(
    system_message,
    user_input,
    "meta-llama/Llama-3.3-70B-Instruct",
    environment="production",
)
print(response, usage, sep="\n")

{"model_name": "LLaMA", "model_version": "2.1"}
{'completion_tokens': 19, 'prompt_tokens': 65, 'total_tokens': 84, 'completion_tokens_details': None, 'prompt_tokens_details': None}


In [9]:
# Same prompt and model as the production Llama-3.3-70B-Instruct run, but
# routed through the development environment for comparison.
response, usage = llm_utils.run_prompt(
    system_message,
    user_input,
    "meta-llama/Llama-3.3-70B-Instruct",
    environment="development",
)
print(response, usage, sep="\n")

{"model_name": "LLaMA", "model_version": "2.1"}
{'completion_tokens': 19, 'prompt_tokens': 65, 'total_tokens': 84, 'completion_tokens_details': None, 'prompt_tokens_details': None}


In [10]:
# Same prompt against deepseek-ai/DeepSeek-R1-Distill-Qwen-32B (production).
# The recorded reply self-identifies as "GPT-4", so the self-reported name
# should not be treated as evidence of which model actually answered.
response, usage = llm_utils.run_prompt(
    system_message,
    user_input,
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    environment="production",
)
print(response, usage, sep="\n")

{"model_name": "GPT-4", "model_version": "4.0.0"}
{'completion_tokens': 22, 'prompt_tokens': 35, 'total_tokens': 57, 'completion_tokens_details': None, 'prompt_tokens_details': None}
