chore: rename id to model #527
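This PR renames the provider `id` key to `model` across the example configs and docs. A minimal before/after sketch of the change (the provider name here is illustrative, not taken from any one file in the diff):

```yaml
# Before
providers:
  - id: openai:chat:gpt-3.5-turbo

# After
providers:
  - model: openai:chat:gpt-3.5-turbo
```

Where a provider previously carried a custom display name in `id`, that name moves to a separate `label` field, as the diffs below show.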

Open. Wants to merge 5 commits into base: main.
examples/amazon-bedrock/promptfooconfig.claude.yaml (1 addition, 1 deletion)
@@ -2,7 +2,7 @@ prompts:
   - 'Convert this English to {{language}}: {{input}}'
   - 'Translate to {{language}}: {{input}}'
 providers:
-  - id: bedrock:completion:anthropic.claude-instant-v1
+  - model: bedrock:completion:anthropic.claude-instant-v1
     config:
       region: 'us-west-2'
       temperature: 0.7
examples/amazon-bedrock/promptfooconfig.titan-text.yaml (1 addition, 1 deletion)
@@ -2,7 +2,7 @@ prompts:
   - 'Convert this English to {{language}}: {{input}}'
   - 'Translate to {{language}}: {{input}}'
 providers:
-  - id: bedrock:completion:amazon.titan-text-lite-v1
+  - model: bedrock:completion:amazon.titan-text-lite-v1
     config:
       region: 'us-west-2'
       textGenerationConfig:
examples/azure-openai/promptfooconfig.yaml (2 additions, 2 deletions)
@@ -2,7 +2,7 @@ prompts:
   - 'Generate one very interesting fact about {{topic}}'

 providers:
-  - id: azureopenai:chat:gpt-35-turbo-deployment1
+  - model: azureopenai:chat:gpt-35-turbo-deployment1
     config:
       apiHost: 'your-org.openai.azure.com'
@@ -20,6 +20,6 @@ tests:
       - type: similar
         value: Bananas are naturally radioactive.
         provider:
-          id: azureopenai:embeddings:ada-deployment1
+          model: azureopenai:embeddings:ada-deployment1
           config:
             apiHost: 'your-org.openai.azure.com'
examples/cohere/simple_config.yaml (2 additions, 2 deletions)
@@ -2,12 +2,12 @@ prompts:
   - "Write a tweet about {{topic}}"

 providers:
-  - id: cohere:command
+  - model: cohere:command
     config:
       temperature: 0.5
       prompt_truncation: AUTO
       connectors:
-        - id: web-search
+        - model: web-search
           showSearchQueries: true

 tests:
examples/custom-provider/promptfooconfig.yaml (8 additions, 8 deletions)
@@ -4,11 +4,11 @@ tests: vars.csv
 # To compare two of the same provider, you can do the following:
 #
 # providers:
-#   - customProvider.js:
-#       id: custom-provider-hightemp
-#       config:
-#         temperature: 1.0
-#   - customProvider.js:
-#       id: custom-provider-lowtemp
-#       config:
-#         temperature: 0
+#   - model: customProvider.js
+#     label: custom-provider-hightemp
+#     config:
+#       temperature: 1.0
+#   - model: customProvider.js
+#     label: custom-provider-lowtemp
+#     config:
+#       temperature: 0
examples/external-provider-config/gpt-3.5.yaml (1 addition, 1 deletion)
@@ -1,4 +1,4 @@
-id: 'openai:chat:gpt-3.5-turbo-0613'
+model: 'openai:chat:gpt-3.5-turbo-0613'
 config:
   functions:
     [
examples/gemma-vs-llama/promptfooconfig.yaml (2 additions, 2 deletions)
@@ -2,15 +2,15 @@ prompts:
   - "{{message}}"

 providers:
-  - id: replicate:meta/llama-2-7b-chat
+  - model: replicate:meta/llama-2-7b-chat
     config:
       temperature: 0.01 # minimum temperature
      max_new_tokens: 1024
       prompt:
         prefix: "[INST] "
         suffix: "[/INST] "

-  - id: replicate:cjwbw/gemma-7b-it:2790a695e5dcae15506138cc4718d1106d0d475e6dca4b1d43f42414647993d5
+  - model: replicate:cjwbw/gemma-7b-it:2790a695e5dcae15506138cc4718d1106d0d475e6dca4b1d43f42414647993d5
     config:
       temperature: 0.01
       max_new_tokens: 1024
examples/gemma-vs-mistral/promptfooconfig.yaml (3 additions, 3 deletions)
@@ -6,23 +6,23 @@ defaultTest:
     transform: output.trim()

 providers:
-  - id: replicate:mistralai/mistral-7b-instruct-v0.2
+  - model: replicate:mistralai/mistral-7b-instruct-v0.2
     config:
       temperature: 0.01
       max_new_tokens: 1024
       prompt:
         prefix: "<s>[INST] "
         suffix: " [/INST]"

-  - id: replicate:mistralai/mixtral-8x7b-instruct-v0.1
+  - model: replicate:mistralai/mixtral-8x7b-instruct-v0.1
     config:
       temperature: 0.01
       max_new_tokens: 1024
       prompt:
         prefix: "<s>[INST] "
         suffix: " [/INST]"

-  - id: replicate:cjwbw/gemma-7b-it:2790a695e5dcae15506138cc4718d1106d0d475e6dca4b1d43f42414647993d5
+  - model: replicate:cjwbw/gemma-7b-it:2790a695e5dcae15506138cc4718d1106d0d475e6dca4b1d43f42414647993d5
     config:
       temperature: 0.01
       max_new_tokens: 1024
examples/google-aistudio-gemini/promptfooconfig.yaml (1 addition, 1 deletion)
@@ -3,7 +3,7 @@ prompts:
   - "Write a very concise, funny tweet about {{topic}}"

 providers:
-  - id: google:gemini-pro
+  - model: google:gemini-pro
     config:
       generationConfig:
         temperature: 0
examples/gpt-3.5-temperature-comparison/promptfooconfig.yaml (8 additions, 8 deletions)
@@ -2,14 +2,14 @@ prompts:
   - 'Respond to the following instruction: {{message}}'

 providers:
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-lowtemp
-      config:
-        temperature: 0
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-hightemp
-      config:
-        temperature: 1
+  - model: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-lowtemp
+    config:
+      temperature: 0
+  - model: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-hightemp
+    config:
+      temperature: 1

 tests:
   - vars:
@@ -4,7 +4,7 @@ prompts:
   - "Write a tweet about {{topic}}:"

 providers:
-  - id: huggingface:text-generation:gemma-7b-it
+  - model: huggingface:text-generation:gemma-7b-it
     config:
       apiEndpoint: https://v9igsezez4ei3cq4.us-east-1.aws.endpoints.huggingface.cloud
       # apiKey: abc123 # Or set HF_API_TOKEN environment variable
examples/llama-gpt-comparison/promptfooconfig.yaml (21 additions, 17 deletions)
@@ -3,23 +3,27 @@ prompts:
   prompts/completion_prompt.txt: completion_prompt

 providers:
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-lowtemp
-      prompts: chat_prompt
-      config:
-        temperature: 0
-        max_tokens: 128
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-hightemp
-      prompts: chat_prompt
-      config:
-        temperature: 1
-        max_tokens: 128
-  - replicate:replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48:
-      prompts: completion_prompt
-      config:
-        temperature: 0.01 # minimum temperature
-        max_length: 128
+  - model: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-lowtemp
+    prompts:
+      - chat_prompt
+    config:
+      temperature: 0
+      max_tokens: 128
+  - model: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-hightemp
+    prompts:
+      - chat_prompt
+    config:
+      temperature: 1
+      max_tokens: 128
+  - model: replicate:meta/llama70b-v2-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3
+    label: llama70b-v2-chat
+    prompts:
+      - completion_prompt
+    config:
+      temperature: 0.01 # minimum temperature
+      max_length: 128

 tests:
   - vars:
examples/llama-gpt-comparison/prompts/chat_prompt.json (4 additions, 0 deletions)
@@ -1,4 +1,8 @@
 [
+  {
+    "role": "system",
+    "content": "You are a pirate"
+  },
   {
     "role": "user",
     "content": "{{message}}"
examples/llama-gpt-comparison/prompts/completion_prompt.txt (4 additions, 2 deletions)
@@ -1,2 +1,4 @@
-User: {{message}}
-Assistant:
+[INST] <<SYS>>
+You are a pirate
+<</SYS>>
+{{message}}
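The rewritten completion prompt above follows Llama 2's chat template, where a system message sits in a `<<SYS>>` block inside the first `[INST]` turn. A sketch of rendering such a prompt programmatically (the function name is illustrative, and the closing ` [/INST]` is part of the standard template even though it falls outside the lines shown in this diff):

```python
def llama2_prompt(system: str, user: str) -> str:
    """Render a single-turn Llama 2 chat prompt with a system block."""
    return f"[INST] <<SYS>>\n{system}\n<</SYS>>\n{user} [/INST]"

print(llama2_prompt("You are a pirate", "Say hello"))
```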
examples/mistral-llama-comparison/promptfooconfig.yaml (19 additions, 16 deletions)
@@ -3,22 +3,25 @@ prompts:
   prompts/llama_prompt.txt: llama_prompt

 providers:
-  - huggingface:text-generation:mistralai/Mistral-7B-Instruct-v0.1:
-      prompts: mistral_prompt
-      config:
-        temperature: 0.01
-        max_new_tokens: 128
-  - replicate:mistralai/mixtral-8x7b-instruct-v0.1:2b56576fcfbe32fa0526897d8385dd3fb3d36ba6fd0dbe033c72886b81ade93e:
-      prompts: mistral_prompt
-      config:
-        temperature: 0.01
-        max_new_tokens: 128
-        prompt_template: '{prompt}'
-  - replicate:meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e:
-      prompts: llama_prompt
-      config:
-        temperature: 0.01
-        max_new_tokens: 128
+  - model: huggingface:text-generation:mistralai/Mistral-7B-Instruct-v0.1
+    prompts:
+      - mistral_prompt
+    config:
+      temperature: 0.01
+      max_new_tokens: 128
+  - model: replicate:mistralai/mixtral-8x7b-instruct-v0.1
+    prompts:
+      - mistral_prompt
+    config:
+      temperature: 0.01
+      max_new_tokens: 128
+      prompt_template: '{prompt}'
+  - model: replicate:meta/llama-2-7b-chat:2d19859030ff705a87c746f7e96eea03aefb71f166725aee39692f1476566d48
+    prompts:
+      - llama_prompt
+    config:
+      temperature: 0.01
+      max_new_tokens: 128

 tests:
   - vars:
examples/ollama-comparison/promptfooconfig.yaml (12 additions, 10 deletions)
@@ -3,16 +3,18 @@ prompts:
   prompts/llama_prompt.txt: llama_prompt

 providers:
-  - ollama:llama2:
-      prompts: llama_prompt
-      config:
-        num_predict: 1024
-  - ollama:llama2-uncensored:
-      prompts: llama_prompt
-      config:
-        num_predict: 1024
-  - openai:gpt-3.5-turbo:
-      prompts: openai_prompt
+  - id: ollama:llama2
+    prompts:
+      - llama_prompt
+    config:
+      num_predict: 1024
+  - id: ollama:llama2-uncensored
+    prompts:
+      - llama_prompt
+    config:
+      num_predict: 1024
+  - id: openai:gpt-3.5-turbo
+    prompts: openai_prompt

 defaultTest:
   assert:
examples/perplexity.ai-example/promptfooconfig.yaml (2 additions, 2 deletions)
@@ -3,8 +3,8 @@ prompts:

 providers:
   # Compare GPT 3.5 vs Perplexity 70B
-  - id: openai:chat:gpt-3.5-turbo-0613
-  - id: openai:chat:pplx-70b-online
+  - model: openai:chat:gpt-3.5-turbo-0613
+  - model: openai:chat:pplx-70b-online
     config:
       apiBaseUrl: https://api.perplexity.ai
       apiKeyEnvar: PERPLEXITY_API_KEY
examples/python-provider/promptfooconfig.yaml (1 addition, 1 deletion)
@@ -3,7 +3,7 @@ prompts:
   - "Write a very concise, funny tweet about {{topic}}"

 providers:
-  - id: python:provider.py # or provider_async.py
+  - model: python:provider.py # or provider_async.py
     config:
       someOption: foobar
examples/replicate-lifeboat/promptfooconfig.yaml (2 additions, 2 deletions)
@@ -2,12 +2,12 @@ prompts:
   - 'Respond to the user concisely: {{message}}'

 providers:
-  - id: openai:chat:gpt-3.5-turbo
+  - model: openai:chat:gpt-3.5-turbo
     config:
       apiKey: '...'
       temperature: 0.01
       max_tokens: 512
-  - id: openai:chat:meta/llama-2-70b-chat
+  - model: openai:chat:meta/llama-2-70b-chat
     config:
       apiKey: '...'
       apiBaseUrl: https://openai-proxy.replicate.com
site/docs/configuration/expected-outputs/model-graded.md (2 additions, 2 deletions)
@@ -115,7 +115,7 @@ tests:

 ## Examples (comparison)

-The `select-best` assertion type is used to compare multiple outputs in the same TestCase row and select the one that best meets a specified criterion. 
+The `select-best` assertion type is used to compare multiple outputs in the same TestCase row and select the one that best meets a specified criterion.

 Here's an example of how to use `select-best` in a configuration file:
@@ -178,7 +178,7 @@ Use the `provider.config` field to set custom parameters:

 ```yaml
 provider:
-  - id: openai:gpt-3.5-turbo
+  - model: openai:gpt-3.5-turbo
     config:
       temperature: 0
 ```
site/docs/configuration/parameters.md (6 additions, 6 deletions)
@@ -131,12 +131,12 @@ prompts:
   prompts/llama_completion_prompt.txt: llama_completion_prompt

 providers:
-  - openai:gpt-3.5-turbo-0613:
-      prompts: gpt_chat_prompt
-  - openai:gpt-4-turbo-0613:
-      prompts: gpt_chat_prompt
-  - replicate:replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48:
-      prompts: llama_completion_prompt
+  - model: openai:gpt-3.5-turbo-0613
+    prompts: gpt_chat_prompt
+  - model: openai:gpt-4-turbo-0613
+    prompts: gpt_chat_prompt
+  - model: replicate:replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48
+    prompts: llama_completion_prompt
 ```

 In this configuration, the `gpt_chat_prompt` is used for both GPT-3.5 and GPT-4 models, while the `llama_completion_prompt` is used for the Llama v2 model. The prompts are defined in separate files within the `prompts` directory.
site/docs/guides/azure-vs-openai.md (4 additions, 4 deletions)
@@ -45,8 +45,8 @@ Edit your `promptfooconfig.yaml` to include both OpenAI and Azure OpenAI as prov

 ```yaml
 providers:
-  - id: openai:chat:gpt-3.5-turbo
-  - id: azureopenai:chat:my-gpt-35-turbo-deployment
+  - model: openai:chat:gpt-3.5-turbo
+  - model: azureopenai:chat:my-gpt-35-turbo-deployment
     config:
       apiHost: myazurehost.openai.azure.com
 ```
@@ -59,11 +59,11 @@ For each provider, you may configure additional parameters such as `temperature`

 ```yaml
 providers:
-  - id: openai:chat:gpt-3.5-turbo
+  - model: openai:chat:gpt-3.5-turbo
     config:
       temperature: 0
       max_tokens: 128
-  - id: azureopenai:chat:my-gpt-35-turbo-deployment
+  - model: azureopenai:chat:my-gpt-35-turbo-deployment
     config:
       apiHost: your_azure_openai_host
       temperature: 0
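For configs outside this repository, the same rename can be applied mechanically. A hypothetical migration sketch (promptfoo may ship its own migration; this regex only handles the simple `id:`-as-mapping-key layouts shown in the diffs above, not `id` used as a value):

```python
import re

def rename_id_to_model(config_text: str) -> str:
    """Rewrite provider `id:` mapping keys to `model:` in a YAML config string.

    Matches `id:` at the start of a line, optionally after indentation and a
    list dash, so values containing the substring "id:" are left untouched.
    """
    return re.sub(r"^(\s*(?:-\s+)?)id:", r"\1model:", config_text, flags=re.MULTILINE)

before = "providers:\n  - id: openai:chat:gpt-3.5-turbo\n    config:\n      temperature: 0\n"
print(rename_id_to_model(before))
```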