# Exposing grammar as a request parameter in completion/chat with go-side grammar validation #4525

Open · wants to merge 4 commits into base: main
6 changes: 6 additions & 0 deletions api/types.go
@@ -67,6 +67,9 @@ type GenerateRequest struct {
// Format specifies the format to return a response in.
Format string `json:"format"`

// Grammar specifies the GBNF grammar string to constrain generation output.
Grammar string `json:"grammar"`

// KeepAlive controls how long the model will stay loaded in memory following
// this request.
KeepAlive *Duration `json:"keep_alive,omitempty"`
@@ -94,6 +97,9 @@ type ChatRequest struct {
// Format is the format to return the response in (e.g. "json").
Format string `json:"format"`

// Grammar specifies the GBNF grammar string to constrain generation output.
Grammar string `json:"grammar"`

// KeepAlive controls how long the model will stay loaded into memory
// following the request.
KeepAlive *Duration `json:"keep_alive,omitempty"`
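The PR title mentions go-side grammar validation. As a minimal, hedged sketch of what such a pre-flight check might look like — the helper name and the rules it enforces are hypothetical illustrations, not taken from this diff:

```go
package main

import (
	"fmt"
	"strings"
)

// validateGrammar is a hypothetical sketch of a go-side check: reject
// empty grammars and grammars that never mention a root rule before the
// request is handed to the llama.cpp runner. The actual validation in
// the PR may be stricter (e.g. a full GBNF parse).
func validateGrammar(grammar string) error {
	if strings.TrimSpace(grammar) == "" {
		return fmt.Errorf("grammar must not be empty")
	}
	if !strings.Contains(grammar, "root") {
		return fmt.Errorf("grammar must define a root rule")
	}
	return nil
}

func main() {
	fmt.Println(validateGrammar(`root ::= "yes" | "no"`)) // <nil>
	fmt.Println(validateGrammar("digit ::= [0-9]"))       // error: no root rule
}
```

Rejecting bad grammars in Go keeps malformed input from reaching the runner, where a parse failure would be harder to surface as a clean API error.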
16 changes: 16 additions & 0 deletions docs/api.md
@@ -44,6 +44,7 @@ Generate a response for a given prompt with a provided model. This is a streamin
Advanced parameters (optional):

- `format`: the format to return a response in. Currently the only accepted value is `json`
- `grammar`: the [GBNF grammar](https://github.com/ggerganov/llama.cpp/tree/master/grammars) to constrain generated output to
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `system`: the system message (overrides what is defined in the `Modelfile`)
- `template`: the prompt template to use (overrides what is defined in the `Modelfile`)
@@ -162,6 +163,21 @@ curl http://localhost:11434/api/generate -d '{
}'
```

#### Request (GBNF mode)

> When `grammar` is set to a [GBNF grammar](https://github.com/ggerganov/llama.cpp/tree/master/grammars), output is constrained to the grammar's rules. Unlike prompt-based approaches, this does not rely on the prompt describing the desired output format.

##### Request

```shell
curl http://localhost:11434/api/generate -d '{
"model": "llama3",
"prompt": "Are llamas amazing?",
"grammar": "root ::= \"yes\" | \"no\"",
"stream": false
}'
```

##### Response
