From ee5efb26d2e278165ad55b52917438400f293105 Mon Sep 17 00:00:00 2001
From: Emi
Date: Tue, 20 May 2025 22:40:40 -0700
Subject: [PATCH 1/3] modelconfig: document new OpenAI-compatible configurations

Documents the new `openaicompatible` model configurations supported as part of
https://linear.app/sourcegraph/issue/CORE-1019/enablement-ga-self-hosted-models-additional-openai-compatible

Signed-off-by: Emi
---
 .../cody/enterprise/model-config-examples.mdx | 41 +++++++++++++++++++
 docs/cody/enterprise/model-configuration.mdx  | 11 ++++-
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx
index 01e9af7bb..1ff6c48e1 100644
--- a/docs/cody/enterprise/model-config-examples.mdx
+++ b/docs/cody/enterprise/model-config-examples.mdx
@@ -505,6 +505,47 @@ In the configuration above,
 - Set `clientSideConfig.openaicompatible` to `{}` to indicate to Cody clients that these models are OpenAI-compatible, ensuring the appropriate code paths are utilized
 - Designate these models as the default choices for chat and autocomplete, respectively
 
+## Disabling legacy completions
+
+Available in Sourcegraph 6.4+ and 6.3.2692
+
+By default, Cody will send Autocomplete requests to the legacy OpenAI /completions endpoint (i.e. for pure-inference requests) - if your OpenAi-compatible API endpoint supports only /chat/completions, you may disable the use of the legacy completions endpoint by adding the following above your serverSideConfig endpoints list:
+
+```
+"serverSideConfig": {
+  "type": "openaicompatible",
+  "useLegacyCompletions": false,
+  // ^ add this to disable /completions and make Cody only use /chat/completions
+  "endpoints": [
+    {
+      "url": "https://api-inference.huggingface.co/models/meta-llama/CodeLlama-7b-hf/v1/",
+      "accessToken": "token"
+    }
+  ]
+}
+```
+
+## Sending custom HTTP headers
+
+Available in Sourcegraph 6.4+ and 6.3.2692
+
+By default, Cody will only send an Authorization: Bearer header to OpenAI-compatible endpoints. You may configure custom HTTP headers if you like under the url of endpoints:
+
+```
+"serverSideConfig": {
+  "type": "openaicompatible",
+  "endpoints": [
+    {
+      "url": "https://api-inference.huggingface.co/models/meta-llama/CodeLlama-7b-hf/v1/",
+      "headers": { "X-api-key": "foo", "My-Custom-Http-Header": "bar" },
+      // ^ add this to configure custom headers
+    }
+  ]
+}
+```
+
+**Note:** when using custom headers, both accessToken and accessTokenQuery configuration settings are ignored.
+
diff --git a/docs/cody/enterprise/model-configuration.mdx b/docs/cody/enterprise/model-configuration.mdx
index 78eacca90..f37dfbd2a 100644
--- a/docs/cody/enterprise/model-configuration.mdx
+++ b/docs/cody/enterprise/model-configuration.mdx
@@ -249,13 +249,22 @@ For OpenAI reasoning models, the `reasoningEffort` field value corresponds to th
       "displayName": "huggingface",
       "serverSideConfig": {
         "type": "openaicompatible",
+        // optional: disable the use of /completions for autocomplete requests, instead using
+        // only /chat/completions. (available in Sourcegraph 6.4+ and 6.3.2692)
+        //
+        // "useLegacyCompletions": false,
         "endpoints": [
           {
             "url": "https://api-inference.huggingface.co/models/meta-llama/CodeLlama-7b-hf/v1/",
             "accessToken": "token"
+
+            // optional: send custom headers (in which case accessToken above is not used)
+            // (available in Sourcegraph 6.4+ and 6.3.2692)
+            //
+            // "headers": { "X-api-key": "foo", "My-Custom-Http-Header": "bar" },
           }
         ]
-      }
+      }
     }
   ],
   "modelOverrides": [
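Taken together, the two options introduced in this patch compose in a single `openaicompatible` provider override. A minimal sketch, assuming a self-hosted OpenAI-compatible endpoint; the URL, token, and header values are illustrative placeholders:

```json
"serverSideConfig": {
  "type": "openaicompatible",
  // skip the legacy /completions endpoint; use only /chat/completions
  "useLegacyCompletions": false,
  "endpoints": [
    {
      "url": "https://api-inference.huggingface.co/models/meta-llama/CodeLlama-7b-hf/v1/",
      // with custom headers set, accessToken and accessTokenQuery are ignored
      "headers": { "X-api-key": "foo" }
    }
  ]
}
```

Because `headers` replaces the default `Authorization: Bearer` authentication, no `accessToken` is given here.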
From cfde40f9faf0589356c0cc41d29b7c44fd7f99b4 Mon Sep 17 00:00:00 2001
From: Maedah Batool
Date: Wed, 21 May 2025 12:29:18 -0700
Subject: [PATCH 2/3] fix build errors

---
 docs/cody/enterprise/model-config-examples.mdx | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx
index 1ff6c48e1..e43ce5067 100644
--- a/docs/cody/enterprise/model-config-examples.mdx
+++ b/docs/cody/enterprise/model-config-examples.mdx
@@ -318,8 +318,8 @@ In the configuration above,
 - Set up a provider override for OpenAI, routing requests for this provider directly to the specified OpenAI endpoint (bypassing Cody Gateway)
 - Add three OpenAI models:
     - `"openai::2024-02-01::gpt-4o"` with chat capability - used as a default model for chat
-    - `"openai::unknown::gpt-4.1-nano"` with chat, edit and autocomplete capabilities - used as a default model for fast chat and autocomplete
-    - `"openai::unknown::o3"` with chat and reasoning capabilities - o-series model that supports thinking, can be used for chat (note: to enable thinking, model override should include "reasoning" capability and have "reasoningEffort" defined).
+    - `"openai::unknown::gpt-4.1-nano"` with chat, edit and autocomplete capabilities - used as a default model for fast chat and autocomplete
+    - `"openai::unknown::o3"` with chat and reasoning capabilities - o-series model that supports thinking, can be used for chat (note: to enable thinking, model override should include "reasoning" capability and have "reasoningEffort" defined).
 
@@ -511,7 +511,7 @@ Available in Sourcegraph 6.4+ and 6.3.2692
 
 By default, Cody will send Autocomplete requests to the legacy OpenAI /completions endpoint (i.e. for pure-inference requests) - if your OpenAi-compatible API endpoint supports only /chat/completions, you may disable the use of the legacy completions endpoint by adding the following above your serverSideConfig endpoints list:
 
-```
+```json
 "serverSideConfig": {
   "type": "openaicompatible",
   "useLegacyCompletions": false,
@@ -529,9 +529,9 @@ By default, Cody will send Autocomplete requests to the legacy OpenAI /completio
 
 Available in Sourcegraph 6.4+ and 6.3.2692
 
-By default, Cody will only send an Authorization: Bearer header to OpenAI-compatible endpoints. You may configure custom HTTP headers if you like under the url of endpoints:
+By default, Cody will only send an `Authorization: Bearer ` header to OpenAI-compatible endpoints. You may configure custom HTTP headers if you like under the url of endpoints:
 
-```
+```json
 "serverSideConfig": {
   "type": "openaicompatible",
   "endpoints": [
@@ -786,7 +786,7 @@ Provider override `serverSideConfig` fields:
 
 Provisioned throughput for Amazon Bedrock models can be configured using the `"awsBedrockProvisionedThroughput"` server-side configuration type. Refer to the [Model Overrides](/cody/enterprise/model-configuration#model-overrides) section for more details.
 
-    If using [IAM roles for EC2 / instance role binding](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html),
+    If using [IAM roles for EC2 / instance role binding](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html), you may need to increase the [HttpPutResponseHopLimit](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_InstanceMetadataOptionsRequest.html#:~:text=HttpPutResponseHopLimit) instance metadata option to a higher value (e.g., 2) to ensure that the metadata service can be accessed from the frontend container running in the EC2 instance. See [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-IMDS-existing-instances.html) for instructions.
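The `HttpPutResponseHopLimit` change described in the callout above can be applied to an existing instance with the AWS CLI, per the linked AWS guide. A sketch, with a placeholder instance ID:

```bash
# Raise the IMDSv2 hop limit to 2 so the instance metadata service
# remains reachable from the frontend container running inside the
# EC2 instance (one extra network hop).
aws ec2 modify-instance-metadata-options \
  --instance-id i-0123456789abcdef0 \
  --http-put-response-hop-limit 2 \
  --http-endpoint enabled
```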
From 4541c10cd885114a0ef7e5b485ea7488323a1c97 Mon Sep 17 00:00:00 2001
From: Maedah Batool
Date: Wed, 21 May 2025 12:32:47 -0700
Subject: [PATCH 3/3] fix formatting

---
 docs/cody/enterprise/model-config-examples.mdx | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx
index e43ce5067..06c7f30a4 100644
--- a/docs/cody/enterprise/model-config-examples.mdx
+++ b/docs/cody/enterprise/model-config-examples.mdx
@@ -509,7 +509,7 @@ In the configuration above,
 
 Available in Sourcegraph 6.4+ and 6.3.2692
 
-By default, Cody will send Autocomplete requests to the legacy OpenAI /completions endpoint (i.e. for pure-inference requests) - if your OpenAi-compatible API endpoint supports only /chat/completions, you may disable the use of the legacy completions endpoint by adding the following above your serverSideConfig endpoints list:
+By default, Cody sends autocomplete requests to the legacy OpenAI /completions endpoint (i.e., for pure-inference requests). If your OpenAI-compatible API endpoint supports only /chat/completions, you may disable the legacy completions endpoint by adding the following above the `endpoints` list in your `serverSideConfig`:
 
 ```json
 "serverSideConfig": {
@@ -527,9 +527,10 @@ By default, Cody will send Autocomplete requests to the legacy OpenAI /completio
 
 ## Sending custom HTTP headers
 
-Available in Sourcegraph 6.4+ and 6.3.2692
+
+Available in Sourcegraph v6.4+ and v6.3.2692
 
-By default, Cody will only send an `Authorization: Bearer ` header to OpenAI-compatible endpoints. You may configure custom HTTP headers if you like under the url of endpoints:
+By default, Cody will only send an `Authorization: Bearer ` header to OpenAI-compatible endpoints. You may configure custom HTTP headers on each entry in `endpoints`, alongside its `url`:
 
 ```json
 "serverSideConfig": {
@@ -544,7 +545,7 @@ By default, Cody will only send an `Authorization: Bearer ` header
 }
 ```
 
-**Note:** when using custom headers, both accessToken and accessTokenQuery configuration settings are ignored.
+When using custom headers, both `accessToken` and `accessTokenQuery` configuration settings are ignored.
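Before wiring an OpenAI-compatible endpoint into site configuration, it can be useful to confirm that it accepts the configured headers and supports /chat/completions. A sketch reusing the illustrative URL and header values from the examples above; the model name and message body are placeholders:

```bash
# Exercise the OpenAI-compatible chat completions endpoint directly,
# sending the same custom headers that Cody would be configured to send.
curl -sS "https://api-inference.huggingface.co/models/meta-llama/CodeLlama-7b-hf/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "X-api-key: foo" \
  -H "My-Custom-Http-Header: bar" \
  -d '{"model": "CodeLlama-7b-hf", "messages": [{"role": "user", "content": "Hello"}]}'
```

A JSON chat completion response indicates the endpoint supports the API that Cody uses when `useLegacyCompletions` is `false`.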