diff --git a/docs/ai-integration/generating-embeddings/assets/add-ai-task-4-script.png b/docs/ai-integration/generating-embeddings/assets/add-ai-task-4-script.png
index 65c2868084..67e94d45df 100644
Binary files a/docs/ai-integration/generating-embeddings/assets/add-ai-task-4-script.png and b/docs/ai-integration/generating-embeddings/assets/add-ai-task-4-script.png differ
diff --git a/docs/ai-integration/generating-embeddings/assets/add-ai-task-4.png b/docs/ai-integration/generating-embeddings/assets/add-ai-task-4.png
index 4977ab669e..fa47cfdf5e 100644
Binary files a/docs/ai-integration/generating-embeddings/assets/add-ai-task-4.png and b/docs/ai-integration/generating-embeddings/assets/add-ai-task-4.png differ
diff --git a/docs/ai-integration/generating-embeddings/_embeddings-generation-task-csharp.mdx b/docs/ai-integration/generating-embeddings/content/_embeddings-generation-task-csharp.mdx
similarity index 64%
rename from docs/ai-integration/generating-embeddings/_embeddings-generation-task-csharp.mdx
rename to docs/ai-integration/generating-embeddings/content/_embeddings-generation-task-csharp.mdx
index 540d8fc90b..68198c5af1 100644
--- a/docs/ai-integration/generating-embeddings/_embeddings-generation-task-csharp.mdx
+++ b/docs/ai-integration/generating-embeddings/content/_embeddings-generation-task-csharp.mdx
@@ -6,27 +6,28 @@ import CodeBlock from '@theme/CodeBlock';
* In RavenDB, you can define AI tasks to automatically generate embeddings from your document content.
- These embeddings are then stored in [dedicated collections](../../ai-integration/generating-embeddings/embedding-collections.mdx) within the database,
- enabling [Vector search](../../ai-integration/vector-search/ravendb-as-vector-database.mdx) on your documents.
+ These embeddings are then stored in [dedicated collections](../../../ai-integration/generating-embeddings/embedding-collections.mdx) within the database,
+ enabling [Vector search](../../../ai-integration/vector-search/ravendb-as-vector-database.mdx) on your documents.
* This article explains how to configure such a task.
- It is recommended to first refer to this [Overview](../../ai-integration/generating-embeddings/overview.mdx#embeddings-generation---overview)
+ It is recommended to first refer to this [Overview](../../../ai-integration/generating-embeddings/overview.mdx#embeddings-generation---overview)
to understand the embeddings generation process flow.
* In this article:
- * [Configuring an embeddings generation task - from the Studio](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#configuring-an-embeddings-generation-task---from-the-studio)
- * [Configuring an embeddings generation task - from the Client API](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#configuring-an-embeddings-generation-task---from-the-client-api)
- * [Define source using PATHS](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#configure-an-embeddings-generation-task---define-source-using-paths)
- * [Define source using SCRIPT](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#configure-an-embeddings-generation-task---define-source-using-script)
- * [Chunking methods and tokens](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens)
- * [Syntax](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#syntax)
+ * [Configuring an embeddings generation task - from the Studio](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#configuring-an-embeddings-generation-task---from-the-studio)
+ * [Configuring an embeddings generation task - from the Client API](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#configuring-an-embeddings-generation-task---from-the-client-api)
+ * [Define source using PATHS](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#configure-an-embeddings-generation-task---define-source-using-paths)
+ * [Define source using SCRIPT](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#configure-an-embeddings-generation-task---define-source-using-script)
+ * [Chunking methods and tokens](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens)
+ * [Syntax](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#syntax)
+
## Configuring an embeddings generation task - from the Studio
* **Define the general task settings**:
- 
+ 
1. **Name**
Enter a name for the task.
@@ -43,13 +44,13 @@ import CodeBlock from '@theme/CodeBlock';
**This identifier is used:**
* When querying embeddings generated by the task via a dynamic query.
- An example is available in [Querying pre-made embeddings](../../ai-integration/vector-search/vector-search-using-dynamic-query.mdx#querying-pre-made-embeddings-generated-by-tasks).
+ An example is available in [Querying pre-made embeddings](../../../ai-integration/vector-search/vector-search-using-dynamic-query.mdx#querying-pre-made-embeddings-generated-by-tasks).
* When indexing the embeddings generated by the task.
- An example is available in [Indexing pre-made text-embeddings](../../ai-integration/vector-search/vector-search-using-static-index.mdx#indexing-pre-made-text-embeddings).
- * In documents in the [Embeddings collection](../../ai-integration/generating-embeddings/embedding-collections.mdx#the-embeddings-collection),
+ An example is available in [Indexing pre-made text-embeddings](../../../ai-integration/vector-search/vector-search-using-static-index.mdx#indexing-pre-made-text-embeddings).
+ * In documents in the [Embeddings collection](../../../ai-integration/generating-embeddings/embedding-collections.mdx#the-embeddings-collection),
where the task identifier is used to identify the origin of each embedding.
- See how this identifier is used in the [Embeddings collection](../../ai-integration/generating-embeddings/embedding-collections.mdx#the-embeddings-collection)
+ See how this identifier is used in the [Embeddings collection](../../../ai-integration/generating-embeddings/embedding-collections.mdx#the-embeddings-collection)
documents that reference the generated embeddings.
3. **Regenerate**
@@ -57,48 +58,66 @@ import CodeBlock from '@theme/CodeBlock';
4. **Task state**
Enable/Disable the task.
5. **Responsible node**
- Select a node from the [Database group](../../studio/database/settings/manage-database-group.mdx) to be the responsible node for this task.
+ Select a node from the [Database group](../../../studio/database/settings/manage-database-group.mdx) to be the responsible node for this task.
6. **Connection string**
- Select a previously defined [AI connection string](../../ai-integration/connection-strings/connection-strings-overview.mdx) or create a new one.
+ Select a previously defined [AI connection string](../../../ai-integration/connection-strings/connection-strings-overview.mdx) or create a new one.
7. **Enable document expiration**
- This toggle appears only if the [Document expiration feature](../../studio/database/settings/document-expiration.mdx) is Not enabled in the database.
+ This toggle appears only if the [Document expiration feature](../../../studio/database/settings/document-expiration.mdx) is Not enabled in the database.
Enabling document expiration ensures that embeddings in the `@embeddings-cache` collection are automatically deleted when they expire.
8. **Save**
Click _Save_ to store the task definition or _Cancel_.
+
* **Define the embeddings source - using PATHS**:
- 
+ 
1. **Collection**
Enter or select the source document collection from the dropdown.
2. **Embeddings source**
- Select `Paths` to define the source content by specifying document properties.
- 3. **Source text path**
- Enter the property name from the document that contains the text for embedding generation.
- 4. **Chunking method**
- Select the method for splitting the source text into chunks.
- Learn more in [Chunking methods and tokens](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens).
- 5. **Max tokens per chunk**
- Enter the maximum number of tokens allowed per chunk (this depends on the service provider).
- 6. **Add path configuration**
+ Select `Paths` to define the source content from document properties.
+ 3. **Path configuration**
+ Specify which document properties to extract text from, and how that text should be split into chunks for embedding generation.
+
+ * **Source text path**
+ Enter the property name from the document that contains the text for embedding generation.
+ * **Chunking method**
+ Select the method for splitting the source text into chunks.
+ Learn more in [Chunking methods and tokens](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens).
+ * **Max tokens per chunk**
+ Enter the maximum number of tokens allowed per chunk (this depends on the service provider).
+ * **Overlap tokens**
+ Enter the number of tokens to repeat at the start of each chunk from the end of the previous one.
+ This helps preserve context between chunks by carrying over some tokens from one to the next.
+ Applies only to the _"Plain Text: Split Paragraphs"_ and _"Markdown: Split Paragraphs"_ chunking methods.
+
+ 4. **Add path configuration**
Click to add the specified path configuration to the list.
+ 5. **List of paths**
+ Displays the document properties you added for embedding generation.
+
* **Define the embeddings source - using SCRIPT**:
- 
+ 
1. **Embeddings source**
Select `Script` to define the source content and chunking methods using a JavaScript script.
2. **Script**
- Refer to section [Chunking methods and tokens](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens) for available JavaScript methods.
- 3. **Chunking method**
+ Refer to section [Chunking methods and tokens](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens) for available JavaScript methods.
+ 3. **Default chunking method**
The selected chunking method will be used by default when no method is specified in the script.
e.g., when the script contains: `Name: this.Name`.
- 4. **Max tokens per chunk**:
- Enter the default value to use when no specific value is set for the chunking method in the script.
+ 4. **Default max tokens per chunk**
+ Enter the default value to use when no specific value is set for the chunking method in the script.
+ This is the maximum number of tokens allowed per chunk (depends on the service provider).
+ 5. **Default overlap tokens**
+ Enter the default value to use when no specific value is set for the chunking method in the script.
+ This is the number of tokens to repeat at the start of each chunk from the end of the previous one.
+ Applies only to the _"Plain Text: Split Paragraphs"_ and _"Markdown: Split Paragraphs"_ chunking methods.
+
* **Define quantization and expiration -
for the embeddings generated from the source documents**:
- 
+ 
1. **Quantization**
Select the quantization method that RavenDB will apply to embeddings received from the service provider.
@@ -110,42 +129,42 @@ import CodeBlock from '@theme/CodeBlock';
Set the expiration period for documents stored in the `@embeddings-cache` collection.
These documents contain embeddings generated from the source documents, serving as a cache for these embeddings.
The default initial period is `90` days. This period may be extended when the source documents change.
- Learn more in [The embeddings cache collection](../../ai-integration/generating-embeddings/embedding-collections.mdx#the-embeddings-cache-collection).
+ Learn more in [The embeddings cache collection](../../../ai-integration/generating-embeddings/embedding-collections.mdx#the-embeddings-cache-collection).
3. **Regenerate embeddings**
This toggle is visible only when editing an existing task.
Toggle ON to regenerate embeddings for all documents in the collection, as specified by the _Paths_ or _Script_.
+
* **Define chunking method & expiration -
for the embedding generated from a search term in a vector search query**:
- 
+ 
1. **Querying**
This label indicates that this section configures parameters only for embeddings
generated by the task for **search terms** in vector search queries.
2. **Chunking method**
Select the method for splitting the search term into chunks.
- Learn more in [Chunking methods and tokens](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens).
+ Learn more in [Chunking methods and tokens](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens).
3. **Max tokens per chunk**
Enter the maximum number of tokens allowed per chunk (this depends on the service provider).
4. **Embeddings cache expiration**
Set the expiration period for documents stored in the `@embeddings-cache` collection.
These documents contain embeddings generated from the search terms, serving as a cache for these embeddings.
- The default period is `14` days. Learn more in [The embeddings cache collection](../../ai-integration/generating-embeddings/embedding-collections.mdx#the-embeddings-cache-collection).
-
-
+ The default period is `14` days. Learn more in [The embeddings cache collection](../../../ai-integration/generating-embeddings/embedding-collections.mdx#the-embeddings-cache-collection).
## Configuring an embeddings generation task - from the Client API
#### Configure an embeddings generation task - define source using PATHS:
+
-
-{`// Define a connection string that will be used in the task definition:
+```csharp
+// Define a connection string that will be used in the task definition:
// ====================================================================
var connectionString = new AiConnectionString
-\{
+{
// Connection string name & identifier
Name = "ConnectionStringToOpenAI",
Identifier = "id-for-open-ai-connection-string",
@@ -155,7 +174,7 @@ var connectionString = new AiConnectionString
apiKey: "your-api-key",
endpoint: "https://api.openai.com/v1",
model: "text-embedding-3-small")
-\};
+};
// Deploy the connection string to the server:
// ===========================================
@@ -163,11 +182,10 @@ var putConnectionStringOp =
new PutConnectionStringOperation(connectionString);
var putConnectionStringResult = store.Maintenance.Send(putConnectionStringOp);
-
// Define the embeddings generation task:
// ======================================
var embeddingsTaskConfiguration = new EmbeddingsGenerationConfiguration
-\{
+{
// General info:
Name = "GetEmbeddingsFromOpenAI",
Identifier = "id-for-task-open-ai",
@@ -177,23 +195,27 @@ var embeddingsTaskConfiguration = new EmbeddingsGenerationConfiguration
// Embeddings source & chunking methods - using PATHS configuration:
Collection = "Categories",
EmbeddingsPathConfigurations = [
- new EmbeddingPathConfiguration() \{
+ new EmbeddingPathConfiguration() {
Path = "Name",
ChunkingOptions = new()
- \{
+ {
ChunkingMethod = ChunkingMethod.PlainTextSplit,
MaxTokensPerChunk = 2048
- \}
- \},
+ }
+ },
new EmbeddingPathConfiguration()
- \{
+ {
Path = "Description",
ChunkingOptions = new()
- \{
- ChunkingMethod = ChunkingMethod.PlainTextSplitLines,
- MaxTokensPerChunk = 2048
- \}
- \},
+ {
+ ChunkingMethod = ChunkingMethod.PlainTextSplitParagraphs,
+ MaxTokensPerChunk = 2048,
+
+ // 'OverlapTokens' is only applicable when ChunkingMethod is
+ // 'PlainTextSplitParagraphs' or 'MarkDownSplitParagraphs'
+ OverlapTokens = 128
+ }
+ },
],
// Quantization & expiration -
@@ -204,27 +226,28 @@ var embeddingsTaskConfiguration = new EmbeddingsGenerationConfiguration
// Chunking method and expiration -
// for the embeddings generated from search term in vector search query:
ChunkingOptionsForQuerying = new()
- \{
+ {
ChunkingMethod = ChunkingMethod.PlainTextSplit,
MaxTokensPerChunk = 2048
- \},
+ },
EmbeddingsCacheForQueryingExpiration = TimeSpan.FromDays(14)
-\};
+};
-// Deploy the connection string to the server:
-// ===========================================
+// Deploy the embeddings generation task to the server:
+// ====================================================
var addEmbeddingsGenerationTaskOp =
new AddEmbeddingsGenerationOperation(embeddingsTaskConfiguration);
var addAiIntegrationTaskResult = store.Maintenance.Send(addEmbeddingsGenerationTaskOp);
-`}
-
+```
+
#### Configure an embeddings generation task - define source using SCRIPT:
+
* To configure the source content using a script -
use the `EmbeddingsTransformation` object instead of the `EmbeddingsPathConfigurations` object.
@@ -234,66 +257,63 @@ var addAiIntegrationTaskResult = store.Maintenance.Send(addEmbeddingsGenerationT
Each KEY in the object represents a document field, and the VALUE is a text-splitting function that processes the field's content before generating embeddings.
* These methods ensure that the text chunks derived from document fields stay within the token limits required by the provider, preventing request rejection.
- Learn more in [Chunking methods and tokens](../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens).
+ Learn more in [Chunking methods and tokens](../../../ai-integration/generating-embeddings/embeddings-generation-task.mdx#chunking-methods-and-tokens).
* For example:
-
-{`// Source collection:
+```csharp
+// Source collection:
Collection = "Categories",
// Use 'EmbeddingsTransformation':
EmbeddingsTransformation = new EmbeddingsTransformation()
-\{
+{
// Define the script:
Script =
- @"embeddings.generate(\{
+ @"embeddings.generate({
// Process the document 'Name' field using method text.split().
// The text content will be split into chunks of up to 2048 tokens.
Name: text.split(this.Name, 2048),
- // Process the document 'Description' field using method text.splitLines().
+ // Process the document 'Description' field using method text.splitParagraphs().
// The text content will be split into chunks of up to 2048 tokens.
- Description: text.splitLines(this.Description, 2048)
- \});"
-\},
-`}
-
+ // 128 overlapping tokens will be repeated at the start of each chunk
+ // from the end of the previous one.
+ Description: text.splitParagraphs(this.Description, 2048, 128)
+ });"
+},
+```
-* If no chunking method is provided in the script,
- you can set the default chunking method and the maximum tokens per chunk to be used as follows:
+* If no chunking method is provided in the script, you can set default values as follows:
-
-{`Collection = "Categories",
+```csharp
+Collection = "Categories",
EmbeddingsTransformation = new EmbeddingsTransformation()
-\{
+{
Script =
- @"embeddings.generate(\{
+ @"embeddings.generate({
// No chunking method is specified here
Name: this.Name,
Description: this.Description
- \});",
+ });",
- // Specify the default chunking method and max tokens per chunk
- // to use in the script
+ // Specify the default chunking options to use in the script
ChunkingOptions = new ChunkingOptions()
- \{
+ {
ChunkingMethod = ChunkingMethod.PlainTextSplit,
MaxTokensPerChunk = 2048
- \}
-\},
-`}
-
+ }
+},
+```
-
## Chunking methods and tokens
**Tokens and processing limits**:
@@ -307,14 +327,14 @@ EmbeddingsTransformation = new EmbeddingsTransformation()
* To handle lengthy text, you can define chunking strategies in the task definition and specify the desired number of tokens per chunk.
Chunking splits large input texts into smaller, manageable chunks, each containing no more than the specified maximum number of tokens.
-* The maximum number of tokens per chunk depends on the AI service provider and the specific model defined in the [connection string](../../ai-integration/connection-strings/connection-strings-overview.mdx).
+* The maximum number of tokens per chunk depends on the AI service provider and the specific model defined in the [connection string](../../../ai-integration/connection-strings/connection-strings-overview.mdx).
While RavenDB does not tokenize text, it estimates the number of tokens for chunking purposes by dividing the text length by 4, as shown in the short example after this list.
* The AI provider generates a single embedding for each chunk.
Depending on the maximum tokens per chunk setting, a single input text may result in multiple embeddings.
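+
+For example, consider a field whose text is 10,000 characters long. The following sketch only illustrates the length-based estimate described above; it is not RavenDB's internal code, and the values are illustrative:
+
+```csharp
+// Rough illustration of the length-based token estimate (text length / 4):
+const int maxTokensPerChunk = 2048;     // the limit configured on the task
+var text = new string('a', 10_000);     // a sample 10,000-character field value
+var estimatedTokens = text.Length / 4;  // 10,000 / 4 = 2,500 estimated tokens
+var minChunks = (int)Math.Ceiling((double)estimatedTokens / maxTokensPerChunk);
+Console.WriteLine($"~{estimatedTokens} estimated tokens -> at least {minChunks} chunks");  // at least 2 chunks
+```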
-
-**Available chunking methods**:
-
+
+**Available chunking methods**:
+
RavenDB offers several chunking methods that can be applied per source type.
These methods determine how input text is split before being sent to the provider.
@@ -341,7 +361,9 @@ These methods determine how input text is split before being sent to the provide
* `PlainText: Split Paragraphs`
Uses the Semantic Kernel _SplitPlainTextParagraphs_ method.
- Combines consecutive lines to form paragraphs while ensuring each paragraph is as complete as possible without exceeding the specified token limit.
+ Combines consecutive lines to form paragraphs while ensuring each paragraph is as complete as possible without exceeding the specified token limit.
+ Optionally, set an overlap between chunks using the _overlapTokens_ parameter, which repeats the last _n_ tokens from one chunk at the start of the next.
+ This helps preserve context continuity across paragraph boundaries.
**Applies to**:
Fields containing an array of plain text strings.
@@ -361,7 +383,10 @@ These methods determine how input text is split before being sent to the provide
* `Markdown: Split Paragraphs`
Uses the Semantic Kernel _SplitMarkdownParagraphs_ method.
Groups lines into coherent paragraphs at designated paragraph breaks while ensuring each paragraph remains within the specified token limit.
- Preserves markdown formatting to maintain structure.
+ Markdown formatting is preserved.
+ Optionally, set an overlap between chunks using the _overlapTokens_ parameter, which repeats the last _n_ tokens from one chunk at the start of the next.
+ This helps preserve context continuity across paragraph boundaries.
+
**Applies to**:
Fields containing an array of strings with markdown content.
@@ -380,57 +405,55 @@ These methods determine how input text is split before being sent to the provide
**Chunking method syntax for the JavaScript scripts**:
-
-{`// Available text-splitting methods:
+```javascript
+// Available text-splitting methods:
// =================================
// Plain text methods:
-text.split(text, maxTokensPerChunk);
-text.splitLines(text, maxTokensPerChunk);
-text.splitParagraphs(lines, maxTokensPerChunk);
+text.split(text | [text], maxTokensPerLine);
+text.splitLines(text | [text], maxTokensPerLine);
+text.splitParagraphs(line | [line], maxTokensPerLine, overlapTokens?);
// Markdown methods:
-markdown.splitLines(text, maxTokensPerChunk);
-markdown.splitParagraphs(lines, maxTokensPerChunk);
+markdown.splitLines(text | [text], maxTokensPerLine);
+markdown.splitParagraphs(line | [line], maxTokensPerLine, overlapTokens?);
// HTML processing:
-html.strip(htmlText, maxTokensPerChunk);
-`}
-
+html.strip(htmlText | [htmlText], maxTokensPerChunk);
+```
-| Parameter | Type | Description |
-|-----------------------|------------|----------------------------------------------|
-| **text** | `string` | A plain text or markdown string to split. |
-| **lines** | `string[]` | An array of text lines to split into chunks. |
-| **htmlText** | `string` | A string containing HTML content to process. |
-| **maxTokensPerChunk** | `number` | The maximum tokens allowed per chunk. |
-
-
+| Parameter | Type | Description |
+|------------------------------------------|-----------|------------------------------------------------------------------ |
+| **text** | `string` | A plain text or markdown string to split. |
+| **line** | `string` | A single line or paragraph of text. |
+| **[text] / [line]** | `string[]`| An array of text or lines to split into chunks. |
+| **htmlText** | `string` | A string containing HTML content to process. |
+| **maxTokensPerChunk / maxTokensPerLine** | `number` | The maximum number of tokens allowed per chunk. Default is `512`. |
+| **overlapTokens** | `number` (optional) | The number of tokens to overlap between consecutive chunks. Helps preserve context continuity across chunks (e.g., between paragraphs). Default is `0`. |
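+
+For example, the `html.strip` method can be combined with the script-based source definition shown earlier to clean HTML content before chunking. This is a minimal sketch; the `About` field name is hypothetical:
+
+```csharp
+EmbeddingsTransformation = new EmbeddingsTransformation()
+{
+    Script =
+        @"embeddings.generate({
+            // Strip the HTML tags from the (hypothetical) 'About' field and split
+            // the remaining plain text into chunks of up to 2048 tokens.
+            About: html.strip(this.About, 2048)
+        });"
+},
+```
+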
## Syntax
#### The embeddings generation task configuration:
-
-{`// The 'EmbeddingsGenerationConfiguration' class inherits from 'EtlConfiguration'
+```csharp
+// The 'EmbeddingsGenerationConfiguration' class inherits from 'EtlConfiguration'
// and provides the following specialized configurations for the embeddings generation task:
// =========================================================================================
public class EmbeddingsGenerationConfiguration : EtlConfiguration
-\{
- public string Identifier \{ get; set; \}
- public string Collection \{ get; set; \}
- public List EmbeddingsPathConfigurations \{ get; set; \}
- public EmbeddingsTransformation EmbeddingsTransformation \{ get; set; \}
- public VectorEmbeddingType Quantization \{ get; set; \}
- public ChunkingOptions ChunkingOptionsForQuerying \{ get; set; \}
- public TimeSpan EmbeddingsCacheExpiration \{ get; set; \} = TimeSpan.FromDays(90);
- public TimeSpan EmbeddingsCacheForQueryingExpiration \{ get; set; \} = TimeSpan.FromDays(14);
-\}
-`}
-
+{
+ public string Identifier { get; set; }
+ public string Collection { get; set; }
+ public List<EmbeddingPathConfiguration> EmbeddingsPathConfigurations { get; set; }
+ public EmbeddingsTransformation EmbeddingsTransformation { get; set; }
+ public VectorEmbeddingType Quantization { get; set; }
+ public ChunkingOptions ChunkingOptionsForQuerying { get; set; }
+ public TimeSpan EmbeddingsCacheExpiration { get; set; } = TimeSpan.FromDays(90);
+ public TimeSpan EmbeddingsCacheForQueryingExpiration { get; set; } = TimeSpan.FromDays(14);
+}
+```
| Parameter | Type | Description |
@@ -441,58 +464,57 @@ public class EmbeddingsGenerationConfiguration : EtlConfiguration
-
-{`public class EmbeddingPathConfiguration
-\{
- public string Path \{ get; set; \}
- public ChunkingOptions ChunkingOptions \{ get; set; \}
-\}
+```csharp
+public class EmbeddingPathConfiguration
+{
+ public string Path { get; set; }
+ public ChunkingOptions ChunkingOptions { get; set; }
+}
public class ChunkingOptions
-\{
- public ChunkingMethod ChunkingMethod \{ get; set; \} // Default is PlainTextSplit
- public int MaxTokensPerChunk \{ get; set; \} = 512;
-\}
+{
+ public ChunkingMethod ChunkingMethod { get; set; } // Default is PlainTextSplit
+ public int MaxTokensPerChunk { get; set; } = 512;
+
+ // 'OverlapTokens' is only applicable when ChunkingMethod is
+ // 'PlainTextSplitParagraphs' or 'MarkDownSplitParagraphs'
+ public int OverlapTokens { get; set; } = 0;
+}
public enum ChunkingMethod
-\{
+{
PlainTextSplit,
PlainTextSplitLines,
PlainTextSplitParagraphs,
MarkDownSplitLines,
MarkDownSplitParagraphs,
HtmlStrip
-\}
+}
public class EmbeddingsTransformation
-\{
- public string Script \{ get; set; \}
- public ChunkingOptions ChunkingOptions \{get; set;\}
-\}
+{
+ public string Script { get; set; }
+ public ChunkingOptions ChunkingOptions { get; set; }
+}
public enum VectorEmbeddingType
-\{
+{
Single,
Int8,
Binary,
Text
-\}
-`}
-
+}
+```
+
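+For example, the quantization and cache-expiration members can be set on the task configuration as follows (a minimal sketch with illustrative values, following the configuration pattern shown earlier):
+
+```csharp
+var embeddingsTaskConfiguration = new EmbeddingsGenerationConfiguration
+{
+    // ... name, identifier, connection string, and source/chunking settings ...
+
+    // Quantization applied to the embeddings generated from the source documents:
+    Quantization = VectorEmbeddingType.Int8,
+
+    // Cache expiration for embeddings generated from source documents (default 90 days)
+    // and for embeddings generated from query search terms (default 14 days):
+    EmbeddingsCacheExpiration = TimeSpan.FromDays(90),
+    EmbeddingsCacheForQueryingExpiration = TimeSpan.FromDays(14)
+};
+```
+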
#### Deploying the embeddings generation task:
-
-{`public AddEmbeddingsGenerationOperation(EmbeddingsGenerationConfiguration configuration);
-`}
-
+```csharp
+public AddEmbeddingsGenerationOperation(EmbeddingsGenerationConfiguration configuration);
+```
-
-
-
-
diff --git a/docs/ai-integration/generating-embeddings/embeddings-generation-task.mdx b/docs/ai-integration/generating-embeddings/embeddings-generation-task.mdx
index 42466130ca..94e49e1721 100644
--- a/docs/ai-integration/generating-embeddings/embeddings-generation-task.mdx
+++ b/docs/ai-integration/generating-embeddings/embeddings-generation-task.mdx
@@ -8,7 +8,7 @@ sidebar_position: 1
import LanguageSwitcher from "@site/src/components/LanguageSwitcher";
import LanguageContent from "@site/src/components/LanguageContent";
-import EmbeddingsGenerationTaskCsharp from './_embeddings-generation-task-csharp.mdx';
+import EmbeddingsGenerationTaskCsharp from './content/_embeddings-generation-task-csharp.mdx';
export const supportedLanguages = ["csharp"];
@@ -18,7 +18,6 @@ export const supportedLanguages = ["csharp"];
-
\ No newline at end of file
+-->