Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions PACKAGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ A BOM is provided that can be used to define the versions of all Semantic Kernel
`semantickernel-aiservices-openai`
: Provides a connector that can be used to interact with the OpenAI API.

`semantickernel-aiservices-voyageai`
: Provides connectors for VoyageAI's embedding and reranking services, including text embeddings, contextualized embeddings, multimodal embeddings, and document reranking.

## Example Configurations

### Example: OpenAI + SQLite
Expand Down Expand Up @@ -72,5 +75,36 @@ POM XML for a simple project that uses OpenAI.
</project>
```

### Example: VoyageAI Embeddings and Reranking

POM XML for a project that uses VoyageAI for embeddings and reranking.

```xml

<project>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-bom</artifactId>
<version>${semantickernel.version}</version>
<scope>import</scope>
<type>pom</type>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-api</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-aiservices-voyageai</artifactId>
</dependency>
</dependencies>
</project>
```



89 changes: 89 additions & 0 deletions aiservices/voyageai/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven descriptor for the semantickernel-aiservices-voyageai module.
Inherits from semantickernel-parent (two directories up) and declares the
Semantic Kernel API modules, Jackson, OkHttp, Reactor and SLF4J as dependencies,
plus JUnit Jupiter and Mockito for tests.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Parent POM; relativePath points at the repository root pom.xml. -->
<parent>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-parent</artifactId>
<version>1.4.4-RC3-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

<!-- No groupId or version here: both are inherited from the parent declaration above. -->
<artifactId>semantickernel-aiservices-voyageai</artifactId>
<name>Semantic Kernel VoyageAI Services</name>
<description>VoyageAI services for Semantic Kernel</description>

<dependencies>
<!-- Semantic Kernel API modules. No versions are given here; presumably they are
supplied by dependencyManagement in the parent POM (TODO confirm, parent not shown). -->
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-api</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-api-builders</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-api-ai-services</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-api-textembedding-services</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-api-exceptions</artifactId>
</dependency>
<dependency>
<groupId>com.microsoft.semantic-kernel</groupId>
<artifactId>semantickernel-api-localization</artifactId>
</dependency>

<!-- Jackson (JSON). The explicit compile scope matches Maven's default scope.
NOTE(review): it may be there to override a different scope set in the parent's
dependencyManagement; verify before removing. -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<scope>compile</scope>
</dependency>

<!-- HTTP Client -->
<!-- NOTE(review): okhttp is the only dependency in this file with a version pinned
locally (4.12.0). Confirm whether the parent or BOM should manage it instead so the
version stays consistent across modules. -->
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.12.0</version>
</dependency>

<!-- Reactive Streams -->
<dependency>
<groupId>io.projectreactor</groupId>
<artifactId>reactor-core</artifactId>
</dependency>

<!-- Logging -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>

<!-- Test Dependencies -->
<!-- Both are test scoped, so they are not passed on to consumers of this module. -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
// Copyright (c) Microsoft. All rights reserved.
package com.microsoft.semantickernel.aiservices.voyageai.contextualizedembedding;

import com.microsoft.semantickernel.aiservices.voyageai.core.VoyageAIClient;
import com.microsoft.semantickernel.aiservices.voyageai.core.VoyageAIModels;
import com.microsoft.semantickernel.orchestration.PromptExecutionSettings;
import com.microsoft.semantickernel.services.textembedding.Embedding;
import com.microsoft.semantickernel.services.textembedding.TextEmbeddingGenerationService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import reactor.core.publisher.Mono;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

/**
 * VoyageAI contextualized embedding generation service.
 *
 * <p>Generates embeddings that capture both local chunk details and global document-level
 * metadata. Requests are sent as groups of chunks (one group per document) through
 * {@link #generateContextualizedEmbeddingsAsync(List)}. The class also implements
 * {@link TextEmbeddingGenerationService}; through that interface every text is submitted as its
 * own single-chunk document.
 */
public class VoyageAIContextualizedEmbeddingGenerationService
    implements TextEmbeddingGenerationService {

    private static final Logger LOGGER = LoggerFactory
        .getLogger(VoyageAIContextualizedEmbeddingGenerationService.class);

    private final VoyageAIClient client;
    // NOTE(review): the documentation in this class uses "voyage-3" as the example model.
    // VoyageAI's public docs appear to list a dedicated contextualized model
    // ("voyage-context-3") - confirm which example is appropriate.
    private final String modelId;
    private final String serviceId;

    /**
     * Creates a new instance of VoyageAI contextualized embedding generation service.
     *
     * @param client    VoyageAI client used to send requests
     * @param modelId   Model ID (e.g., "voyage-3"); must not be null or blank
     * @param serviceId Optional service ID; when {@code null},
     *                  {@link PromptExecutionSettings#DEFAULT_SERVICE_ID} is used
     * @throws IllegalArgumentException if {@code client} is null or {@code modelId} is null or
     *                                  blank
     */
    public VoyageAIContextualizedEmbeddingGenerationService(
        VoyageAIClient client,
        String modelId,
        @Nullable String serviceId) {

        if (client == null) {
            throw new IllegalArgumentException("Client cannot be null");
        }
        if (modelId == null || modelId.trim().isEmpty()) {
            throw new IllegalArgumentException("Model ID cannot be null or empty");
        }

        this.client = client;
        this.modelId = modelId;
        this.serviceId = serviceId != null ? serviceId
            : PromptExecutionSettings.DEFAULT_SERVICE_ID;
    }

    @Override
    public String getServiceId() {
        return serviceId;
    }

    @Override
    public String getModelId() {
        return modelId;
    }

    /**
     * Generates contextualized embeddings for document chunks.
     *
     * <p>The nested response is flattened in the order it is received: all chunks of the first
     * group, then all chunks of the second group, and so on.
     *
     * @param inputs List of lists where each inner list contains the chunks of one document
     * @return A Mono containing a list of embeddings for all chunks across all documents; an
     *         empty list (without calling the service) when {@code inputs} is null or empty.
     *         The Mono fails with {@link IllegalStateException} when the response lacks the
     *         expected {@code data} fields.
     */
    public Mono<List<Embedding>> generateContextualizedEmbeddingsAsync(
        List<List<String>> inputs) {
        if (inputs == null || inputs.isEmpty()) {
            return Mono.just(Collections.emptyList());
        }

        LOGGER.debug("Generating contextualized embeddings for {} document groups using model {}",
            inputs.size(), modelId);

        VoyageAIModels.ContextualizedEmbeddingRequest request =
            new VoyageAIModels.ContextualizedEmbeddingRequest();
        request.setInputs(inputs);
        request.setModel(modelId);

        return client.sendRequestAsync(
            "contextualizedembeddings",
            request,
            VoyageAIModels.ContextualizedEmbeddingResponse.class)
            .map(response -> flattenEmbeddings(response));
    }

    /**
     * Flattens the nested response structure {@code {"data":[{"data":[{"embedding":[...]}]}]}}
     * into a single list, preserving the order in which groups and chunks appear.
     *
     * <p>A missing {@code data} field is reported as an {@link IllegalStateException} rather
     * than skipped: silently dropping a group would shift every following embedding and
     * misalign the result with the caller's inputs.
     */
    private static List<Embedding> flattenEmbeddings(
        VoyageAIModels.ContextualizedEmbeddingResponse response) {
        if (response.getData() == null) {
            throw new IllegalStateException(
                "VoyageAI contextualized embedding response did not contain any data");
        }

        List<Embedding> embeddings = new ArrayList<>();
        for (VoyageAIModels.ContextualizedEmbeddingDataList dataList : response.getData()) {
            if (dataList == null || dataList.getData() == null) {
                throw new IllegalStateException(
                    "VoyageAI contextualized embedding response contained a document group without data");
            }
            for (VoyageAIModels.EmbeddingDataItem item : dataList.getData()) {
                embeddings.add(new Embedding(item.getEmbedding()));
            }
        }

        LOGGER.debug("Received {} contextualized embeddings from VoyageAI", embeddings.size());
        return embeddings;
    }

    /**
     * Generates embeddings for the given text.
     * For standard text embedding, wraps the data as a single input.
     *
     * @param data The text to generate embeddings for
     * @return A Mono that completes with the first embedding returned for the text, or
     *         completes empty when the service returns no embeddings
     */
    @Override
    public Mono<Embedding> generateEmbeddingAsync(String data) {
        return generateEmbeddingsAsync(Collections.singletonList(data))
            // An empty result completes the Mono without a value instead of failing on get(0).
            .filter(embeddings -> !embeddings.isEmpty())
            .map(embeddings -> embeddings.get(0));
    }

    /**
     * Generates embeddings for the given texts.
     * Each text is treated as a separate document for contextualized embeddings.
     *
     * @param data The texts to generate embeddings for
     * @return A Mono that completes with the list of embeddings; an empty list when
     *         {@code data} is null or empty
     */
    @Override
    public Mono<List<Embedding>> generateEmbeddingsAsync(List<String> data) {
        if (data == null || data.isEmpty()) {
            return Mono.just(Collections.emptyList());
        }

        // Each string becomes its own single-chunk document group.
        List<List<String>> inputs = new ArrayList<>(data.size());
        for (String text : data) {
            inputs.add(Collections.singletonList(text));
        }

        return generateContextualizedEmbeddingsAsync(inputs);
    }

    /**
     * Creates a builder for VoyageAI contextualized embedding generation service.
     *
     * @return A new builder instance
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Builder for {@link VoyageAIContextualizedEmbeddingGenerationService}.
     */
    public static class Builder {
        private VoyageAIClient client;
        private String modelId;
        private String serviceId;

        /**
         * Sets the VoyageAI client.
         *
         * @param client VoyageAI client
         * @return This builder
         */
        public Builder withClient(VoyageAIClient client) {
            this.client = client;
            return this;
        }

        /**
         * Sets the model ID.
         *
         * @param modelId Model ID (e.g., "voyage-3")
         * @return This builder
         */
        public Builder withModelId(String modelId) {
            this.modelId = modelId;
            return this;
        }

        /**
         * Sets the service ID.
         *
         * @param serviceId Service ID
         * @return This builder
         */
        public Builder withServiceId(String serviceId) {
            this.serviceId = serviceId;
            return this;
        }

        /**
         * Builds the VoyageAI contextualized embedding generation service.
         *
         * @return A new instance of VoyageAIContextualizedEmbeddingGenerationService
         * @throws IllegalArgumentException if no client was set, or the model ID is null or
         *                                  blank (validated by the service constructor)
         */
        public VoyageAIContextualizedEmbeddingGenerationService build() {
            return new VoyageAIContextualizedEmbeddingGenerationService(client, modelId,
                serviceId);
        }
    }
}
Loading