From 45b4ed0b21847097f2a4258cd7dab08db6e8ded8 Mon Sep 17 00:00:00 2001 From: Philip Blair Date: Thu, 26 Jul 2018 13:15:40 -0400 Subject: [PATCH 1/5] RD-2370: Update API model to support per-token embeddings --- .../apimodel/TextEmbeddingOptions.java | 33 +++++++++++++++++++ .../apimodel/TextEmbeddingResponse.java | 14 ++++++-- 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java diff --git a/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java new file mode 100644 index 000000000..6d88307b7 --- /dev/null +++ b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java @@ -0,0 +1,33 @@ +/****************************************************************************** + ** This data and information is proprietary to, and a valuable trade secret + ** of, Basis Technology Corp. It is given in confidence by Basis Technology + ** and may only be used as permitted under the license agreement under which + ** it has been distributed, and in no other way. + ** + ** Copyright (c) 2018 Basis Technology Corporation All rights reserved. + ** + ** The technical data and information provided herein are provided with + ** `limited rights', and the computer software provided herein is provided + ** with `restricted rights' as those terms are defined in DAR and ASPR + ** 7-104.9(a). 
+ ******************************************************************************/ +package com.basistech.rosette.apimodel; + +import com.basistech.rosette.annotations.JacksonMixin; +import lombok.Builder; +import lombok.Value; + +/** + * Text embedding options + */ +@Value +@Builder +@JacksonMixin +public class TextEmbeddingOptions extends Options { + + /** + * @return whether embeddings should be returned for each token + */ + private Boolean perTokenEmbeddings; + +} diff --git a/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingResponse.java b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingResponse.java index a6a89b841..6fb8f1d0f 100644 --- a/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingResponse.java +++ b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingResponse.java @@ -33,7 +33,17 @@ public class TextEmbeddingResponse extends Response { /** - * @return the embedding vector as a list + * @return the document embedding vector as a list */ - private final List<Double> embedding; + private final List<Double> documentEmbedding; + + /** + * @return list of tokens, or {@code null} + */ + private final List<String> tokens; + + /** + * @return list of per-token embeddings, 1:1 with tokens, or {@code null} + */ + private final List<List<Double>> tokenEmbeddings; } From a7d371bad072735f9d29eab074c65ea59173f026 Mon Sep 17 00:00:00 2001 From: Philip Blair Date: Mon, 30 Jul 2018 14:23:32 -0400 Subject: [PATCH 2/5] RD-2370: Rename option --- .../com/basistech/rosette/apimodel/TextEmbeddingOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java index 6d88307b7..8998e04fd 100644 --- a/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java +++ b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java @@ -28,6 +28,6 @@ public class 
TextEmbeddingOptions extends Options { /** * @return whether embeddings should be returned for each token */ - private Boolean perTokenEmbeddings; + private Boolean perToken; } From b44289c94ac13170da110ab4903b2fc68f1211f5 Mon Sep 17 00:00:00 2001 From: Philip Blair Date: Mon, 30 Jul 2018 14:24:23 -0400 Subject: [PATCH 3/5] RD-2370: Update TVEC example --- .../com/basistech/rosette/examples/TextEmbeddingExample.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/src/main/java/com/basistech/rosette/examples/TextEmbeddingExample.java b/examples/src/main/java/com/basistech/rosette/examples/TextEmbeddingExample.java index e36688128..f00d307ef 100644 --- a/examples/src/main/java/com/basistech/rosette/examples/TextEmbeddingExample.java +++ b/examples/src/main/java/com/basistech/rosette/examples/TextEmbeddingExample.java @@ -17,6 +17,7 @@ import com.basistech.rosette.api.HttpRosetteAPI; import com.basistech.rosette.apimodel.DocumentRequest; +import com.basistech.rosette.apimodel.TextEmbeddingOptions; import com.basistech.rosette.apimodel.TextEmbeddingResponse; import java.io.IOException; @@ -44,7 +45,9 @@ private void run() throws IOException { //The api object creates an http client, but to provide your own: //api.httpClient(CloseableHttpClient) // When no options, use <?>. 
- DocumentRequest<?> request = DocumentRequest.builder().content(embeddingsData).build(); + DocumentRequest<TextEmbeddingOptions> request = DocumentRequest.<TextEmbeddingOptions>builder() + .content(embeddingsData) + .build(); TextEmbeddingResponse response = rosetteApi.perform(HttpRosetteAPI.TEXT_EMBEDDING_SERVICE_PATH, request, TextEmbeddingResponse.class); System.out.println(responseToJson(response)); } From 7302788e473559658eb9373525b9ee0c20750b06 Mon Sep 17 00:00:00 2001 From: Philip Blair Date: Mon, 30 Jul 2018 14:36:22 -0400 Subject: [PATCH 4/5] RD-2370: Fix copyright notice on TextEmbeddingOptions --- .../apimodel/TextEmbeddingOptions.java | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java index 8998e04fd..3f439c758 100644 --- a/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java +++ b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java @@ -1,16 +1,18 @@ -/****************************************************************************** - ** This data and information is proprietary to, and a valuable trade secret - ** of, Basis Technology Corp. It is given in confidence by Basis Technology - ** and may only be used as permitted under the license agreement under which - ** it has been distributed, and in no other way. - ** - ** Copyright (c) 2018 Basis Technology Corporation All rights reserved. - ** - ** The technical data and information provided herein are provided with - ** `limited rights', and the computer software provided herein is provided - ** with `restricted rights' as those terms are defined in DAR and ASPR - ** 7-104.9(a). - ******************************************************************************/ +/* +* Copyright 2018 Basis Technology Corp. 
+* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ package com.basistech.rosette.apimodel; import com.basistech.rosette.annotations.JacksonMixin; From 94641ad766ef385e5da75420deb1476b33948056 Mon Sep 17 00:00:00 2001 From: Philip Blair Date: Mon, 30 Jul 2018 14:55:39 -0400 Subject: [PATCH 5/5] RD-2370: Clarify documentation --- .../com/basistech/rosette/apimodel/TextEmbeddingOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java index 3f439c758..035eba4fc 100644 --- a/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java +++ b/model/src/main/java/com/basistech/rosette/apimodel/TextEmbeddingOptions.java @@ -28,7 +28,7 @@ public class TextEmbeddingOptions extends Options { /** - * @return whether embeddings should be returned for each token + * @return whether embeddings should be returned for each token in addition to the whole document */ private Boolean perToken;