// InProcessEmbeddingModelExamples.java
package embedding.model;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.OnnxEmbeddingModel;
import dev.langchain4j.model.embedding.PoolingMode;
import dev.langchain4j.store.embedding.CosineSimilarity;
import java.io.IOException;
/**
 * Examples of running embedding models fully in-process (no remote API calls),
 * using langchain4j's ONNX-based local embedding support.
 */
public class InProcessEmbeddingModelExamples {

    /**
     * Uses a pre-packaged all-MiniLM-L6-v2 model bundled as a Maven/Gradle dependency,
     * so embeddings are computed locally and entirely offline.
     */
    static class Pre_Packaged_In_Process_Embedding_Model_Example {

        public static void main(String[] args) throws IOException {

            String text = "Let's demonstrate that embedding can be done within a Java process and entirely offline.";

            // requires "langchain4j-embeddings-all-minilm-l6-v2" Maven/Gradle dependency, see pom.xml
            EmbeddingModel model = new AllMiniLmL6V2EmbeddingModel();

            Embedding localEmbedding = model.embed(text).content();
            System.out.println(localEmbedding);

            // Uncomment to compare against the same model served via the HuggingFace API:
            // EmbeddingModel huggingFaceEmbeddingModel = HuggingFaceEmbeddingModel.builder()
            //         .accessToken(System.getenv("HF_API_KEY"))
            //         .modelId("sentence-transformers/all-MiniLM-L6-v2")
            //         .build();
            // Embedding huggingFaceEmbedding = huggingFaceEmbeddingModel.embed(text).content();
            // System.out.println(CosineSimilarity.between(localEmbedding, huggingFaceEmbedding));
            // 1.000000001963221 <- this indicates that the embedding created by the offline in-process all-MiniLM-L6-v2 model
            // is practically identical to that generated using the HuggingFace API.
        }
    }

    /**
     * Loads an arbitrary ONNX embedding model (here: multilingual-e5-large) from local files.
     */
    static class Custom_In_Process_Embedding_Model_Example {

        public static void main(String[] args) throws IOException {

            // You can use many of the embedding models from Hugging Face.
            // The https://huggingface.co/Xenova repository hosts many popular models already converted to ONNX.
            // For example, https://huggingface.co/Xenova/multilingual-e5-large
            //
            // To obtain the two files needed below:
            // 1. Open "Files and versions": https://huggingface.co/Xenova/multilingual-e5-large/tree/main
            //    and download "tokenizer.json":
            //    https://huggingface.co/Xenova/multilingual-e5-large/resolve/main/tokenizer.json?download=true
            // 2. Open the "onnx" directory: https://huggingface.co/Xenova/multilingual-e5-large/tree/main/onnx
            //    and download "model_quantized.onnx":
            //    https://huggingface.co/Xenova/multilingual-e5-large/resolve/main/onnx/model_quantized.onnx?download=true
            //
            // To determine the pooling mode, check the original model repo:
            // https://huggingface.co/intfloat/multilingual-e5-large -> "Files and versions" -> "1_Pooling" ->
            // https://huggingface.co/intfloat/multilingual-e5-large/blob/main/1_Pooling/config.json
            // There, "pooling_mode_mean_tokens": true means we need PoolingMode.MEAN.
            //
            // Any other model can be converted to ONNX by following this guide:
            // https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model

            // requires "langchain4j-embeddings" Maven/Gradle dependency, see pom.xml
            EmbeddingModel model = new OnnxEmbeddingModel(
                    "/home/langchain4j/model_quantized.onnx",
                    "/home/langchain4j/tokenizer.json",
                    PoolingMode.MEAN
            );

            String englishText = "Hello, how are you doing?";
            String frenchText = "Bonjour comment allez-vous?";

            Embedding englishEmbedding = model.embed(englishText).content();
            Embedding frenchEmbedding = model.embed(frenchText).content();

            // High similarity across languages shows the model embeds meaning, not surface text.
            System.out.println(CosineSimilarity.between(englishEmbedding, frenchEmbedding)); // 0.9060777281158113
        }
    }
}