3 changes: 1 addition & 2 deletions android/src/main/java/com/swmansion/rnexecutorch/ETModule.kt
@@ -9,7 +9,6 @@ import com.swmansion.rnexecutorch.utils.ETError
 import com.swmansion.rnexecutorch.utils.TensorUtils
 import org.pytorch.executorch.EValue
 import org.pytorch.executorch.Module
-import java.net.URL

 class ETModule(
   reactContext: ReactApplicationContext,
@@ -23,7 +22,7 @@ class ETModule(
     modelSource: String,
     promise: Promise,
   ) {
-    module = Module.load(URL(modelSource).path)
+    module = Module.load(modelSource)
     promise.resolve(0)
   }

3 changes: 1 addition & 2 deletions android/src/main/java/com/swmansion/rnexecutorch/LLM.kt
@@ -10,7 +10,6 @@ import com.swmansion.rnexecutorch.utils.llms.ConversationManager
 import com.swmansion.rnexecutorch.utils.llms.END_OF_TEXT_TOKEN
 import org.pytorch.executorch.extension.llm.LlmCallback
 import org.pytorch.executorch.extension.llm.LlmModule
-import java.net.URL

 class LLM(
   reactContext: ReactApplicationContext,
@@ -50,7 +49,7 @@
         systemPrompt,
         ArrayUtils.createMapArray<String>(messageHistory),
       )
-      llamaModule = LlmModule(URL(modelSource).path, URL(tokenizerSource).path, 0.7f)
+      llamaModule = LlmModule(modelSource, tokenizerSource, 0.7f)
       this.tempLlamaResponse.clear()
       promise.resolve("Model loaded successfully")
     } catch (e: Exception) {
android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt

@@ -34,6 +34,8 @@ class RnExecutorchPackage : TurboReactPackage() {
         ImageSegmentation(reactContext)
       } else if (name == Tokenizer.NAME) {
         Tokenizer(reactContext)
+      } else if (name == TextEmbeddings.NAME) {
+        TextEmbeddings(reactContext)
       } else {
         null
       }
@@ -139,6 +141,17 @@
           false, // isCxxModule
           true,
         )
+
+      moduleInfos[TextEmbeddings.NAME] =
+        ReactModuleInfo(
+          TextEmbeddings.NAME,
+          TextEmbeddings.NAME,
+          false, // canOverrideExistingModule
+          false, // needsEagerInit
+          false, // isCxxModule
+          true,
+        )
+
       moduleInfos
     }
 }
51 changes: 51 additions & 0 deletions android/src/main/java/com/swmansion/rnexecutorch/TextEmbeddings.kt
@@ -0,0 +1,51 @@
package com.swmansion.rnexecutorch

import com.facebook.react.bridge.Promise
import com.facebook.react.bridge.ReactApplicationContext
import com.facebook.react.bridge.WritableNativeArray
import com.swmansion.rnexecutorch.models.textEmbeddings.TextEmbeddingsModel
import com.swmansion.rnexecutorch.utils.ETError

class TextEmbeddings(
reactContext: ReactApplicationContext,
) : NativeTextEmbeddingsSpec(reactContext) {
private lateinit var textEmbeddingsModel: TextEmbeddingsModel

companion object {
const val NAME = "TextEmbeddings"
}

override fun loadModule(
modelSource: String,
tokenizerSource: String,
promise: Promise,
) {
try {
textEmbeddingsModel = TextEmbeddingsModel(reactApplicationContext)

textEmbeddingsModel.loadModel(modelSource)
textEmbeddingsModel.loadTokenizer(tokenizerSource)

promise.resolve(0)
} catch (e: Exception) {
promise.reject(e.message!!, ETError.InvalidModelSource.toString())
}
}

override fun forward(
input: String,
promise: Promise,
) {
try {
val output = textEmbeddingsModel.runModel(input)
val writableArray = WritableNativeArray()
output.forEach { writableArray.pushDouble(it) }

promise.resolve(writableArray)
} catch (e: Exception) {
promise.reject(e.message!!, e.message)
}
}

override fun getName(): String = NAME
}
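
For orientation, the sketch below shows roughly how this new TurboModule could be invoked from the JavaScript side. It is a minimal illustration, not part of this PR: the `NativeModules` access path and the file paths are placeholder assumptions, and in the library these calls would normally be wrapped by a public hook or API.

```typescript
import { NativeModules } from 'react-native';

// Hypothetical direct access to the native module registered above;
// real apps would go through the library's public API instead.
const { TextEmbeddings } = NativeModules;

async function embed(text: string): Promise<number[]> {
  // loadModule(modelSource, tokenizerSource) resolves with 0 on success.
  await TextEmbeddings.loadModule(
    '/path/to/all-MiniLM-L6-v2.pte', // placeholder model path
    '/path/to/tokenizer.json',       // placeholder tokenizer path
  );
  // forward(input) resolves with the sentence embedding as an array of doubles.
  return await TextEmbeddings.forward(text);
}
```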
5 changes: 2 additions & 3 deletions android/src/main/java/com/swmansion/rnexecutorch/Tokenizer.kt
@@ -7,7 +7,6 @@ import com.swmansion.rnexecutorch.utils.ArrayUtils.Companion.createIntArray
 import com.swmansion.rnexecutorch.utils.ArrayUtils.Companion.createReadableArrayFromIntArray
 import com.swmansion.rnexecutorch.utils.ETError
 import org.pytorch.executorch.HuggingFaceTokenizer
-import java.net.URL

 class Tokenizer(
   reactContext: ReactApplicationContext,
@@ -18,12 +17,12 @@
     const val NAME = "Tokenizer"
   }

-  override fun load(
+  override fun loadModule(
     tokenizerSource: String,
     promise: Promise,
   ) {
     try {
-      tokenizer = HuggingFaceTokenizer(URL(tokenizerSource).path)
+      tokenizer = HuggingFaceTokenizer(tokenizerSource)
       promise.resolve(0)
     } catch (e: Exception) {
       promise.reject(e.message!!, ETError.InvalidModelSource.toString())
android/src/main/java/com/swmansion/rnexecutorch/models/BaseModel.kt

@@ -5,20 +5,19 @@ import com.swmansion.rnexecutorch.utils.ETError
 import org.pytorch.executorch.EValue
 import org.pytorch.executorch.Module
 import org.pytorch.executorch.Tensor
-import java.net.URL

 abstract class BaseModel<Input, Output>(
   val context: Context,
 ) {
   protected lateinit var module: Module

   fun loadModel(modelSource: String) {
-    module = Module.load(URL(modelSource).path)
+    module = Module.load(modelSource)
   }

-  protected fun forward(input: EValue): Array<EValue> {
+  protected fun forward(vararg inputs: EValue): Array<EValue> {
     try {
-      val result = module.forward(input)
+      val result = module.forward(*inputs)
       return result
     } catch (e: IllegalArgumentException) {
       // The error is thrown when transformation to Tensor fails
android/src/main/java/com/swmansion/rnexecutorch/models/textEmbeddings/TextEmbeddingsModel.kt

@@ -0,0 +1,48 @@
package com.swmansion.rnexecutorch.models.textEmbeddings

import com.facebook.react.bridge.ReactApplicationContext
import com.swmansion.rnexecutorch.models.BaseModel
import org.pytorch.executorch.EValue
import org.pytorch.executorch.HuggingFaceTokenizer
import org.pytorch.executorch.Tensor

class TextEmbeddingsModel(
reactApplicationContext: ReactApplicationContext,
) : BaseModel<String, DoubleArray>(reactApplicationContext) {
private lateinit var tokenizer: HuggingFaceTokenizer

fun loadTokenizer(tokenizerSource: String) {
tokenizer = HuggingFaceTokenizer(tokenizerSource)
}

fun preprocess(input: String): Array<LongArray> {
val inputIds = tokenizer.encode(input).map { it.toLong() }.toLongArray()
val attentionMask = inputIds.map { if (it != 0L) 1L else 0L }.toLongArray()
return arrayOf(inputIds, attentionMask) // Shape: [2, max_length]

Review comment (Contributor): Where is `max_length` specified? I think mentioning it here would be nice.
Reply (Author): `max_length` is specified inside `tokenizer.json`.

}

fun postprocess(
modelOutput: FloatArray, // [max_length * embedding_dim]
attentionMask: LongArray, // [max_length]
): DoubleArray {
val modelOutputDouble = modelOutput.map { it.toDouble() }.toDoubleArray()
val embeddings = TextEmbeddingsUtils.meanPooling(modelOutputDouble, attentionMask)
return TextEmbeddingsUtils.normalize(embeddings)
}

override fun runModel(input: String): DoubleArray {
val modelInput = preprocess(input)
val inputsIds = modelInput[0]
val attentionMask = modelInput[1]

val inputsIdsShape = longArrayOf(1, inputsIds.size.toLong())
val attentionMaskShape = longArrayOf(1, attentionMask.size.toLong())

val inputIdsEValue = EValue.from(Tensor.fromBlob(inputsIds, inputsIdsShape))
val attentionMaskEValue = EValue.from(Tensor.fromBlob(attentionMask, attentionMaskShape))

val modelOutput = forward(inputIdsEValue, attentionMaskEValue)[0].toTensor().dataAsFloatArray

return postprocess(modelOutput, attentionMask)
}
}
android/src/main/java/com/swmansion/rnexecutorch/models/textEmbeddings/TextEmbeddingsUtils.kt

@@ -0,0 +1,37 @@
package com.swmansion.rnexecutorch.models.textEmbeddings

import kotlin.math.sqrt

class TextEmbeddingsUtils {
companion object {
fun meanPooling(
modelOutput: DoubleArray,
attentionMask: LongArray,
): DoubleArray {
val attentionMaskLength = attentionMask.size
val modelOutputLength = modelOutput.size
val embeddingDim = modelOutputLength / attentionMaskLength

val result = DoubleArray(embeddingDim)
var sumMask = attentionMask.sum().toDouble()
sumMask = maxOf(sumMask, 1e-9)

for (i in 0 until embeddingDim) {
var sum = 0.0
for (j in 0 until attentionMaskLength) {
sum += modelOutput[j * embeddingDim + i] * attentionMask[j]
}
result[i] = sum / sumMask
}

return result
}

fun normalize(embeddings: DoubleArray): DoubleArray {
var sum = embeddings.sumOf { it * it }
sum = maxOf(sqrt(sum), 1e-9)

return embeddings.map { it / sum }.toDoubleArray()
}
}
}
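
For reference, the two helpers above implement masked mean pooling followed by L2 normalization. Writing the flattened `modelOutput` as a matrix $x \in \mathbb{R}^{L \times d}$ (where $L$ is `max_length` and $d$ is the embedding dimension) and the attention mask as $m \in \{0,1\}^L$, the code computes:

$$
e_i = \frac{\sum_{j=1}^{L} m_j \, x_{j,i}}{\max\left(\sum_{j=1}^{L} m_j,\; 10^{-9}\right)},
\qquad
\hat{e}_i = \frac{e_i}{\max\left(\lVert e \rVert_2,\; 10^{-9}\right)}
$$

The $10^{-9}$ floors guard against division by zero when the mask is all zeros or the pooled vector is zero.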
2 changes: 1 addition & 1 deletion docs/docs/benchmarks/_category_.json
@@ -1,6 +1,6 @@
 {
   "label": "Benchmarks",
-  "position": 8,
+  "position": 7,
   "link": {
     "type": "generated-index"
   }
6 changes: 6 additions & 0 deletions docs/docs/benchmarks/inference-time.md
@@ -99,3 +99,9 @@ Average time for decoding one token in sequence of 100 tokens, with encoding con
 | Moonshine-tiny (10s) | 54.24 | 51.74 | 55.07 | 46.31 | 32.41 |
 | Moonshine-tiny (30s) | 76.38 | 76.19 | 87.37 | 65.61 | 45.04 |
 | Whisper-tiny (30s) | 128.03 | 113.65 | 141.63 | 89.08 | 84.49 |
+
+## Text Embeddings
+
+| Model | iPhone 16 Pro (XNNPACK) [ms] | iPhone 14 Pro Max (XNNPACK) [ms] | iPhone SE 3 (XNNPACK) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
+| ---------------- | :--------------------------: | :------------------------------: | :------------------------: | :-------------------------------: | :-----------------------: |
+| ALL_MINILM_L6_V2 | 105 | 126 | 151 | 165 | 152 |
6 changes: 6 additions & 0 deletions docs/docs/benchmarks/memory-usage.md
@@ -54,3 +54,9 @@ sidebar_position: 2
 | -------------- | :--------------------: | :----------------: |
 | WHISPER_TINY | 900 | 600 |
 | MOONSHINE_TINY | 650 | 560 |
+
+## Text Embeddings
+
+| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ---------------- | :--------------------: | :----------------: |
+| ALL_MINILM_L6_V2 | 140 | 64 |
6 changes: 6 additions & 0 deletions docs/docs/benchmarks/model-size.md
@@ -63,3 +63,9 @@ sidebar_position: 1
 | -------------- | :----------: |
 | WHISPER_TINY | 231.0 |
 | MOONSHINE_TINY | 148.9 |
+
+## Text Embeddings
+
+| Model | XNNPACK [MB] |
+| ---------------- | :----------: |
+| ALL_MINILM_L6_V2 | 91 |
2 changes: 1 addition & 1 deletion docs/docs/computer-vision/_category_.json
@@ -1,6 +1,6 @@
 {
   "label": "Computer Vision",
-  "position": 4,
+  "position": 3,
   "link": {
     "type": "generated-index"
   }
docs/docs/executorch-bindings/_category_.json

@@ -1,5 +1,5 @@
 {
-  "label": "Hookless API",
+  "label": "ExecuTorch Bindings",
   "position": 5,
   "link": {
     "type": "generated-index"
docs/docs/executorch-bindings/useExecutorchModule.md

@@ -1,5 +1,5 @@
 ---
-title: ExecuTorch Bindings
+title: useExecutorchModule
 sidebar_position: 1
 ---

docs/docs/faq/_category_.json

@@ -1,5 +1,5 @@
 {
-  "label": "Utils",
+  "label": "FAQ",
   "position": 7,
   "link": {
     "type": "generated-index"
38 changes: 38 additions & 0 deletions docs/docs/faq/frequently-asked-questions.md
@@ -0,0 +1,38 @@
---
title: Frequently asked questions
sidebar_position: 1
---

This section answers some common community questions, especially regarding the ExecuTorch runtime or adding your own models. If you don't see an answer to your question, feel free to open a [discussion](https://github.com/software-mansion/react-native-executorch/discussions/new/choose).

### What models are supported?

Each hook's documentation subpage (useClassification, useLLM, etc.) contains a supported models section, which lists the models the library can run with little to no setup. For running your own custom models, refer to `ExecutorchModule` or `useExecutorchModule`.

### How can I run my own AI model?

To run your own model, you need to directly access the underlying [ExecuTorch Module API](https://pytorch.org/executorch/stable/extension-module.html). We provide an experimental [React hook](../executorch-bindings/useExecutorchModule.md) along with a [TypeScript alternative](../typescript-api/ExecutorchModule.md), which let you use that API without diving into native code. Getting a model into a format the runtime can execute requires some ExecuTorch knowledge; for guides on exporting models, refer to the [ExecuTorch tutorials](https://pytorch.org/executorch/stable/tutorials/export-to-executorch-tutorial.html). Once you obtain your model in the `.pte` format, you can run it with `useExecutorchModule` or `ExecutorchModule`, as sketched below.
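
A minimal sketch of the hook-based route, assuming the hook exposes `isReady`, `error`, and a `forward(input, shape)` method; check the bindings documentation linked above for the exact signatures, and note that the model path is a placeholder:

```typescript
import { useExecutorchModule } from 'react-native-executorch';

function CustomModelExample() {
  // The model source is a placeholder; bundled assets and remote URLs both
  // work with the library's model-loading mechanisms.
  const module = useExecutorchModule({
    modelSource: require('../assets/models/my_model.pte'),
  });

  const runInference = async () => {
    if (!module.isReady || module.error) return;
    // forward() takes the flattened input data together with its tensor shape.
    const output = await module.forward([1.0, 2.0, 3.0, 4.0], [1, 4]);
    console.log(output);
  };
  // ...
}
```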

### Can you do function calling with useLLM?

We currently don't provide an out-of-the-box solution for function calling, but modifying the system prompt for Llama models should be enough for simple use cases; a sketch follows below. For more details, refer to [this comment](https://github.com/software-mansion/react-native-executorch/issues/173#issuecomment-2775082278).
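
The idea is to describe the available functions in the system prompt and ask the model to answer with structured JSON that your app then parses. A rough sketch, in which the model and tokenizer constants are assumed names rather than confirmed exports:

```typescript
import { useLLM, LLAMA3_2_1B, LLAMA3_2_TOKENIZER } from 'react-native-executorch';

function WeatherAssistant() {
  // LLAMA3_2_1B / LLAMA3_2_TOKENIZER are assumptions; check the useLLM docs
  // for the exact exports in your version of the library.
  const llm = useLLM({
    modelSource: LLAMA3_2_1B,
    tokenizerSource: LLAMA3_2_TOKENIZER,
    systemPrompt:
      'You can call one tool: getWeather(city: string). ' +
      'When the user asks about the weather, reply ONLY with JSON such as ' +
      '{"tool": "getWeather", "args": {"city": "..."}}. Otherwise answer normally.',
  });

  // After llm.generate(userMessage) completes, try to parse llm.response as
  // JSON; if it is a tool call, run the tool and send the result back to the
  // model as a follow-up message.
  // ...
}
```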

### Can I use React Native ExecuTorch in bare React Native apps?

To use the library, you need to install Expo Modules first. For a setup guide, refer to [this tutorial](https://docs.expo.dev/bare/installing-expo-modules/). This is because we use Expo File System under the hood to download and manage the model binaries.

### Do you support the old architecture?

The old architecture is not supported, and we are not currently planning to add support for it.

### Can I run GGUF models using the library?

No. As of now, the ExecuTorch runtime doesn't provide a reliable way to run GGUF models, so it is not possible.

### Are the models leveraging GPU acceleration?

While it is possible to run some models on iOS using Core ML, a backend that utilizes the CPU, GPU, and ANE, we currently don't have many models exported to Core ML. On Android, the state of GPU acceleration is fairly limited. As of now, there are attempts at running models using the Vulkan backend; however, operator support is very limited, meaning the resulting performance is often inferior to XNNPACK. Hence, most of the models use XNNPACK, a highly optimized and mature CPU backend that runs on both Android and iOS.

### Does this library support XNNPACK and Core ML?

Yes. All of these backends are linked into the library, so the only thing you need to do on your end is export the model with the backend you're interested in using.
2 changes: 1 addition & 1 deletion docs/docs/fundamentals/loading-models.md
@@ -1,6 +1,6 @@
 ---
 title: Loading models
-sidebar_position: 1
+sidebar_position: 2
 ---

 There are three different methods available for loading model files, depending on their size and location.