3 changes: 1 addition & 2 deletions android/src/main/java/com/swmansion/rnexecutorch/ETModule.kt
@@ -9,7 +9,6 @@ import com.swmansion.rnexecutorch.utils.ETError
 import com.swmansion.rnexecutorch.utils.TensorUtils
 import org.pytorch.executorch.EValue
 import org.pytorch.executorch.Module
-import java.net.URL

 class ETModule(
   reactContext: ReactApplicationContext,
@@ -23,7 +22,7 @@ class ETModule(
     modelSource: String,
     promise: Promise,
   ) {
-    module = Module.load(URL(modelSource).path)
+    module = Module.load(modelSource)
     promise.resolve(0)
   }

3 changes: 1 addition & 2 deletions android/src/main/java/com/swmansion/rnexecutorch/LLM.kt
@@ -10,7 +10,6 @@ import com.swmansion.rnexecutorch.utils.llms.ConversationManager
 import com.swmansion.rnexecutorch.utils.llms.END_OF_TEXT_TOKEN
 import org.pytorch.executorch.extension.llm.LlmCallback
 import org.pytorch.executorch.extension.llm.LlmModule
-import java.net.URL

 class LLM(
   reactContext: ReactApplicationContext,
@@ -50,7 +49,7 @@
         systemPrompt,
         ArrayUtils.createMapArray<String>(messageHistory),
       )
-      llamaModule = LlmModule(URL(modelSource).path, URL(tokenizerSource).path, 0.7f)
+      llamaModule = LlmModule(modelSource, tokenizerSource, 0.7f)
       this.tempLlamaResponse.clear()
       promise.resolve("Model loaded successfully")
     } catch (e: Exception) {
android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt

@@ -34,6 +34,8 @@ class RnExecutorchPackage : TurboReactPackage() {
         ImageSegmentation(reactContext)
       } else if (name == Tokenizer.NAME) {
         Tokenizer(reactContext)
+      } else if (name == TextEmbeddings.NAME) {
+        TextEmbeddings(reactContext)
       } else {
         null
       }
@@ -139,6 +141,17 @@
           false, // isCxxModule
           true,
         )
+
+      moduleInfos[TextEmbeddings.NAME] =
+        ReactModuleInfo(
+          TextEmbeddings.NAME,
+          TextEmbeddings.NAME,
+          false, // canOverrideExistingModule
+          false, // needsEagerInit
+          false, // isCxxModule
+          true,
+        )
+
       moduleInfos
     }
 }
51 changes: 51 additions & 0 deletions android/src/main/java/com/swmansion/rnexecutorch/TextEmbeddings.kt
@@ -0,0 +1,51 @@
package com.swmansion.rnexecutorch

import com.facebook.react.bridge.Promise
import com.facebook.react.bridge.ReactApplicationContext
import com.facebook.react.bridge.WritableNativeArray
import com.swmansion.rnexecutorch.models.textEmbeddings.TextEmbeddingsModel
import com.swmansion.rnexecutorch.utils.ETError

class TextEmbeddings(
reactContext: ReactApplicationContext,
) : NativeTextEmbeddingsSpec(reactContext) {
private lateinit var textEmbeddingsModel: TextEmbeddingsModel

companion object {
const val NAME = "TextEmbeddings"
}

override fun loadModule(
modelSource: String,
tokenizerSource: String,
promise: Promise,
) {
try {
textEmbeddingsModel = TextEmbeddingsModel(reactApplicationContext)

textEmbeddingsModel.loadModel(modelSource)
textEmbeddingsModel.loadTokenizer(tokenizerSource)

promise.resolve(0)
} catch (e: Exception) {
promise.reject(e.message!!, ETError.InvalidModelSource.toString())
}
}

override fun forward(
input: String,
promise: Promise,
) {
try {
val output = textEmbeddingsModel.runModel(input)
val writableArray = WritableNativeArray()
output.forEach { writableArray.pushDouble(it) }

promise.resolve(writableArray)
} catch (e: Exception) {
promise.reject(e.message!!, e.message)
}
}

override fun getName(): String = NAME
}
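
For orientation, the sketch below shows roughly how this new TurboModule could be invoked from the JavaScript side. It is a minimal illustration, not part of this PR: the `NativeModules` access path and the file paths are placeholder assumptions, and in the library these calls would normally be wrapped by a public hook or API.

```typescript
import { NativeModules } from 'react-native';

// Hypothetical direct access to the native module registered above;
// real apps would go through the library's public API instead.
const { TextEmbeddings } = NativeModules;

async function embed(text: string): Promise<number[]> {
  // loadModule(modelSource, tokenizerSource) resolves with 0 on success.
  await TextEmbeddings.loadModule(
    '/path/to/all-MiniLM-L6-v2.pte', // placeholder model path
    '/path/to/tokenizer.json',       // placeholder tokenizer path
  );
  // forward(input) resolves with the sentence embedding as an array of doubles.
  return await TextEmbeddings.forward(text);
}
```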
5 changes: 2 additions & 3 deletions android/src/main/java/com/swmansion/rnexecutorch/Tokenizer.kt
@@ -7,7 +7,6 @@ import com.swmansion.rnexecutorch.utils.ArrayUtils.Companion.createIntArray
 import com.swmansion.rnexecutorch.utils.ArrayUtils.Companion.createReadableArrayFromIntArray
 import com.swmansion.rnexecutorch.utils.ETError
 import org.pytorch.executorch.HuggingFaceTokenizer
-import java.net.URL

 class Tokenizer(
   reactContext: ReactApplicationContext,
@@ -18,12 +17,12 @@
     const val NAME = "Tokenizer"
   }

-  override fun load(
+  override fun loadModule(
     tokenizerSource: String,
     promise: Promise,
   ) {
     try {
-      tokenizer = HuggingFaceTokenizer(URL(tokenizerSource).path)
+      tokenizer = HuggingFaceTokenizer(tokenizerSource)
       promise.resolve(0)
     } catch (e: Exception) {
       promise.reject(e.message!!, ETError.InvalidModelSource.toString())
android/src/main/java/com/swmansion/rnexecutorch/models/BaseModel.kt

@@ -5,20 +5,19 @@ import com.swmansion.rnexecutorch.utils.ETError
 import org.pytorch.executorch.EValue
 import org.pytorch.executorch.Module
 import org.pytorch.executorch.Tensor
-import java.net.URL

 abstract class BaseModel<Input, Output>(
   val context: Context,
 ) {
   protected lateinit var module: Module

   fun loadModel(modelSource: String) {
-    module = Module.load(URL(modelSource).path)
+    module = Module.load(modelSource)
   }

-  protected fun forward(input: EValue): Array<EValue> {
+  protected fun forward(vararg inputs: EValue): Array<EValue> {
     try {
-      val result = module.forward(input)
+      val result = module.forward(*inputs)
       return result
     } catch (e: IllegalArgumentException) {
       // The error is thrown when transformation to Tensor fails
android/src/main/java/com/swmansion/rnexecutorch/models/textEmbeddings/TextEmbeddingsModel.kt

@@ -0,0 +1,48 @@
package com.swmansion.rnexecutorch.models.textEmbeddings

import com.facebook.react.bridge.ReactApplicationContext
import com.swmansion.rnexecutorch.models.BaseModel
import org.pytorch.executorch.EValue
import org.pytorch.executorch.HuggingFaceTokenizer
import org.pytorch.executorch.Tensor

class TextEmbeddingsModel(
reactApplicationContext: ReactApplicationContext,
) : BaseModel<String, DoubleArray>(reactApplicationContext) {
private lateinit var tokenizer: HuggingFaceTokenizer

fun loadTokenizer(tokenizerSource: String) {
tokenizer = HuggingFaceTokenizer(tokenizerSource)
}

fun preprocess(input: String): Array<LongArray> {
val inputIds = tokenizer.encode(input).map { it.toLong() }.toLongArray()
val attentionMask = inputIds.map { if (it != 0L) 1L else 0L }.toLongArray()
return arrayOf(inputIds, attentionMask) // Shape: [2, max_length]

Review comment (Contributor): Where is `max_length` specified? I think mentioning it here would be nice.
Reply (Author): `max_length` is specified inside `tokenizer.json`.

}

fun postprocess(
modelOutput: FloatArray, // [max_length * embedding_dim]
attentionMask: LongArray, // [max_length]
): DoubleArray {
val modelOutputDouble = modelOutput.map { it.toDouble() }.toDoubleArray()
val embeddings = TextEmbeddingsUtils.meanPooling(modelOutputDouble, attentionMask)
return TextEmbeddingsUtils.normalize(embeddings)
}

override fun runModel(input: String): DoubleArray {
val modelInput = preprocess(input)
val inputsIds = modelInput[0]
val attentionMask = modelInput[1]

val inputsIdsShape = longArrayOf(1, inputsIds.size.toLong())
val attentionMaskShape = longArrayOf(1, attentionMask.size.toLong())

val inputIdsEValue = EValue.from(Tensor.fromBlob(inputsIds, inputsIdsShape))
val attentionMaskEValue = EValue.from(Tensor.fromBlob(attentionMask, attentionMaskShape))

val modelOutput = forward(inputIdsEValue, attentionMaskEValue)[0].toTensor().dataAsFloatArray

return postprocess(modelOutput, attentionMask)
}
}
android/src/main/java/com/swmansion/rnexecutorch/models/textEmbeddings/TextEmbeddingsUtils.kt

@@ -0,0 +1,37 @@
package com.swmansion.rnexecutorch.models.textEmbeddings

import kotlin.math.sqrt

class TextEmbeddingsUtils {
companion object {
fun meanPooling(
modelOutput: DoubleArray,
attentionMask: LongArray,
): DoubleArray {
val attentionMaskLength = attentionMask.size
val modelOutputLength = modelOutput.size
val embeddingDim = modelOutputLength / attentionMaskLength

val result = DoubleArray(embeddingDim)
var sumMask = attentionMask.sum().toDouble()
sumMask = maxOf(sumMask, 1e-9)

for (i in 0 until embeddingDim) {
var sum = 0.0
for (j in 0 until attentionMaskLength) {
sum += modelOutput[j * embeddingDim + i] * attentionMask[j]
}
result[i] = sum / sumMask
}

return result
}

fun normalize(embeddings: DoubleArray): DoubleArray {
var sum = embeddings.sumOf { it * it }
sum = maxOf(sqrt(sum), 1e-9)

return embeddings.map { it / sum }.toDoubleArray()
}
}
}
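
For reference, the two helpers above implement masked mean pooling followed by L2 normalization. Writing the flattened `modelOutput` as a matrix $x \in \mathbb{R}^{L \times d}$ (where $L$ is `max_length` and $d$ is the embedding dimension) and the attention mask as $m \in \{0,1\}^L$, the code computes:

$$
e_i = \frac{\sum_{j=1}^{L} m_j \, x_{j,i}}{\max\left(\sum_{j=1}^{L} m_j,\; 10^{-9}\right)},
\qquad
\hat{e}_i = \frac{e_i}{\max\left(\lVert e \rVert_2,\; 10^{-9}\right)}
$$

The $10^{-9}$ floors guard against division by zero when the mask is all zeros or the pooled vector is zero.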
2 changes: 1 addition & 1 deletion docs/docs/benchmarks/_category_.json
@@ -1,6 +1,6 @@
 {
   "label": "Benchmarks",
-  "position": 8,
+  "position": 7,
   "link": {
     "type": "generated-index"
   }
6 changes: 6 additions & 0 deletions docs/docs/benchmarks/inference-time.md
@@ -99,3 +99,9 @@ Average time for decoding one token in sequence of 100 tokens, with encoding con
 | Moonshine-tiny (10s) | 54.24 | 51.74 | 55.07 | 46.31 | 32.41 |
 | Moonshine-tiny (30s) | 76.38 | 76.19 | 87.37 | 65.61 | 45.04 |
 | Whisper-tiny (30s) | 128.03 | 113.65 | 141.63 | 89.08 | 84.49 |
+
+## Text Embeddings
+
+| Model | iPhone 16 Pro (XNNPACK) [ms] | iPhone 14 Pro Max (XNNPACK) [ms] | iPhone SE 3 (XNNPACK) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
+| ---------------- | :--------------------------: | :------------------------------: | :------------------------: | :-------------------------------: | :-----------------------: |
+| ALL_MINILM_L6_V2 | 105 | 126 | 151 | 165 | 152 |
6 changes: 6 additions & 0 deletions docs/docs/benchmarks/memory-usage.md
@@ -54,3 +54,9 @@ sidebar_position: 2
 | -------------- | :--------------------: | :----------------: |
 | WHISPER_TINY | 900 | 600 |
 | MOONSHINE_TINY | 650 | 560 |
+
+## Text Embeddings
+
+| Model | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
+| ---------------- | :--------------------: | :----------------: |
+| ALL_MINILM_L6_V2 | 140 | 64 |
6 changes: 6 additions & 0 deletions docs/docs/benchmarks/model-size.md
@@ -63,3 +63,9 @@ sidebar_position: 1
 | -------------- | :----------: |
 | WHISPER_TINY | 231.0 |
 | MOONSHINE_TINY | 148.9 |
+
+## Text Embeddings
+
+| Model | XNNPACK [MB] |
+| ---------------- | :----------: |
+| ALL_MINILM_L6_V2 | 91 |
2 changes: 1 addition & 1 deletion docs/docs/computer-vision/_category_.json
@@ -1,6 +1,6 @@
 {
   "label": "Computer Vision",
-  "position": 4,
+  "position": 3,
   "link": {
     "type": "generated-index"
   }
docs/docs/executorch-bindings/_category_.json

@@ -1,5 +1,5 @@
 {
-  "label": "Hookless API",
+  "label": "ExecuTorch Bindings",
   "position": 5,
   "link": {
     "type": "generated-index"
docs/docs/executorch-bindings/useExecutorchModule.md

@@ -1,5 +1,5 @@
 ---
-title: ExecuTorch Bindings
+title: useExecutorchModule
 sidebar_position: 1
 ---

docs/docs/faq/_category_.json

@@ -1,5 +1,5 @@
 {
-  "label": "Utils",
+  "label": "FAQ",
   "position": 7,
   "link": {
     "type": "generated-index"
38 changes: 38 additions & 0 deletions docs/docs/faq/frequently-asked-questions.md
@@ -0,0 +1,38 @@
---
title: Frequently asked questions
sidebar_position: 1
---

This section answers some common community questions, especially regarding the ExecuTorch runtime or adding your own models. If you don't see an answer to your question, feel free to open a [discussion](https://github.com/software-mansion/react-native-executorch/discussions/new/choose).

### What models are supported?

Each hook's documentation subpage (useClassification, useLLM, etc.) contains a supported models section, which lists the models the library can run with little to no setup. For running your own custom models, refer to `ExecutorchModule` or `useExecutorchModule`.

### How can I run my own AI model?

To run your own model, you need to directly access the underlying [ExecuTorch Module API](https://pytorch.org/executorch/stable/extension-module.html). We provide an experimental [React hook](../executorch-bindings/useExecutorchModule.md) along with a [TypeScript alternative](../typescript-api/ExecutorchModule.md), which let you use that API without diving into native code. Getting a model into a format the runtime can execute requires some ExecuTorch knowledge; for guides on exporting models, refer to the [ExecuTorch tutorials](https://pytorch.org/executorch/stable/tutorials/export-to-executorch-tutorial.html). Once you obtain your model in the `.pte` format, you can run it with `useExecutorchModule` or `ExecutorchModule`, as sketched below.
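
A minimal sketch of the hook-based route, assuming the hook exposes `isReady`, `error`, and a `forward(input, shape)` method; check the bindings documentation linked above for the exact signatures, and note that the model path is a placeholder:

```typescript
import { useExecutorchModule } from 'react-native-executorch';

function CustomModelExample() {
  // The model source is a placeholder; bundled assets and remote URLs both
  // work with the library's model-loading mechanisms.
  const module = useExecutorchModule({
    modelSource: require('../assets/models/my_model.pte'),
  });

  const runInference = async () => {
    if (!module.isReady || module.error) return;
    // forward() takes the flattened input data together with its tensor shape.
    const output = await module.forward([1.0, 2.0, 3.0, 4.0], [1, 4]);
    console.log(output);
  };
  // ...
}
```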

### Can you do function calling with useLLM?

We currently don't provide an out-of-the-box solution for function calling, but modifying the system prompt for Llama models should be enough for simple use cases; a sketch follows below. For more details, refer to [this comment](https://github.com/software-mansion/react-native-executorch/issues/173#issuecomment-2775082278).
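
The idea is to describe the available functions in the system prompt and ask the model to answer with structured JSON that your app then parses. A rough sketch, in which the model and tokenizer constants are assumed names rather than confirmed exports:

```typescript
import { useLLM, LLAMA3_2_1B, LLAMA3_2_TOKENIZER } from 'react-native-executorch';

function WeatherAssistant() {
  // LLAMA3_2_1B / LLAMA3_2_TOKENIZER are assumptions; check the useLLM docs
  // for the exact exports in your version of the library.
  const llm = useLLM({
    modelSource: LLAMA3_2_1B,
    tokenizerSource: LLAMA3_2_TOKENIZER,
    systemPrompt:
      'You can call one tool: getWeather(city: string). ' +
      'When the user asks about the weather, reply ONLY with JSON such as ' +
      '{"tool": "getWeather", "args": {"city": "..."}}. Otherwise answer normally.',
  });

  // After llm.generate(userMessage) completes, try to parse llm.response as
  // JSON; if it is a tool call, run the tool and send the result back to the
  // model as a follow-up message.
  // ...
}
```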

### Can I use React Native ExecuTorch in bare React Native apps?

To use the library, you need to install Expo Modules first. For a setup guide, refer to [this tutorial](https://docs.expo.dev/bare/installing-expo-modules/). This is because we use Expo File System under the hood to download and manage the model binaries.

### Do you support the old architecture?

The old architecture is not supported, and we are not currently planning to add support for it.

### Can I run GGUF models using the library?

No. As of now, the ExecuTorch runtime doesn't provide a reliable way to run GGUF models, so it is not possible.

### Are the models leveraging GPU acceleration?

While it is possible to run some models on iOS using Core ML, a backend that utilizes the CPU, GPU, and ANE, we currently don't have many models exported to Core ML. On Android, the state of GPU acceleration is fairly limited. As of now, there are attempts at running models using the Vulkan backend; however, operator support is very limited, meaning the resulting performance is often inferior to XNNPACK. Hence, most of the models use XNNPACK, a highly optimized and mature CPU backend that runs on both Android and iOS.

### Does this library support XNNPACK and Core ML?

Yes. All of these backends are linked into the library, so the only thing you need to do on your end is export the model with the backend you're interested in using.
2 changes: 1 addition & 1 deletion docs/docs/fundamentals/loading-models.md
@@ -1,6 +1,6 @@
 ---
 title: Loading models
-sidebar_position: 1
+sidebar_position: 2
 ---

 There are three different methods available for loading model files, depending on their size and location.