54 changes: 53 additions & 1 deletion crates/spin-python-engine/src/lib.rs
@@ -545,13 +545,65 @@ fn llm_infer(
        .map(LLMInferencingResult::from)
}

#[pyo3::pyfunction]
fn generate_embeddings(model: &str, text: Vec<String>) -> Result<LLMEmbeddingsResult, Anyhow> {
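    // Map the well-known model name to the SDK's enum; any other name is passed through as a custom model.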
    let model = match model {
        "all-minilm-l6-v2" => llm::EmbeddingModel::AllMiniLmL6V2,
        _ => llm::EmbeddingModel::Other(model),
    };

    let text = text.iter().map(|s| s.as_str()).collect::<Vec<_>>();

    llm::generate_embeddings(model, &text)
        .map_err(Anyhow::from)
        .map(LLMEmbeddingsResult::from)
}

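// Python-visible wrapper around the host's embeddings usage statistics.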
#[derive(Clone)]
#[pyo3::pyclass]
#[pyo3(name = "LLMEmbeddingsUsage")]
struct LLMEmbeddingsUsage {
    #[pyo3(get)]
    prompt_token_count: u32,
}

impl From<llm::EmbeddingsUsage> for LLMEmbeddingsUsage {
    fn from(result: llm::EmbeddingsUsage) -> Self {
        LLMEmbeddingsUsage {
            prompt_token_count: result.prompt_token_count,
        }
    }
}

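// Python-visible wrapper for an embeddings result: one f32 vector per input string, plus usage.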
#[derive(Clone)]
#[pyo3::pyclass]
#[pyo3(name = "LLMEmbeddingsResult")]
struct LLMEmbeddingsResult {
    #[pyo3(get)]
    embeddings: Vec<Vec<f32>>,
    #[pyo3(get)]
    usage: LLMEmbeddingsUsage,
}

impl From<llm::EmbeddingsResult> for LLMEmbeddingsResult {
    fn from(result: llm::EmbeddingsResult) -> Self {
        LLMEmbeddingsResult {
            embeddings: result.embeddings,
            usage: LLMEmbeddingsUsage::from(result.usage),
        }
    }
}

#[pyo3::pymodule]
#[pyo3(name = "spin_llm")]
fn spin_llm_module(_py: Python<'_>, module: &PyModule) -> PyResult<()> {
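    // Register the embeddings function and its result classes alongside the existing inference API.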
    module.add_function(pyo3::wrap_pyfunction!(llm_infer, module)?)?;
    module.add_function(pyo3::wrap_pyfunction!(generate_embeddings, module)?)?;
    module.add_class::<LLMInferencingUsage>()?;
    module.add_class::<LLMInferencingParams>()?;
    module.add_class::<LLMInferencingResult>()?;
    module.add_class::<LLMEmbeddingsUsage>()?;
    module.add_class::<LLMEmbeddingsResult>()
}

pub fn run_ctors() {
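As a quick reference, a minimal sketch of the Python surface this module exposes (module and class names are taken from this diff; running it assumes a Spin component with access to the embedding model):

import spin_llm

result = spin_llm.generate_embeddings("all-minilm-l6-v2", ["hello", "world"])
# result.embeddings is a list[list[float]] with one vector per input string
# result.usage.prompt_token_count reports the tokens consumed by the inputs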
12 changes: 10 additions & 2 deletions examples/llm/app.py
@@ -1,10 +1,18 @@
import json
from spin_http import Response
from spin_llm import llm_infer, generate_embeddings


def handle_request(request):
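    # Run a chat inference, then embed three short strings with the embeddings model.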
    prompt = "You are a stand up comedy writer. Tell me a joke."
    result = llm_infer("llama2-chat", prompt)

    embeddings = generate_embeddings("all-minilm-l6-v2", ["hat", "cat", "bat"])

    body = (f"joke: {result.text}\n\n"
            f"embeddings: {json.dumps(embeddings.embeddings)}\n"
            f"prompt token count: {embeddings.usage.prompt_token_count}")

    return Response(200,
                    {"content-type": "text/plain"},
                    bytes(body, "utf-8"))
2 changes: 1 addition & 1 deletion examples/llm/spin.toml
@@ -8,7 +8,7 @@ version = "0.1.0"
[[component]]
id = "python-sdk-example"
source = "app.wasm"
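# The component must be granted access to every model it calls at runtime.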
ai_models = ["llama2-chat", "all-minilm-l6-v2"]
[component.trigger]
route = "/..."
[component.build]