luxfi · zeekay · Apr 22, 2026 · Apr 21, 2026
diff --git a/pkg/miner/backend/README.md b/pkg/miner/backend/README.md
@@ -0,0 +1,83 @@
+# pkg/miner/backend
+
+Pluggable inference-engine seam for the Lux AI miner.
+
+The miner used to inline three `// TODO: Integrate with actual ...` stubs for
+chat, inference, and embedding. Those stubs are now hidden behind a small
+interface so operators can point the miner at whichever engine they run
+without touching the mining binary.
+
+```go
+type InferenceBackend interface {
+    Name() string
+    Capabilities() Capabilities
+    Chat(ctx context.Context, req ChatRequest) (ChatResponse, error)
+    Inference(ctx context.Context, req InferenceRequest) (InferenceResponse, error)
+    Embed(ctx context.Context, req EmbedRequest) (EmbedResponse, error)
+}
+```
+
+## Backends shipped in-tree
+
+| Package                          | Name     | Use case                                              |
+|----------------------------------|----------|-------------------------------------------------------|
+| `pkg/miner/backend/noop`         | `noop`   | Deterministic mock. Default. Zero config, zero deps.  |
+| `pkg/miner/backend/openai`       | `openai` | OpenAI-compatible HTTP adapter (stdlib `net/http`).   |
+
+`noop` preserves the pre-refactor placeholder output (`"Response to: <prompt>"`,
+`"I'm an AI assistant running on the Lux network."`, 384-dim zero-vector
+embeddings) so existing tests and downstream consumers see no behaviour change.
+
+`openai` works against any server that speaks the OpenAI HTTP dialect — which
+happens to cover all the local engines operators actually run:
+
+| Engine    | `OPENAI_API_BASE` target     | Notes                         |
+|-----------|------------------------------|-------------------------------|
+| llama.cpp | `http://localhost:8080/v1`   | `./server --port 8080`        |
+| vllm      | `http://localhost:8000/v1`   | `vllm serve <model>`          |
+| ollama    | `http://localhost:11434/v1`  | Native OpenAI compat endpoint |
+| LocalAI   | `http://localhost:8080/v1`   | Drop-in OpenAI replacement    |
+| OpenAI    | `https://api.openai.com/v1`  | The real thing                |
+
+One adapter, five engines. No new Go deps.
+
+## Wiring
+
+Via `Config`:
+
+```go
+cfg := miner.DefaultConfig()
+cfg.Backend = "openai"
+cfg.OpenAIBase = "http://localhost:11434/v1"  // ollama
+cfg.OpenAIModel = "llama3.1"
+m := miner.New(cfg)
+```
+
+Or via `WithBackend` for fully custom plumbing (e.g. a direct MLX/CUDA binding
+from your own `main`):
+
+```go
+m := miner.New(cfg).WithBackend(myBackend)
+```
+
+Unknown `Backend` values fall back to `noop` instead of failing — operator
+typos show up in logs as `name=noop` without crash-looping the miner.
+
+## Writing a new backend
+
+Implement `InferenceBackend` in your own module and pass it via
+`WithBackend`. Contract:
+
+- All methods must be safe for concurrent use.
+- `Capabilities()` should be cheap and pure.
+- `Name()` should be a short, stable identifier (`"noop"`, `"openai"`, etc.).
+- Return errors rather than panicking on upstream failures; the miner marks
+  tasks failed and bumps `Stats.TasksFailed`.
+
+## Why OpenAI-compatible instead of direct bindings
+
+llama.cpp bindings pull in ~20 MB of C source. vllm is Python-only. MLX
+bindings require CGo. The OpenAI HTTP contract lets one small Go adapter
+cover every engine an operator would reasonably run, with zero new
+dependencies in `go.mod`. A direct-binding backend can still be added in
+a future PR for latency-sensitive deployments — the interface supports it.
diff --git a/pkg/miner/backend/backend.go b/pkg/miner/backend/backend.go
@@ -0,0 +1,95 @@
+// Copyright (C) 2019-2025, Lux Industries Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+// Package backend defines a pluggable inference-engine interface used by the
+// miner. Backends translate miner task inputs into model outputs; this package
+// deliberately imports nothing outside the stdlib so adapters (llama.cpp,
+// vllm, ollama, remote OpenAI-compatible endpoints, etc.) can be dropped in
+// without bloating the miner binary.
+//
+// The interface is intentionally minimal — it mirrors what the miner actually
+// does in runInference, runChat, and runEmbedding.
+package backend
+
+import "context"
+
+// Message is a single chat turn: who spoke (Role) and what was said (Content).
+// Per the original note, the shape mirrors OpenAI chat messages and the miner's internal message type.
+type Message struct {
+	Role    string `json:"role"`    // speaker of the turn
+	Content string `json:"content"` // text of the turn
+}
+
+// ChatRequest is a multi-turn chat prompt, passed to InferenceBackend.Chat.
+type ChatRequest struct {
+	Model     string    `json:"model"`                // requested model name
+	Messages  []Message `json:"messages"`             // conversation turns
+	MaxTokens int       `json:"max_tokens,omitempty"` // presumably a generation cap; omitted from JSON when zero
+}
+
+// ChatResponse is the assistant's reply, returned by InferenceBackend.Chat.
+type ChatResponse struct {
+	Role    string `json:"role"`             // role attached to the reply
+	Content string `json:"content"`          // reply text
+	Model   string `json:"model"`            // model name reported by the backend
+	Tokens  int    `json:"tokens,omitempty"` // token count; omitted from JSON when zero
+}
+
+// InferenceRequest is a single-prompt completion request, passed to InferenceBackend.Inference.
+type InferenceRequest struct {
+	Model     string `json:"model"`                // requested model name
+	Prompt    string `json:"prompt"`               // prompt text to complete
+	MaxTokens int    `json:"max_tokens,omitempty"` // presumably a generation cap; omitted from JSON when zero
+}
+
+// InferenceResponse is the completion output, returned by InferenceBackend.Inference.
+type InferenceResponse struct {
+	Text   string `json:"text"`   // completion text
+	Tokens int    `json:"tokens"` // token count; always serialized (no omitempty, unlike ChatResponse.Tokens)
+	Model  string `json:"model"`  // model name reported by the backend
+}
+
+// EmbedRequest asks for a vector embedding of a piece of text; it is passed to InferenceBackend.Embed.
+type EmbedRequest struct {
+	Model string `json:"model"` // requested embedding model name
+	Text  string `json:"text"`  // text to embed
+}
+
+// EmbedResponse carries the embedding vector, returned by InferenceBackend.Embed.
+type EmbedResponse struct {
+	Embedding []float64 `json:"embedding"` // vector components
+	Model     string    `json:"model"`     // model name reported by the backend
+}
+
+// Capabilities reports what a backend can do. Consumers use this to pick a
+// backend or to skip tasks a backend cannot serve. It is returned by InferenceBackend.Capabilities.
+type Capabilities struct {
+	Chat      bool `json:"chat"`      // backend serves Chat requests
+	Inference bool `json:"inference"` // backend serves Inference requests
+	Embedding bool `json:"embedding"` // backend serves Embed requests
+	// EmbeddingDims, when non-zero, declares a fixed output dimensionality for
+	// embeddings; 0 means the backend decides per-request (and is omitted from JSON).
+	EmbeddingDims int `json:"embedding_dims,omitempty"`
+}
+
+// InferenceBackend is the pluggable compute layer for the miner.
+//
+// Implementations must be safe for concurrent use — the miner's task worker
+// pool may invoke any method from multiple goroutines.
+type InferenceBackend interface {
+	// Name returns a short identifier ("noop", "openai", ...). Used in logs
+	// and config matching.
+	Name() string
+
+	// Capabilities reports which of Chat, Inference, and Embed this backend supports.
+	Capabilities() Capabilities
+
+	// Chat runs a multi-turn chat completion over req.Messages.
+	Chat(ctx context.Context, req ChatRequest) (ChatResponse, error)
+
+	// Inference runs a single-prompt completion over req.Prompt.
+	Inference(ctx context.Context, req InferenceRequest) (InferenceResponse, error)
+
+	// Embed produces an embedding vector for req.Text.
+	Embed(ctx context.Context, req EmbedRequest) (EmbedResponse, error)
+}
diff --git a/pkg/miner/backend/backend_test.go b/pkg/miner/backend/backend_test.go
@@ -0,0 +1,72 @@
+// Copyright (C) 2019-2025, Lux Industries Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+package backend_test
+
+import (
+	"context"
+	"encoding/json"
+	"testing"
+
+	"github.com/luxfi/ai/pkg/miner/backend"
+	"github.com/luxfi/ai/pkg/miner/backend/noop"
+)
+
+// TestInterfaceContract is a compile-time guard: assigning the noop backend
+// to a backend.InferenceBackend variable only builds while noop satisfies the
+// interface. Only the noop backend is asserted here.
+func TestInterfaceContract(t *testing.T) {
+	var impl backend.InferenceBackend = noop.New()
+	_ = impl
+}
+
+// TestCapabilitiesShape exercises the Capabilities reporting path end-to-end
+// for a concrete backend. The point is to lock in the JSON-visible shape;
+// any rename breaks the wire contract for API consumers.
+func TestCapabilitiesShape(t *testing.T) {
+	caps := noop.New().Capabilities()
+	if !caps.Chat || !caps.Inference || !caps.Embedding {
+		t.Fatalf("noop backend should advertise all three capabilities: %+v", caps)
+	}
+	if caps.EmbeddingDims == 0 {
+		t.Fatal("noop backend should advertise a fixed embedding dimensionality")
+	}
+}
+
+// TestRequestResponseRoundTrip sanity-checks that every request/response pair
+// travels through the interface with no surprise mutations.
+func TestRequestResponseRoundTrip(t *testing.T) {
+	ctx := context.Background()
+	b := noop.New()
+
+	chat, err := b.Chat(ctx, backend.ChatRequest{
+		Model:    "test-model",
+		Messages: []backend.Message{{Role: "user", Content: "hi"}},
+	})
+	if err != nil {
+		t.Fatalf("Chat: %v", err)
+	}
+	if chat.Model != "test-model" {
+		t.Errorf("Chat: model not preserved, got %q", chat.Model)
+	}
+
+	inf, err := b.Inference(ctx, backend.InferenceRequest{
+		Model:  "inf-model",
+		Prompt: "hello",
+	})
+	if err != nil {
+		t.Fatalf("Inference: %v", err)
+	}
+	if inf.Model != "inf-model" {
+		t.Errorf("Inference: model not preserved, got %q", inf.Model)
+	}
+
+	emb, err := b.Embed(ctx, backend.EmbedRequest{
+		Model: "emb-model",
+		Text:  "some text",
+	})
+	if err != nil {
+		t.Fatalf("Embed: %v", err)
+	}
+	if emb.Model != "emb-model" {
+		t.Errorf("Embed: model not preserved, got %q", emb.Model)
+	}
+}
diff --git a/pkg/miner/backend/noop/noop.go b/pkg/miner/backend/noop/noop.go
@@ -0,0 +1,86 @@
+// Copyright (C) 2019-2025, Lux Industries Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+// Package noop provides a deterministic in-process InferenceBackend. It
+// preserves the placeholder behaviour that was inlined into
+// pkg/miner/miner.go before the backend interface landed, so existing tests
+// and downstream consumers that rely on "some response comes back" keep
+// working with zero configuration.
+//
+// This backend performs no real inference. Use it for local dev, CI, and as
+// the safe default when an operator hasn't configured a real engine.
+package noop
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/luxfi/ai/pkg/miner/backend"
+)
+
+// DefaultEmbeddingDims is the vector length used when Backend.EmbeddingDims is zero. Per the original
+// note, it matches the pre-refactor embedding stub in pkg/miner/miner.go (len(embedding) == 384).
+const DefaultEmbeddingDims = 384
+
+// Backend is a deterministic mock implementing backend.InferenceBackend; it performs no real inference.
+type Backend struct {
+	// EmbeddingDims controls the length of the zero-vector returned by
+	// Embed. Defaults to DefaultEmbeddingDims when zero; New presets it to that default.
+	EmbeddingDims int
+}
+
+// New constructs a noop backend whose EmbeddingDims is preset to
+// DefaultEmbeddingDims.
+func New() *Backend {
+	b := new(Backend)
+	b.EmbeddingDims = DefaultEmbeddingDims
+	return b
+}
+
+// Name implements backend.InferenceBackend.
+func (*Backend) Name() string { return "noop" }
+
+// Capabilities implements backend.InferenceBackend.
+//
+// The noop backend advertises chat, inference, and embedding support together
+// with its configured embedding vector length. A non-positive EmbeddingDims
+// is reported as DefaultEmbeddingDims, so a zero-value or misconfigured
+// Backend never advertises a zero or negative dimensionality.
+func (b *Backend) Capabilities() backend.Capabilities {
+	dims := b.EmbeddingDims
+	if dims <= 0 {
+		// Zero means "unset"; negative values are invalid. Both fall back.
+		dims = DefaultEmbeddingDims
+	}
+	return backend.Capabilities{
+		Chat:          true,
+		Inference:     true,
+		Embedding:     true,
+		EmbeddingDims: dims,
+	}
+}
+
+// Chat returns a fixed assistant message. Identical to the string returned by
+// the previous inline stub in miner.runChat.
+func (b *Backend) Chat(_ context.Context, req backend.ChatRequest) (backend.ChatResponse, error) {
+	return backend.ChatResponse{
+		Role:    "assistant",
+		Content: "I'm an AI assistant running on the Lux network.",
+		Model:   req.Model,
+	}, nil
+}
+
+// Inference echoes the prompt, matching the previous inline stub in
+// miner.runInference.
+func (b *Backend) Inference(_ context.Context, req backend.InferenceRequest) (backend.InferenceResponse, error) {
+	return backend.InferenceResponse{
+		Text:   fmt.Sprintf("Response to: %s", req.Prompt),
+		Tokens: 10,
+		Model:  req.Model,
+	}, nil
+}
+
+// Embed returns a zero vector of EmbeddingDims length together with the
+// requested model name. The original note records that the vector is
+// byte-for-byte identical to the pre-refactor miner.runEmbedding placeholder.
+//
+// A non-positive EmbeddingDims falls back to DefaultEmbeddingDims: zero means
+// "unset", and a negative length would otherwise make the slice allocation
+// panic. The context and the input text are ignored and no error is ever
+// returned.
+func (b *Backend) Embed(_ context.Context, req backend.EmbedRequest) (backend.EmbedResponse, error) {
+	dims := b.EmbeddingDims
+	if dims <= 0 {
+		dims = DefaultEmbeddingDims
+	}
+	// make zero-initialises the slice, which is exactly the placeholder output.
+	vec := make([]float64, dims)
+	return backend.EmbedResponse{
+		Embedding: vec,
+		Model:     req.Model,
+	}, nil
+}