From 5b916c5c3d9093e22d723ce1e1ca32cc4b841fb7 Mon Sep 17 00:00:00 2001
From: Pavel Tisnovsky
Date: Wed, 30 Jul 2025 08:50:43 +0200
Subject: [PATCH] LCORE-248: Getting started guide, 3rd version

---
 docs/getting_started.md | 465 +++++++++++++++++++++++++++++++++-------
 1 file changed, 384 insertions(+), 81 deletions(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index e4d8b8f0..dd6c919b 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -8,15 +8,25 @@
 * [Llama Stack as a library](#llama-stack-as-a-library)
 * [Llama Stack as a server](#llama-stack-as-a-server)
 * [Local deployment](#local-deployment)
-  * [Llama Stack used as a library](#llama-stack-used-as-a-library)
   * [Llama Stack used as a separate process](#llama-stack-used-as-a-separate-process)
+    * [Prerequisites](#prerequisites)
+    * [Installation of all required tools](#installation-of-all-required-tools)
+    * [Installing dependencies for Llama Stack](#installing-dependencies-for-llama-stack)
+    * [Check if Llama Stack can be started](#check-if-llama-stack-can-be-started)
+    * [Llama Stack configuration](#llama-stack-configuration)
+    * [Run Llama Stack in a separate process](#run-llama-stack-in-a-separate-process)
+    * [LCS configuration to connect to Llama Stack running in separate process](#lcs-configuration-to-connect-to-llama-stack-running-in-separate-process)
+    * [Start LCS](#start-lcs)
+  * [Llama Stack used as a library](#llama-stack-used-as-a-library)
+    * [Prerequisites](#prerequisites-1)
 * [Running from container](#running-from-container)
-  * [Llama Stack used as a library](#llama-stack-used-as-a-library-1)
   * [Llama Stack used as a separate process in container](#llama-stack-used-as-a-separate-process-in-container)
-  * [Llama Stack configuration](#llama-stack-configuration)
+  * [Llama Stack used as a library](#llama-stack-used-as-a-library-1)
+
+
 ## Preface
 
 In this document, you will learn how to install and run a service called *Lightspeed Core Stack (LCS)*. It is a service that allows users to communicate with large language models (LLMs), access RAG databases, call so-called agents, process conversation history, ensure that the conversation stays on permitted topics, and so on.
@@ -79,50 +89,149 @@ The easiest option is to run Llama Stack in a separate process. This means that there will be two processes connected together:
 
 1. Llama Stack framework with open port 8321 (can be easily changed if needed)
 1. LCS with open port 8080 (can be easily changed if needed)
 
-### Llama Stack used as a library
-
-## Running from container
-
-### Llama Stack used as a separate process in container
-
-### Llama Stack used as a library
-
-
-```toml
-[project]
-name = "llama-stack-demo"
-version = "0.1.0"
-description = "Default template for PDM package"
-authors = []
-dependencies = [
-    "llama-stack==0.2.14",
-    "fastapi>=0.115.12",
-    "opentelemetry-sdk>=1.34.0",
-    "opentelemetry-exporter-otlp>=1.34.0",
-    "opentelemetry-instrumentation>=0.55b0",
-    "aiosqlite>=0.21.0",
-    "litellm>=1.72.1",
-    "uvicorn>=0.34.3",
-    "blobfile>=3.0.0",
-    "datasets>=3.6.0",
-    "sqlalchemy>=2.0.41",
-    "faiss-cpu>=1.11.0",
-    "mcp>=1.9.4",
-    "autoevals>=0.0.129",
-    "psutil>=7.0.0",
-    "torch>=2.7.1",
-    "peft>=0.15.2",
-    "trl>=0.18.2"]
-requires-python = "==3.12.*"
-readme = "README.md"
-license = {text = "MIT"}
-
-
-[tool.pdm]
-distribution = false
-```
+#### Prerequisites
+
+1. Python 3.12 or 3.13
+1. `pip` tool installed
+1. `jq` and `curl` tools installed
+
+#### Installation of all required tools
+
+1. `pip install --user uv`
+1. `sudo dnf install curl jq`
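+
+As an optional sanity check, you can confirm that all three tools are now available on your `PATH`:
+
+```bash
+# Each command should print a version string; if one of them fails,
+# revisit the corresponding installation step above.
+uv --version
+curl --version
+jq --version
+```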
+
+#### Installing dependencies for Llama Stack
+
+1. Create a new directory:
+   ```bash
+   mkdir llama-stack-server
+   cd llama-stack-server
+   ```
+1. Create a project file named `pyproject.toml` in this directory with the following content:
+   ```toml
+   [project]
+   name = "llama-stack-demo"
+   version = "0.1.0"
+   description = "Default template for PDM package"
+   authors = []
+   dependencies = [
+       "llama-stack==0.2.14",
+       "fastapi>=0.115.12",
+       "opentelemetry-sdk>=1.34.0",
+       "opentelemetry-exporter-otlp>=1.34.0",
+       "opentelemetry-instrumentation>=0.55b0",
+       "aiosqlite>=0.21.0",
+       "litellm>=1.72.1",
+       "uvicorn>=0.34.3",
+       "blobfile>=3.0.0",
+       "datasets>=3.6.0",
+       "sqlalchemy>=2.0.41",
+       "faiss-cpu>=1.11.0",
+       "mcp>=1.9.4",
+       "autoevals>=0.0.129",
+       "psutil>=7.0.0",
+       "torch>=2.7.1",
+       "peft>=0.15.2",
+       "trl>=0.18.2"]
+   requires-python = "==3.12.*"
+   readme = "README.md"
+   license = {text = "MIT"}
+
+
+   [tool.pdm]
+   distribution = false
+   ```
+1. Run the following command to install all dependencies:
+
+   ```bash
+   uv sync
+   ```
+
+   You should get output similar to the following:
+
+   ```ascii
+   Using CPython 3.12.10 interpreter at: /usr/bin/python3
+   Creating virtual environment at: .venv
+   Resolved 136 packages in 1.90s
+   Built sqlalchemy==2.0.42
+   Prepared 14 packages in 10.04s
+   Installed 133 packages in 4.36s
+    + accelerate==1.9.0
+    + aiohappyeyeballs==2.6.1
+    ...
+    ...
+    ...
+    + transformers==4.54.0
+    + triton==3.3.1
+    + trl==0.20.0
+    + typing-extensions==4.14.1
+    + typing-inspection==0.4.1
+    + tzdata==2025.2
+    + urllib3==2.5.0
+    + uvicorn==0.35.0
+    + wcwidth==0.2.13
+    + wrapt==1.17.2
+    + xxhash==3.5.0
+    + yarl==1.20.1
+    + zipp==3.23.0
+   ```
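+
+If you want to confirm that Llama Stack itself landed in the newly created virtual environment, one optional way is to list the installed packages and filter for it (the exact versions will differ from the listing above):
+
+```bash
+# The project's virtual environment lives in .venv; this lists the
+# installed packages and filters for the Llama Stack related ones.
+uv pip list | grep -i llama
+```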
+
+
+#### Check if Llama Stack can be started
+
+1. In the next step, we need to verify that it is possible to run a tool called `llama`. It was installed in a Python virtual environment, so we have to run it via the `uv run` command:
+   ```bash
+   uv run llama
+   ```
+1. If the installation was successful, the following messages should be displayed on the terminal:
+   ```
+   usage: llama [-h] {model,stack,download,verify-download} ...
+
+   Welcome to the Llama CLI
+
+   options:
+     -h, --help            show this help message and exit
+
+   subcommands:
+     {model,stack,download,verify-download}
+
+       model                Work with llama models
+       stack                Operations for the Llama Stack / Distributions
+       download             Download a model from llama.meta.com or Hugging Face Hub
+       verify-download      Verify integrity of downloaded model files
+   ```
+1. If we try to run Llama Stack without configuring it first, only an exception traceback is displayed (which is not very user-friendly):
+   ```bash
+   uv run llama stack run
+   ```
+   Output:
+   ```
+   INFO 2025-07-27 16:56:12,464 llama_stack.cli.stack.run:147 server: No image type or image name provided. Assuming environment packages.
+   Traceback (most recent call last):
+     File "/tmp/ramdisk/llama-stack-runner/.venv/bin/llama", line 10, in <module>
+       sys.exit(main())
+                ^^^^^^
+     File "/tmp/ramdisk/llama-stack-runner/.venv/lib64/python3.12/site-packages/llama_stack/cli/llama.py", line 53, in main
+       parser.run(args)
+     File "/tmp/ramdisk/llama-stack-runner/.venv/lib64/python3.12/site-packages/llama_stack/cli/llama.py", line 47, in run
+       args.func(args)
+     File "/tmp/ramdisk/llama-stack-runner/.venv/lib64/python3.12/site-packages/llama_stack/cli/stack/run.py", line 164, in _run_stack_run_cmd
+       server_main(server_args)
+     File "/tmp/ramdisk/llama-stack-runner/.venv/lib64/python3.12/site-packages/llama_stack/distribution/server/server.py", line 414, in main
+       elif args.template:
+            ^^^^^^^^^^^^^
+   AttributeError: 'Namespace' object has no attribute 'template'
+   ```
+
+#### Llama Stack configuration
+
+Llama Stack needs to be configured properly. To run Llama Stack with a default configuration, create a file named `run.yaml` with the following content:
+
 ```yaml
 version: '2'
@@ -252,53 +361,247 @@ models:
     provider_model_id: gpt-4-turbo
 ```
-In the next step, we need to verify that it is possible to run a tool called `llama`. It was installed in a Python virtual environment and therefore we have to run it via `uv run` command:
-```bash
- uv run llama
-```
-If the installation was successful, the following messages should be displayed on the terminal:
+#### Run Llama Stack in a separate process
+
+1. Export the OpenAI API key by using the following command:
+   ```bash
+   export OPENAI_API_KEY="sk-foo-bar-baz"
+   ```
+1. Run the following command:
+   ```bash
+   uv run llama stack run run.yaml
+   ```
+1. Check the output on the terminal; it should look like this:
+   ```
+   INFO 2025-07-29 15:26:20,864 llama_stack.cli.stack.run:126 server: Using run configuration: run.yaml
+   INFO 2025-07-29 15:26:20,877 llama_stack.cli.stack.run:147 server: No image type or image name provided. Assuming environment packages.
+ INFO 2025-07-29 15:26:21,277 llama_stack.distribution.server.server:441 server: Using config file: run.yaml + INFO 2025-07-29 15:26:21,279 llama_stack.distribution.server.server:443 server: Run configuration: + INFO 2025-07-29 15:26:21,285 llama_stack.distribution.server.server:445 server: apis: + - agents + - datasetio + - eval + - inference + - post_training + - safety + - scoring + - telemetry + - tool_runtime + - vector_io + benchmarks: [] + container_image: null + datasets: [] + external_providers_dir: null + image_name: minimal-viable-llama-stack-configuration + inference_store: + db_path: .llama/distributions/ollama/inference_store.db + type: sqlite + logging: null + metadata_store: + db_path: .llama/distributions/ollama/registry.db + namespace: null + type: sqlite + models: + - metadata: {} + model_id: gpt-4-turbo + model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType + - llm + provider_id: openai + provider_model_id: gpt-4-turbo + providers: + agents: + - config: + persistence_store: + db_path: .llama/distributions/ollama/agents_store.db + namespace: null + type: sqlite + responses_store: + db_path: .llama/distributions/ollama/responses_store.db + type: sqlite + provider_id: meta-reference + provider_type: inline::meta-reference + datasetio: + - config: + kvstore: + db_path: .llama/distributions/ollama/huggingface_datasetio.db + namespace: null + type: sqlite + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + db_path: .llama/distributions/ollama/localfs_datasetio.db + namespace: null + type: sqlite + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + db_path: .llama/distributions/ollama/meta_reference_eval.db + namespace: null + type: sqlite + provider_id: meta-reference + provider_type: inline::meta-reference + inference: + - config: + api_key: '********' + provider_id: openai + provider_type: remote::openai + post_training: + - config: + checkpoint_format: huggingface + device: cpu + distributed_backend: null + provider_id: huggingface + provider_type: inline::huggingface + safety: + - config: + excluded_categories: [] + provider_id: llama-guard + provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + - config: {} + provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - config: + openai_api_key: '********' + provider_id: braintrust + provider_type: inline::braintrust + telemetry: + - config: + service_name: lightspeed-stack + sinks: sqlite + sqlite_db_path: .llama/distributions/ollama/trace_store.db + provider_id: meta-reference + provider_type: inline::meta-reference + tool_runtime: + - config: {} + provider_id: model-context-protocol + provider_type: remote::model-context-protocol + vector_io: + - config: + kvstore: + db_path: .llama/distributions/ollama/faiss_store.db + namespace: null + type: sqlite + provider_id: faiss + provider_type: inline::faiss + scoring_fns: [] + server: + auth: null + host: null + port: 8321 + quota: null + tls_cafile: null + tls_certfile: null + tls_keyfile: null + shields: [] + tool_groups: [] + vector_dbs: [] + version: 2 + ``` +1. The server with Llama Stack listens on port 8321. A description of the REST API is available in the form of OpenAPI (endpoint /openapi.json), but other endpoints can also be used. It is possible to check if Llama Stack runs as REST API server by retrieving its version. 
+   We use the `curl` and `jq` tools for this purpose:
+   ```bash
+   curl localhost:8321/v1/version | jq .
+   ```
+   The output should look like this:
+   ```json
+   {
+       "version": "0.2.14"
+   }
+   ```
+
+
+#### LCS configuration to connect to Llama Stack running in separate process
+
+Configure LCS to connect to the Llama Stack server running in a separate process by using the following configuration:
+
+```yaml
+name: Lightspeed Core Service (LCS)
+service:
+  host: localhost
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  use_as_library_client: false
+  url: http://localhost:8321
+  api_key: xyzzy
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+  data_collector:
+    enabled: false
+    ingress_server_url: null
+    ingress_server_auth_token: null
+    ingress_content_service_name: null
+    collection_interval: 7200  # 2 hours in seconds
+    cleanup_after_send: true
+    connection_timeout_seconds: 30
+authentication:
+  module: "noop"
+```
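+
+Before starting LCS, it can be useful to verify that the Llama Stack server configured above is really reachable on the URL from the `llama_stack` section (an optional pre-flight check; `-s` silences the progress output and `-f` makes `curl` fail on HTTP errors):
+
+```bash
+# Should print the Llama Stack version served on port 8321; if it fails,
+# start the Llama Stack server from the previous section first.
+curl -sf http://localhost:8321/v1/version | jq .
+```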
-usage: llama [-h] {model,stack,download,verify-download} ...
-
-Welcome to the Llama CLI
-
-options:
-  -h, --help            show this help message and exit
-
-subcommands:
-  {model,stack,download,verify-download}
-
-    model                Work with llama models
-    stack                Operations for the Llama Stack / Distributions
-    download             Download a model from llama.meta.com or Hugging Face Hub
-    verify-download      Verify integrity of downloaded model files
-```
+
+#### Start LCS
+
+Start the service by running:
+
+```bash
+make run
+```
-### Llama Stack configuration
-
-If we try to run the Llama Stack without configuring it, only the exception information is displayed (which is not very user-friendly):
+
+The output should look like this:
+
+```
+uv run src/lightspeed_stack.py
+[07/29/25 15:43:35] INFO     Initializing app                    main.py:19
+                    INFO     Including routers                   main.py:68
+INFO:     Started server process [1922983]
+INFO:     Waiting for application startup.
+                    INFO     Registering MCP servers             main.py:81
+                    DEBUG    No MCP servers configured, skipping registration   common.py:36
+                    INFO     Setting up model metrics            main.py:84
+[07/29/25 15:43:35] DEBUG    Set provider/model configuration for openai/gpt-4-turbo to 0   utils.py:45
+                    INFO     App startup complete                main.py:86
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://localhost:8080 (Press CTRL+C to quit)
+```
+
+Check that LCS responds by listing the available models:
+
 ```bash
-llama-stack-runner]$ uv run llama stack run
+curl localhost:8080/v1/models | jq .
 ```
+
+The output should look like this:
+
 ```
-INFO 2025-07-27 16:56:12,464 llama_stack.cli.stack.run:147 server: No image type or image name provided. Assuming environment packages.
-Traceback (most recent call last):
-  File "/tmp/ramdisk/llama-stack-runner/.venv/bin/llama", line 10, in <module>
-    sys.exit(main())
-             ^^^^^^
-  File "/tmp/ramdisk/llama-stack-runner/.venv/lib64/python3.12/site-packages/llama_stack/cli/llama.py", line 53, in main
-    parser.run(args)
-  File "/tmp/ramdisk/llama-stack-runner/.venv/lib64/python3.12/site-packages/llama_stack/cli/llama.py", line 47, in run
-    args.func(args)
-  File "/tmp/ramdisk/llama-stack-runner/.venv/lib64/python3.12/site-packages/llama_stack/cli/stack/run.py", line 164, in _run_stack_run_cmd
-    server_main(server_args)
-  File "/tmp/ramdisk/llama-stack-runner/.venv/lib64/python3.12/site-packages/llama_stack/distribution/server/server.py", line 414, in main
-    elif args.template:
-         ^^^^^^^^^^^^^
-AttributeError: 'Namespace' object has no attribute 'template'
 ```
+{
+  "models": [
+    {
+      "identifier": "gpt-4-turbo",
+      "metadata": {},
+      "api_model_type": "llm",
+      "provider_id": "openai",
+      "type": "model",
+      "provider_resource_id": "gpt-4-turbo",
+      "model_type": "llm"
+    }
+  ]
+}
+```
+
+
+
+### Llama Stack used as a library
+
+#### Prerequisites
+
+## Running from container
+
+### Llama Stack used as a separate process in container
+
+### Llama Stack used as a library
+
+
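+As a rough sketch only (the container image reference and the in-container configuration path below are assumptions rather than values published by the project), running LCS from a container with Llama Stack used as a library could look like this with `podman` (or `docker`):
+
+```bash
+# Hypothetical image reference and mount path; replace them with the image
+# you actually build or pull and with your real configuration file.
+# The mounted configuration would set use_as_library_client: true so that
+# Llama Stack runs inside the LCS process instead of as a separate server.
+podman run --rm -p 8080:8080 \
+  -e OPENAI_API_KEY="sk-foo-bar-baz" \
+  -v ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z \
+  quay.io/lightspeed-core/lightspeed-stack:latest
+```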