From fcf23066ece7325bc85cb28defb8985e41a71f2f Mon Sep 17 00:00:00 2001
From: Andrej Simurka
Date: Fri, 10 Oct 2025 10:58:00 +0200
Subject: [PATCH] Azure inference supported

---
 .github/workflows/e2e_tests.yaml |  31 +++-
 README.md                        |   2 +
 docker-compose.yaml              |   2 +
 docs/providers.md                |   4 +-
 examples/azure-run.yaml          | 128 +++++++++++++++++++++++++++++++
 tests/e2e/configs/run-azure.yaml | 128 +++++++++++++++++++++++++++++++
 6 files changed, 292 insertions(+), 3 deletions(-)
 create mode 100644 examples/azure-run.yaml
 create mode 100644 tests/e2e/configs/run-azure.yaml

diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml
index 23187059..3280ad29 100644
--- a/.github/workflows/e2e_tests.yaml
+++ b/.github/workflows/e2e_tests.yaml
@@ -8,9 +8,12 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        environment: [ "ci"]
+        environment: [ "ci", "azure"]
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
+      CLIENT_ID: ${{ secrets.CLIENT_ID }}
+      TENANT_ID: ${{ secrets.TENANT_ID }}
 
     steps:
       - uses: actions/checkout@v4
@@ -72,6 +75,32 @@ jobs:
             authentication:
               module: "noop"
 
+
+      - name: Get Azure API key (access token)
+        if: matrix.environment == 'azure'
+        id: azure_token
+        env:
+          CLIENT_ID: ${{ secrets.CLIENT_ID }}
+          CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
+          TENANT_ID: ${{ secrets.TENANT_ID }}
+        run: |
+          echo "Requesting Azure API token..."
+          RESPONSE=$(curl -s -X POST \
+            -H "Content-Type: application/x-www-form-urlencoded" \
+            -d "client_id=$CLIENT_ID&scope=https://cognitiveservices.azure.com/.default&client_secret=$CLIENT_SECRET&grant_type=client_credentials" \
+            "https://login.microsoftonline.com/$TENANT_ID/oauth2/v2.0/token")
+
+          echo "Response received. Extracting access_token..."
+          ACCESS_TOKEN=$(echo "$RESPONSE" | jq -r '.access_token')
+
+          if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" == "null" ]; then
+            echo "❌ Failed to obtain Azure access token. Response:"
+            echo "$RESPONSE"
+            exit 1
+          fi
+
+          echo "✅ Successfully obtained Azure access token."
+          echo "AZURE_API_KEY=$ACCESS_TOKEN" >> $GITHUB_ENV
 
       - name: Select and configure run.yaml
         env:
diff --git a/README.md b/README.md
index edfaaeb5..60353968 100644
--- a/README.md
+++ b/README.md
@@ -125,6 +125,8 @@ Lightspeed Core Stack (LCS) supports the large language models from the provider
 | OpenAI | gpt-5, gpt-4o, gpt4-turbo, gpt-4.1, o1, o3, o4 | Yes | remote::openai | [1](examples/openai-faiss-run.yaml) [2](examples/openai-pgvector-run.yaml) |
 | OpenAI | gpt-3.5-turbo, gpt-4 | No | remote::openai | |
 | RHAIIS (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhaiis.yaml) |
+| Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) |
+| Azure | o1, o1-mini | No | remote::azure | |
 
 The "provider_type" is used in the llama stack configuration file when refering to the provider.
 
diff --git a/docker-compose.yaml b/docker-compose.yaml
index a7b49f19..ad8a79de 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -12,6 +12,7 @@ services:
       - ./run.yaml:/opt/app-root/run.yaml:Z
     environment:
       - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - AZURE_API_KEY=${AZURE_API_KEY}
       - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
      - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
       - RHAIIS_URL=${RHAIIS_URL}
@@ -36,6 +37,7 @@ services:
       - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z
     environment:
       - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - AZURE_API_KEY=${AZURE_API_KEY}
     depends_on:
       llama-stack:
         condition: service_healthy
diff --git a/docs/providers.md b/docs/providers.md
index f256636a..d7bcb36d 100644
--- a/docs/providers.md
+++ b/docs/providers.md
@@ -36,7 +36,7 @@ The tables below summarize each provider category, containing the following atri
 | meta-reference | inline | `accelerate`, `fairscale`, `torch`, `torchvision`, `transformers`, `zmq`, `lm-format-enforcer`, `sentence-transformers`, `torchao==0.8.0`, `fbgemm-gpu-genai==1.1.2` | ❌ |
 | sentence-transformers | inline | `torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu`, `sentence-transformers --no-deps` | ❌ |
 | anthropic | remote | `litellm` | ❌ |
-| azure | remote | `itellm` | ❌ |
+| azure | remote | — | ✅ |
 | bedrock | remote | `boto3` | ❌ |
 | cerebras | remote | `cerebras_cloud_sdk` | ❌ |
 | databricks | remote | — | ❌ |
@@ -287,4 +287,4 @@ Red Hat providers:
 
 ---
 
-For a deeper understanding, see the [official llama-stack configuration documentation](https://llama-stack.readthedocs.io/en/latest/distributions/configuration.html).
+For a deeper understanding, see the [official llama-stack providers documentation](https://llamastack.github.io/docs/providers).
diff --git a/examples/azure-run.yaml b/examples/azure-run.yaml
new file mode 100644
index 00000000..a50301ad
--- /dev/null
+++ b/examples/azure-run.yaml
@@ -0,0 +1,128 @@
+version: '2'
+image_name: minimal-viable-llama-stack-configuration
+
+apis:
+  - agents
+  - datasetio
+  - eval
+  - files
+  - inference
+  - post_training
+  - safety
+  - scoring
+  - telemetry
+  - tool_runtime
+  - vector_io
+benchmarks: []
+container_image: null
+datasets: []
+external_providers_dir: null
+inference_store:
+  db_path: .llama/distributions/ollama/inference_store.db
+  type: sqlite
+logging: null
+metadata_store:
+  db_path: .llama/distributions/ollama/registry.db
+  namespace: null
+  type: sqlite
+providers:
+  files:
+    - provider_id: localfs
+      provider_type: inline::localfs
+      config:
+        storage_dir: /tmp/llama-stack-files
+        metadata_store:
+          type: sqlite
+          db_path: .llama/distributions/ollama/files_metadata.db
+  agents:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config:
+        persistence_store:
+          db_path: .llama/distributions/ollama/agents_store.db
+          namespace: null
+          type: sqlite
+        responses_store:
+          db_path: .llama/distributions/ollama/responses_store.db
+          type: sqlite
+  datasetio:
+    - provider_id: huggingface
+      provider_type: remote::huggingface
+      config:
+        kvstore:
+          db_path: .llama/distributions/ollama/huggingface_datasetio.db
+          namespace: null
+          type: sqlite
+    - provider_id: localfs
+      provider_type: inline::localfs
+      config:
+        kvstore:
+          db_path: .llama/distributions/ollama/localfs_datasetio.db
+          namespace: null
+          type: sqlite
+  eval:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config:
+        kvstore:
+          db_path: .llama/distributions/ollama/meta_reference_eval.db
+          namespace: null
+          type: sqlite
+  inference:
+    - provider_id: azure
+      provider_type: remote::azure
+      config:
+        api_key: ${env.AZURE_API_KEY}
+        api_base: https://ols-test.openai.azure.com/
+        api_version: 2024-02-15-preview
+        api_type: ${env.AZURE_API_TYPE:=}
+  post_training:
+    - provider_id: huggingface
+      provider_type: inline::huggingface-gpu
+      config:
+        checkpoint_format: huggingface
+        device: cpu
+        distributed_backend: null
+        dpo_output_dir: "."
+  safety:
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
+      config:
+        excluded_categories: []
+  scoring:
+    - provider_id: basic
+      provider_type: inline::basic
+      config: {}
+    - provider_id: llm-as-judge
+      provider_type: inline::llm-as-judge
+      config: {}
+    - provider_id: braintrust
+      provider_type: inline::braintrust
+      config:
+        openai_api_key: '********'
+  telemetry:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config:
+        service_name: 'lightspeed-stack-telemetry'
+        sinks: sqlite
+        sqlite_db_path: .llama/distributions/ollama/trace_store.db
+  tool_runtime:
+    - provider_id: model-context-protocol
+      provider_type: remote::model-context-protocol
+      config: {}
+scoring_fns: []
+server:
+  auth: null
+  host: null
+  port: 8321
+  quota: null
+  tls_cafile: null
+  tls_certfile: null
+  tls_keyfile: null
+shields: []
+models:
+  - model_id: gpt-4o-mini
+    model_type: llm
+    provider_id: azure
+    provider_model_id: gpt-4o-mini
\ No newline at end of file
diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml
new file mode 100644
index 00000000..a50301ad
--- /dev/null
+++ b/tests/e2e/configs/run-azure.yaml
@@ -0,0 +1,128 @@
+version: '2'
+image_name: minimal-viable-llama-stack-configuration
+
+apis:
+  - agents
+  - datasetio
+  - eval
+  - files
+  - inference
+  - post_training
+  - safety
+  - scoring
+  - telemetry
+  - tool_runtime
+  - vector_io
+benchmarks: []
+container_image: null
+datasets: []
+external_providers_dir: null
+inference_store:
+  db_path: .llama/distributions/ollama/inference_store.db
+  type: sqlite
+logging: null
+metadata_store:
+  db_path: .llama/distributions/ollama/registry.db
+  namespace: null
+  type: sqlite
+providers:
+  files:
+    - provider_id: localfs
+      provider_type: inline::localfs
+      config:
+        storage_dir: /tmp/llama-stack-files
+        metadata_store:
+          type: sqlite
+          db_path: .llama/distributions/ollama/files_metadata.db
+  agents:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config:
+        persistence_store:
+          db_path: .llama/distributions/ollama/agents_store.db
+          namespace: null
+          type: sqlite
+        responses_store:
+          db_path: .llama/distributions/ollama/responses_store.db
+          type: sqlite
+  datasetio:
+    - provider_id: huggingface
+      provider_type: remote::huggingface
+      config:
+        kvstore:
+          db_path: .llama/distributions/ollama/huggingface_datasetio.db
+          namespace: null
+          type: sqlite
+    - provider_id: localfs
+      provider_type: inline::localfs
+      config:
+        kvstore:
+          db_path: .llama/distributions/ollama/localfs_datasetio.db
+          namespace: null
+          type: sqlite
+  eval:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config:
+        kvstore:
+          db_path: .llama/distributions/ollama/meta_reference_eval.db
+          namespace: null
+          type: sqlite
+  inference:
+    - provider_id: azure
+      provider_type: remote::azure
+      config:
+        api_key: ${env.AZURE_API_KEY}
+        api_base: https://ols-test.openai.azure.com/
+        api_version: 2024-02-15-preview
+        api_type: ${env.AZURE_API_TYPE:=}
+  post_training:
+    - provider_id: huggingface
+      provider_type: inline::huggingface-gpu
+      config:
+        checkpoint_format: huggingface
+        device: cpu
+        distributed_backend: null
+        dpo_output_dir: "."
+  safety:
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
+      config:
+        excluded_categories: []
+  scoring:
+    - provider_id: basic
+      provider_type: inline::basic
+      config: {}
+    - provider_id: llm-as-judge
+      provider_type: inline::llm-as-judge
+      config: {}
+    - provider_id: braintrust
+      provider_type: inline::braintrust
+      config:
+        openai_api_key: '********'
+  telemetry:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config:
+        service_name: 'lightspeed-stack-telemetry'
+        sinks: sqlite
+        sqlite_db_path: .llama/distributions/ollama/trace_store.db
+  tool_runtime:
+    - provider_id: model-context-protocol
+      provider_type: remote::model-context-protocol
+      config: {}
+scoring_fns: []
+server:
+  auth: null
+  host: null
+  port: 8321
+  quota: null
+  tls_cafile: null
+  tls_certfile: null
+  tls_keyfile: null
+shields: []
+models:
+  - model_id: gpt-4o-mini
+    model_type: llm
+    provider_id: azure
+    provider_model_id: gpt-4o-mini
\ No newline at end of file