diff --git a/.cspell.json b/.cspell.json index ec6606a109..1a7f30edfd 100644 --- a/.cspell.json +++ b/.cspell.json @@ -1,27 +1,10 @@ { "version": "0.2", "allowCompoundWords": true, - "enableFiletypes": [ - "dockerfile", - "md", - "yaml" - ], - "enabledLanguageIds": [ - "json", - "jsonc", - "markdown", - "typescript", - "typescriptreact", - "yaml", - "yml" - ], - "ignorePaths": [ - "CHANGELOG.md", - "*.json" - ], - "ignoreRegExpList": [ - "/'s\\b/" - ], + "enableFiletypes": ["dockerfile", "md", "yaml"], + "enabledLanguageIds": ["json", "jsonc", "markdown", "typescript", "typescriptreact", "yaml", "yml"], + "ignorePaths": ["CHANGELOG.md", "*.json"], + "ignoreRegExpList": ["/'s\\b/"], "ignoreWords": [ "adrs", "trivy", diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index c5b55ff8a8..32b7d27565 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,32 +1,32 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: // https://github.com/microsoft/vscode-dev-containers/tree/v0.191.1/containers/docker-existing-dockerfile { - "name": "Existing Dockerfile", + "name": "Existing Dockerfile", - // Sets the run context to one level up instead of the .devcontainer folder. - "context": "..", + // Sets the run context to one level up instead of the .devcontainer folder. + "context": "..", - // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename. - "dockerFile": "../Dockerfile", + // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename. + "dockerFile": "../Dockerfile", - // Set *default* container specific settings.json values on container create. - "settings": {}, - - // Add the IDs of extensions you want installed when the container is created. - "extensions": [] + // Set *default* container specific settings.json values on container create. + "settings": {}, - // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], + // Add the IDs of extensions you want installed when the container is created. + "extensions": [] - // Uncomment the next line to run commands after the container is created - for example installing curl. - // "postCreateCommand": "apt-get update && apt-get install -y curl", + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], - // Uncomment when using a ptrace-based debugger like C++, Go, and Rust - // "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ], + // Uncomment the next line to run commands after the container is created - for example installing curl. + // "postCreateCommand": "apt-get update && apt-get install -y curl", - // Uncomment to use the Docker CLI from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker. - // "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ], + // Uncomment when using a ptrace-based debugger like C++, Go, and Rust + // "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ], - // Uncomment to connect as a non-root user if you've added one. See https://aka.ms/vscode-remote/containers/non-root. - // "remoteUser": "vscode" + // Uncomment to use the Docker CLI from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker. 
+ // "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ], + + // Uncomment to connect as a non-root user if you've added one. See https://aka.ms/vscode-remote/containers/non-root. + // "remoteUser": "vscode" } diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 4f9b2443db..10487663ab 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -18,10 +18,10 @@ updates: groups: npm-dependencies: patterns: - - "*" + - '*' update-types: - - "minor" - - "patch" + - 'minor' + - 'patch' # Docker - package-ecosystem: 'docker' @@ -36,10 +36,10 @@ updates: groups: docker-dependencies: patterns: - - "*" + - '*' update-types: - - "minor" - - "patch" + - 'minor' + - 'patch' ignore: - dependency-name: 'node' versions: ['>=23'] @@ -57,7 +57,7 @@ updates: groups: github-actions-dependencies: patterns: - - "*" + - '*' update-types: - - "minor" - - "patch" + - 'minor' + - 'patch' diff --git a/.github/stale.yml b/.github/stale.yml index 42da5926d4..299eb23d58 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -5,7 +5,7 @@ daysUntilClose: 7 # Issues with these labels will never be considered stale exemptLabels: - pinned -# Set to true to ignore issues with an assignee (defaults to false) +# Set to true to ignore issues with an assignee (defaults to false) exemptAssignees: true # Only mark stale when these labels are found onlyLabels: diff --git a/.github/templates/wiz-admission-control.yaml b/.github/templates/wiz-admission-control.yaml index e3a9406284..596acfbeb7 100644 --- a/.github/templates/wiz-admission-control.yaml +++ b/.github/templates/wiz-admission-control.yaml @@ -1,14 +1,14 @@ wizApiToken: - clientId: "__WIZ_CLIENT_ID__" - clientToken: "__WIZ_CLIENT_TOKEN__" - clientEndpoint: "" + clientId: '__WIZ_CLIENT_ID__' + clientToken: '__WIZ_CLIENT_TOKEN__' + clientEndpoint: '' wiz-kubernetes-connector: enabled: true autoCreateConnector: - connectorName: "" + connectorName: '' webhook: - clusterExternalId: "" + clusterExternalId: '' wiz-admission-controller: enabled: true kubernetesAuditLogsWebhook: - enabled: true \ No newline at end of file + enabled: true diff --git a/.github/templates/wiz-kubernetes-integration.yaml b/.github/templates/wiz-kubernetes-integration.yaml index 38963ff5aa..b6431aec70 100644 --- a/.github/templates/wiz-kubernetes-integration.yaml +++ b/.github/templates/wiz-kubernetes-integration.yaml @@ -1,14 +1,14 @@ global: wizApiToken: - clientId: "__WIZ_CLIENT_ID__" - clientToken: "__WIZ_CLIENT_TOKEN__" - clientEndpoint: "" + clientId: '__WIZ_CLIENT_ID__' + clientToken: '__WIZ_CLIENT_TOKEN__' + clientEndpoint: '' wiz-kubernetes-connector: enabled: true autoCreateConnector: - connectorName: "" - clusterExternalId: "" + connectorName: '' + clusterExternalId: '' wiz-broker: enabled: true @@ -16,7 +16,7 @@ wiz-sensor: enabled: true imagePullSecret: create: false - name: "sensor-image-pull" + name: 'sensor-image-pull' wiz-admission-controller: enabled: true @@ -27,4 +27,4 @@ wiz-admission-controller: imageIntegrityWebhook: enabled: false policies: - - my-image-trust-policy \ No newline at end of file + - my-image-trust-policy diff --git a/.github/workflows/dependabot-auto-approve.yml b/.github/workflows/dependabot-auto-approve.yml index 854aa6d5bd..6824c1dc51 100644 --- a/.github/workflows/dependabot-auto-approve.yml +++ b/.github/workflows/dependabot-auto-approve.yml @@ -16,4 +16,4 @@ jobs: - name: Auto approve dependabot PRs uses: hmarr/auto-approve-action@v4 with: - github-token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of 
file + github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index daab25adee..b147c15139 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,15 @@ ## Getting started ### Step 1: [Install the App Platform](https://apl-docs.net/docs/get-started/installation/overview) + Install the App Platform on Linode Kubernetes Engine (LKE) or any other conformant Kubernetes cluster. ### Step 2: [Follow the post installation steps ](https://apl-docs.net/docs/get-started/installation/post-installation-steps) + Configure the App Platform for your use case. ### Step 3: [Explore the App Platform using the Labs](https://apl-docs.net/docs/get-started/labs/overview) + Explore the App Platform with a comprehensive set of hands-on labs. ## Akamai Application Platform supports diff --git a/SECURITY.md b/SECURITY.md index 20977f75ca..4831e8de02 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,4 +6,3 @@ We only want to support forward fixing of our code, so please always upgrade whe A lot of vulnerabilities are automatically closed when we upgrade the open source solutions in our code base. We only make explicit mention of vulnerabilities mentioned to us and or fixed by us to allow for tracking. Read the changelog to see what security related issues have been solved, if any. - diff --git a/adr/index.md b/adr/index.md index 1161080fe3..a56c70aaac 100644 --- a/adr/index.md +++ b/adr/index.md @@ -1,25 +1,25 @@ -# Architectural Decision Log - -This log lists the architectural decisions for apl-core. - - - -- [ADR-2020-10-01](2020-10-01-github-workflow.md) - Our GitHub workflow -- [ADR-2020-10-02](2020-10-02-docker-compose.md) - docker compose -- [ADR-2020-11-06](2020-11-06-bash-style-guide.md) - Bash coding standard -- [ADR-2020-11-06](2020-11-06-konstraint-policy-library.md) - Konstraint library to support Common Rego Language definitions -- [ADR-2021-10-18](2021-10-18-defaults-and-derived.md) - Derived values -- [ADR-2021-10-28](2021-10-28-internal-values.md) - Internal values -- [ADR-2022-03-24](2022-03-24-custom-ca.md) - urice: -- [ADR-2022-04-22](2022-04-22-values-migration.md) - Values migration -- [ADR-2022-04-23](2022-04-23-pre-upgrade.md) - A new otomi pre-upgrade command -- [ADR-2022-05-17](2022-05-17-destroy-upon-uninstall.md) - Extra flags to accomodate destroy upon uninstall -- [ADR-2022-06-07](2022-06-07-ingress-classes.md) - Ingress classes -- [ADR-2022-07-02](2022-07-02-node-affinity.md) - Node affinity -- [ADR-2022-08-26](2022-08-26-other-dns-provider.md) - Other DNS provider - - - -For new ADRs, please use [template.md](.template.md) as basis. -More information on MADR is available at . -General information about architectural decision records is available at . +# Architectural Decision Log + +This log lists the architectural decisions for apl-core. 
+ + + +- [ADR-2020-10-01](2020-10-01-github-workflow.md) - Our GitHub workflow +- [ADR-2020-10-02](2020-10-02-docker-compose.md) - docker compose +- [ADR-2020-11-06](2020-11-06-bash-style-guide.md) - Bash coding standard +- [ADR-2020-11-06](2020-11-06-konstraint-policy-library.md) - Konstraint library to support Common Rego Language definitions +- [ADR-2021-10-18](2021-10-18-defaults-and-derived.md) - Derived values +- [ADR-2021-10-28](2021-10-28-internal-values.md) - Internal values +- [ADR-2022-03-24](2022-03-24-custom-ca.md) - urice: +- [ADR-2022-04-22](2022-04-22-values-migration.md) - Values migration +- [ADR-2022-04-23](2022-04-23-pre-upgrade.md) - A new otomi pre-upgrade command +- [ADR-2022-05-17](2022-05-17-destroy-upon-uninstall.md) - Extra flags to accomodate destroy upon uninstall +- [ADR-2022-06-07](2022-06-07-ingress-classes.md) - Ingress classes +- [ADR-2022-07-02](2022-07-02-node-affinity.md) - Node affinity +- [ADR-2022-08-26](2022-08-26-other-dns-provider.md) - Other DNS provider + + + +For new ADRs, please use [template.md](.template.md) as basis. +More information on MADR is available at . +General information about architectural decision records is available at . diff --git a/core.yaml b/core.yaml index 630ddc2091..0a8f4a07e7 100644 --- a/core.yaml +++ b/core.yaml @@ -36,13 +36,13 @@ k8s: - name: istio-system disableIstioInjection: true labels: - "apl.io/ingress-controller-scope": "true" + 'apl.io/ingress-controller-scope': 'true' - name: ingress # disabling istio sidecar as it does not preserve client ip (yet) # TODO: enable once it does disableIstioInjection: true labels: - "apl.io/ingress-controller-scope": "true" + 'apl.io/ingress-controller-scope': 'true' - name: jaeger app: jaeger - name: jaeger-operator diff --git a/package-lock.json b/package-lock.json index 2aae0c95f6..6100735954 100644 --- a/package-lock.json +++ b/package-lock.json @@ -150,7 +150,6 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -2612,8 +2611,7 @@ "resolved": "https://registry.npmjs.org/@cspell/dict-css/-/dict-css-4.0.18.tgz", "integrity": "sha512-EF77RqROHL+4LhMGW5NTeKqfUd/e4OOv6EDFQ/UQQiFyWuqkEKyEz0NDILxOFxWUEVdjT2GQ2cC7t12B6pESwg==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@cspell/dict-dart": { "version": "2.3.1", @@ -2753,16 +2751,14 @@ "resolved": "https://registry.npmjs.org/@cspell/dict-html/-/dict-html-4.0.12.tgz", "integrity": "sha512-JFffQ1dDVEyJq6tCDWv0r/RqkdSnV43P2F/3jJ9rwLgdsOIXwQbXrz6QDlvQLVvNSnORH9KjDtenFTGDyzfCaA==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@cspell/dict-html-symbol-entities": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/@cspell/dict-html-symbol-entities/-/dict-html-symbol-entities-4.0.4.tgz", "integrity": "sha512-afea+0rGPDeOV9gdO06UW183Qg6wRhWVkgCFwiO3bDupAoyXRuvupbb5nUyqSTsLXIKL8u8uXQlJ9pkz07oVXw==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@cspell/dict-java": { "version": "5.0.12", @@ -2960,8 +2956,7 @@ "resolved": "https://registry.npmjs.org/@cspell/dict-typescript/-/dict-typescript-3.2.3.tgz", "integrity": "sha512-zXh1wYsNljQZfWWdSPYwQhpwiuW0KPW1dSd8idjMRvSD0aSvWWHoWlrMsmZeRl4qM4QCEAjua8+cjflm41cQBg==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@cspell/dict-vue": { "version": 
"3.0.5", @@ -5563,7 +5558,6 @@ "integrity": "sha512-z+j7DixNnfpdToYsOutStDgeRzJSMnbj8T1C/oQjB6Aa+kRfNjs/Fn7W6c8bmlt6mfy3FkgeKBRnDjxQow5dow==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@octokit/auth-token": "^5.0.0", "@octokit/graphql": "^8.1.2", @@ -7188,8 +7182,7 @@ "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@types/json5": { "version": "0.0.29", @@ -7254,7 +7247,6 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-24.9.2.tgz", "integrity": "sha512-uWN8YqxXxqFMX2RqGOrumsKeti4LlmIMIyV0lgut4jx7KQBcBiW6vkDtIBvHnHIquwNfJhk8v2OtmO8zXWHfPA==", "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -7456,7 +7448,6 @@ "integrity": "sha512-BnOroVl1SgrPLywqxyqdJ4l3S2MsKVLDVxZvjI1Eoe8ev2r3kGDo+PcMihNmDE+6/KjkTubSJnmqGZZjQSBq/g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.46.2", "@typescript-eslint/types": "8.46.2", @@ -7998,7 +7989,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -8860,7 +8850,6 @@ "url": "https://github.com/sponsors/ai" } ], - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001726", "electron-to-chromium": "^1.5.173", @@ -11236,7 +11225,6 @@ "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "env-paths": "^2.2.1", "import-fresh": "^3.3.0", @@ -12581,7 +12569,6 @@ "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -12642,7 +12629,6 @@ "integrity": "sha512-82GZUjRS0p/jganf6q1rEO25VSoHH0hKPCTrgillPjdI/3bgBhAE1QzHrHTizjpRvy6pGAvKjDJtk2pF9NDq8w==", "dev": true, "license": "MIT", - "peer": true, "bin": { "eslint-config-prettier": "bin/cli.js" }, @@ -12777,7 +12763,6 @@ "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -18889,7 +18874,6 @@ "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz", "integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==", "license": "MIT", - "peer": true, "engines": { "node": ">= 10.16.0" } @@ -19955,7 +19939,6 @@ "integrity": "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==", "dev": true, "license": "MIT", - "peer": true, "bin": { "marked": "bin/marked.js" }, @@ -23065,7 +23048,6 @@ "dev": true, "inBundle": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -24179,7 +24161,6 @@ "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.6.2.tgz", "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==", "dev": true, - "peer": true, "bin": { "prettier": "bin/prettier.cjs" }, @@ -25164,7 +25145,6 @@ "integrity": 
"sha512-phCkJ6pjDi9ANdhuF5ElS10GGdAKY6R1Pvt9lT3SFhOwM4T7QZE7MLpBDbNruUx/Q3gFD92/UOFringGipRqZA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@semantic-release/commit-analyzer": "^13.0.0-beta.1", "@semantic-release/error": "^4.0.0", @@ -27156,7 +27136,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -27360,7 +27339,6 @@ "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@cspotcode/source-map-support": "^0.8.0", "@tsconfig/node10": "^1.0.7", @@ -27644,7 +27622,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -27813,7 +27790,6 @@ "dev": true, "hasInstallScript": true, "license": "MIT", - "peer": true, "dependencies": { "napi-postinstall": "^0.2.4" }, @@ -28309,7 +28285,6 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz", "integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=10.0.0" }, diff --git a/schemas/Readme.md b/schemas/Readme.md index fb2a8dd16f..0ed80cf213 100644 --- a/schemas/Readme.md +++ b/schemas/Readme.md @@ -29,13 +29,13 @@ pip3 install openapi2jsonschema 3. Observe new archive created # Generate missing CRD's + When there is a new CRD added and used in different helm-charts it needs to be added to the `generated-crd-schemas.tar.gz` otherwise the tests from `NODE_ENV=test binzx/otomi validate-templates` will fail. **Prerequsits** Have a Kubernetes cluster available with the correct CRD's you want to add or want to build a new list with - **Steps for adding new CRD to the list:** 1. On line 21 in `gen-missing-crd-schemas.sh` set the correct shorthand for the CRD you want to add. The CRD is selected by a `kubectl get crd | grep $shorthand` @@ -46,10 +46,9 @@ Have a Kubernetes cluster available with the correct CRD's you want to add or wa 6. Generate a new tar.gz from the diretory with: `tar -zcvf ../generated-crd-schemas.tar.gz .` 7. Run the tests `NODE_ENV=test binzx/otomi validate-templates` they shouldn't fail anymore. - **Steps for generating new CRD list:** 1. On line 21 in `gen-missing-crd-schemas.sh` set the correct shorthand for the CRD's you want in your new list. The CRD is selected by a `kubectl get crd | grep $shorthand` 2. Execute the script `gen-missing-crd-schemas.sh` 3. This wil generate a new directory `generated-crd-schemas` in there are your CRD's. Check if they are correct. Or unpack the new `generated-crd-schemas.tar.gz` and check if the correct CRD's are in there -4. Run the tests `NODE_ENV=test binzx/otomi validate-templates` they shouldn't fail anymore. \ No newline at end of file +4. Run the tests `NODE_ENV=test binzx/otomi validate-templates` they shouldn't fail anymore. 
diff --git a/src/cmd/apply.ts b/src/cmd/apply.ts index 439ad000ef..d2f4b4228b 100644 --- a/src/cmd/apply.ts +++ b/src/cmd/apply.ts @@ -14,6 +14,7 @@ import { runtimeUpgrade } from '../common/runtime-upgrade' import { applyAsApps } from './apply-as-apps' import { applyTeams } from './apply-teams' import { commit } from './commit' +import { collectTraces } from './traces' import { upgrade } from './upgrade' const cmdName = getFilename(__filename) @@ -86,6 +87,12 @@ export const apply = async (): Promise => { await applyAll() } catch (e) { d.error(e) + // Collect traces on apply failure + try { + await collectTraces() + } catch (traceError) { + d.error('Failed to collect traces:', traceError) + } d.info(`Retrying in ${retryOptions.maxTimeout} ms`) throw e } diff --git a/src/cmd/collect.ts b/src/cmd/collect.ts new file mode 100644 index 0000000000..a336ec60d9 --- /dev/null +++ b/src/cmd/collect.ts @@ -0,0 +1,13 @@ +import { Argv, CommandModule } from 'yargs' +import { module as tracesModule } from './traces' + +export const module: CommandModule = { + command: 'collect ', + describe: 'Collect diagnostic information from the cluster', + builder: (yargs: Argv): Argv => { + return yargs.command(tracesModule as CommandModule).demandCommand(1, 'You must specify a subcommand') + }, + handler: (): void => { + // Handler is not called when subcommands are used + }, +} diff --git a/src/cmd/index.ts b/src/cmd/index.ts index 45d9d6e45f..c8e286d117 100644 --- a/src/cmd/index.ts +++ b/src/cmd/index.ts @@ -24,6 +24,8 @@ import { module as statusModule } from './status' import { module as syncModule } from './sync' import { module as templateModule } from './template' import { module as testModule } from './test' +import { module as collectModule } from './collect' +import { module as tracesModule } from './traces' import { module as upgradeModule } from './upgrade' import { module as validateClusterModule } from './validate-cluster' import { module as validateTemplatesModule } from './validate-templates' @@ -55,6 +57,8 @@ export { module as status } from './status' export { module as sync } from './sync' export { module as template } from './template' export { module as test } from './test' +export { module as collect } from './collect' +export { module as traces } from './traces' export { module as upgrade } from './upgrade' export { module as validateTemplates } from './validate-templates' export { module as validateValues } from './validate-values' @@ -88,6 +92,7 @@ export const commands: CommandModule[] = [ syncModule, templateModule, testModule, + collectModule, validateClusterModule, validateTemplatesModule, validateValuesModule, diff --git a/src/cmd/install.ts b/src/cmd/install.ts index d9ce0dafb2..eff47f3add 100644 --- a/src/cmd/install.ts +++ b/src/cmd/install.ts @@ -25,6 +25,7 @@ import { printWelcomeMessage, retryIsOAuth2ProxyRunning, } from './commit' +import { collectTraces } from './traces' const cmdName = getFilename(__filename) const dir = '/tmp/otomi/' @@ -145,6 +146,12 @@ const install = async (): Promise => { await installAll() } catch (e) { d.error(e) + // Collect traces on installation failure + try { + await collectTraces() + } catch (traceError) { + d.error('Failed to collect traces:', traceError) + } d.info(`Retrying in ${retryOptions.maxTimeout} ms`) throw e } diff --git a/src/cmd/traces.test.ts b/src/cmd/traces.test.ts new file mode 100644 index 0000000000..f98fdfc2cd --- /dev/null +++ b/src/cmd/traces.test.ts @@ -0,0 +1,366 @@ +import { ApiException } from 
'@kubernetes/client-node' +import * as k8sModule from 'src/common/k8s' + +// Mock dependencies +jest.mock('src/common/k8s') +jest.mock('src/common/cli', () => ({ + prepareEnvironment: jest.fn(), +})) +jest.mock('src/common/debug', () => ({ + terminal: jest.fn(() => ({ + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + stream: { log: process.stdout, error: process.stderr }, + })), +})) +jest.mock('src/common/utils', () => ({ + getFilename: jest.fn(() => 'troubleshoot'), +})) +jest.mock('src/common/yargs', () => ({ + setParsedArgs: jest.fn(), +})) + +import { collectTraces } from './traces' + +class MockApiException extends ApiException { + code: number + constructor(code: number, message: string) { + super(code, message, {}, {}) + this.code = code + } +} + +describe('Collect Traces Command', () => { + let mockCoreApi: any + let mockAppsApi: any + let mockCustomApi: any + + beforeEach(() => { + mockCoreApi = { + listPodForAllNamespaces: jest.fn(), + listNamespacedEvent: jest.fn(), + listServiceForAllNamespaces: jest.fn(), + listNamespace: jest.fn(), + listNamespacedPersistentVolumeClaim: jest.fn(), + listPersistentVolume: jest.fn(), + listNode: jest.fn(), + readNamespacedConfigMap: jest.fn(), + createNamespacedConfigMap: jest.fn(), + replaceNamespacedConfigMap: jest.fn(), + } + + mockAppsApi = { + listDeploymentForAllNamespaces: jest.fn(), + listNamespacedStatefulSet: jest.fn(), + } + + mockCustomApi = { + listClusterCustomObject: jest.fn(), + } + ;(k8sModule.k8s as any) = { + core: jest.fn(() => mockCoreApi), + app: jest.fn(() => mockAppsApi), + custom: jest.fn(() => mockCustomApi), + } + }) + + afterEach(() => { + jest.clearAllMocks() + }) + + it('should detect all types of failed resources and store in ConfigMap', async () => { + // Mock various failing resources + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'crashed-pod', namespace: 'default' }, + status: { phase: 'CrashLoopBackOff', message: 'Container crashed' }, + }, + { + metadata: { name: 'oom-pod', namespace: 'default' }, + status: { + phase: 'Running', + containerStatuses: [{ name: 'main', lastState: { terminated: { reason: 'OOMKilled' } } }], + }, + }, + ], + }) + + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-deployment', namespace: 'default' }, + status: { replicas: 3, availableReplicas: 1 }, + }, + ], + }) + + mockCoreApi.listNamespace.mockResolvedValue({ + items: [{ metadata: { name: 'default' } }], + }) + + mockAppsApi.listNamespacedStatefulSet.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-sts', namespace: 'default' }, + spec: { replicas: 3 }, + status: { readyReplicas: 0 }, + }, + ], + }) + + mockCoreApi.listNode.mockResolvedValue({ + items: [ + { + metadata: { name: 'node-1' }, + status: { conditions: [{ type: 'Ready', status: 'False' }] }, + }, + ], + }) + + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-lb', namespace: 'default' }, + spec: { type: 'LoadBalancer' }, + status: { loadBalancer: {} }, + }, + ], + }) + + mockCoreApi.listNamespacedPersistentVolumeClaim.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-pvc', namespace: 'default' }, + status: { phase: 'Pending' }, + }, + ], + }) + + mockCoreApi.listPersistentVolume.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-pv' }, + status: { phase: 'Failed' }, + }, + ], + }) + + mockCustomApi.listClusterCustomObject.mockResolvedValue({ + items: [ + { + metadata: { name: 
'test-app', namespace: 'argocd' }, + status: { + health: { status: 'Degraded', message: 'Pod not ready' }, + sync: { status: 'OutOfSync' }, + }, + }, + ], + }) + + mockCoreApi.readNamespacedConfigMap.mockRejectedValue(new MockApiException(404, 'Not Found')) + mockCoreApi.createNamespacedConfigMap.mockResolvedValue({}) + + await collectTraces() + + expect(mockCoreApi.createNamespacedConfigMap).toHaveBeenCalledWith({ + namespace: 'apl-operator', + body: { + metadata: { name: 'apl-traces-report' }, + data: { report: expect.any(String) }, + }, + }) + + // eslint-disable-next-line prefer-destructuring, @typescript-eslint/no-unsafe-argument + const configMapCall = mockCoreApi.createNamespacedConfigMap.mock.calls[0][0] + const reportData = JSON.parse(configMapCall.body.data.report) + + // Should have all resource types + expect(reportData.failedResources.length).toBeGreaterThan(0) + expect(reportData.summary.byType).toEqual( + expect.objectContaining({ + Pod: expect.any(Number), + Deployment: 1, + StatefulSet: 1, + Node: 1, + Service: 1, + PersistentVolumeClaim: 1, + PersistentVolume: 1, + Application: 2, // Health and Sync issues + }), + ) + expect(reportData.timestamp).toBeDefined() + }) + + it('should report healthy cluster when no issues found', async () => { + // Mock all resources as healthy + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ items: [] }) + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + await collectTraces() + + // Should not create ConfigMap for healthy cluster + expect(mockCoreApi.createNamespacedConfigMap).not.toHaveBeenCalled() + expect(mockCoreApi.replaceNamespacedConfigMap).not.toHaveBeenCalled() + }) + + it('should update existing ConfigMap instead of creating new one', async () => { + const existingConfigMap = { + metadata: { name: 'apl-traces-report' }, + data: { report: '{"old": "data"}' }, + } + + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'failed-pod', namespace: 'default' }, + status: { phase: 'Failed' }, + }, + ], + }) + + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + mockCoreApi.readNamespacedConfigMap.mockResolvedValue(existingConfigMap) + mockCoreApi.replaceNamespacedConfigMap.mockResolvedValue({}) + + await collectTraces() + + expect(mockCoreApi.replaceNamespacedConfigMap).toHaveBeenCalled() + expect(mockCoreApi.createNamespacedConfigMap).not.toHaveBeenCalled() + }) + + it('should gracefully handle ArgoCD not installed', async () => { + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ items: [] }) + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + 
mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockRejectedValue(new MockApiException(404, 'Not Found')) + + await collectTraces() + + // Should not throw error + expect(mockCoreApi.listPodForAllNamespaces).toHaveBeenCalled() + }) + + it('should continue collecting resources when one type fails', async () => { + // Mock pods to fail + mockCoreApi.listPodForAllNamespaces.mockRejectedValue(new Error('API error')) + + // Mock deployments to succeed with issues + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-deployment', namespace: 'default' }, + status: { replicas: 3, availableReplicas: 1 }, + }, + ], + }) + + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + mockCoreApi.readNamespacedConfigMap.mockRejectedValue(new MockApiException(404, 'Not Found')) + mockCoreApi.createNamespacedConfigMap.mockResolvedValue({}) + + await collectTraces() + + // Should create ConfigMap with deployment issues + expect(mockCoreApi.createNamespacedConfigMap).toHaveBeenCalled() + + // eslint-disable-next-line prefer-destructuring, @typescript-eslint/no-unsafe-argument + const configMapCall = mockCoreApi.createNamespacedConfigMap.mock.calls[0][0] + const reportData = JSON.parse(configMapCall.body.data.report) + + // Should have deployment in failed resources + expect(reportData.failedResources).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + kind: 'Deployment', + name: 'test-deployment', + }), + ]), + ) + + // Should have error entry + expect(reportData.errors).toEqual(expect.arrayContaining(['API error'])) + }) + + it('should include errors field in report when collection failures occur', async () => { + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ items: [] }) + mockAppsApi.listDeploymentForAllNamespaces.mockRejectedValue(new MockApiException(403, 'Permission denied')) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockRejectedValue(new Error('Connection timeout')) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + await collectTraces() + + // Should not throw error despite multiple failures + expect(mockCoreApi.listPodForAllNamespaces).toHaveBeenCalled() + }) + + it('should not include errors field when all collections succeed', async () => { + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'failed-pod', namespace: 'default' }, + status: { phase: 'Failed' }, + }, + ], + }) + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + mockCoreApi.readNamespacedConfigMap.mockRejectedValue(new MockApiException(404, 'Not Found')) + mockCoreApi.createNamespacedConfigMap.mockResolvedValue({}) + + await 
collectTraces() + + // eslint-disable-next-line prefer-destructuring, @typescript-eslint/no-unsafe-argument + const configMapCall = mockCoreApi.createNamespacedConfigMap.mock.calls[0][0] + const reportData = JSON.parse(configMapCall.body.data.report) + + // Should not have errors field when all collections succeed + expect(reportData.errors).toBeUndefined() + }) + + it('should handle multiple simultaneous collection failures', async () => { + // Mock multiple resource types to fail + mockCoreApi.listPodForAllNamespaces.mockRejectedValue(new Error('Pods API failed')) + mockAppsApi.listDeploymentForAllNamespaces.mockRejectedValue(new Error('Deployments API failed')) + mockCoreApi.listNamespace.mockRejectedValue(new Error('Namespace API failed')) + mockCoreApi.listNode.mockRejectedValue(new Error('Node API failed')) + mockCoreApi.listServiceForAllNamespaces.mockRejectedValue(new Error('Service API failed')) + mockCoreApi.listPersistentVolume.mockRejectedValue(new Error('PV API failed')) + mockCustomApi.listClusterCustomObject.mockRejectedValue(new Error('ArgoCD API failed')) + + await collectTraces() + + // Should complete without throwing despite all failures + expect(mockCoreApi.listPodForAllNamespaces).toHaveBeenCalled() + // Should not create ConfigMap when no issues found and all failed + expect(mockCoreApi.createNamespacedConfigMap).not.toHaveBeenCalled() + }) +}) diff --git a/src/cmd/traces.ts b/src/cmd/traces.ts new file mode 100644 index 0000000000..898f2cd3a7 --- /dev/null +++ b/src/cmd/traces.ts @@ -0,0 +1,408 @@ +import { ApiException } from '@kubernetes/client-node' +import { prepareEnvironment } from 'src/common/cli' +import { terminal } from 'src/common/debug' +import { k8s } from 'src/common/k8s' +import { getFilename } from 'src/common/utils' +import { BasicArguments, setParsedArgs } from 'src/common/yargs' +import { Argv } from 'yargs' + +const cmdName = getFilename(__filename) + +interface ResourceReport { + kind: string + name: string + namespace: string + value: string +} + +interface TraceReport { + timestamp: string + failedResources: ResourceReport[] + summary: { + total: number + byType: Record + } + errors?: string[] +} + +/** + * Get pods with issues across all namespaces + */ +async function getPodsWithIssues(): Promise { + const coreApi = k8s.core() + const response = await coreApi.listPodForAllNamespaces() + const pods: ResourceReport[] = [] + + await Promise.all( + response.items.map(async (pod) => { + const namespace = pod.metadata?.namespace || 'unknown' + const podName = pod.metadata?.name || 'unknown' + const issues: string[] = [] + + // Check for CrashLoopBackOff and other problematic states + if (['CrashLoopBackOff', 'Failed', 'Unknown'].includes(pod.status?.phase || '')) { + issues.push(`Pod status: ${pod.status?.phase}. 
${pod.status?.message || ''}`) + } + + // Check for pending pods without node assignment + if (pod.status?.phase === 'Pending' && !pod.spec?.nodeName) { + const events = await coreApi.listNamespacedEvent({ namespace }) + const schedulingEvent = events.items.find( + (event) => event.involvedObject.name === podName && event.reason === 'FailedScheduling', + ) + if (schedulingEvent?.message) { + issues.push(schedulingEvent.message) + } else { + issues.push('Pod is pending without node assignment') + } + } + + // Check container statuses + pod.status?.containerStatuses?.forEach((containerStatus) => { + if (containerStatus.lastState?.terminated?.reason === 'OOMKilled') { + issues.push( + `Container ${containerStatus.name} terminated (${containerStatus.lastState?.terminated?.reason}).`, + ) + } + if (containerStatus.state?.terminated) { + issues.push( + `Container ${containerStatus.name} terminated (${containerStatus.state?.terminated.reason}). ${containerStatus.state?.terminated.message || ''}`, + ) + } + if (containerStatus.state?.waiting?.reason) { + issues.push( + `Container ${containerStatus.name} waiting (${containerStatus.state?.waiting?.reason}). ${containerStatus.state?.waiting?.message || ''}`, + ) + } + }) + + issues.forEach((issue) => { + pods.push({ + kind: 'Pod', + name: podName, + namespace, + value: issue, + }) + }) + }), + ) + + return pods +} + +/** + * Get deployments with replica mismatches + */ +async function getDeploymentsWithIssues(): Promise { + const appsApi = k8s.app() + const response = await appsApi.listDeploymentForAllNamespaces() + + return response.items + .filter((deployment) => deployment.status?.replicas !== deployment.status?.availableReplicas) + .map((deployment) => ({ + kind: 'Deployment', + name: deployment.metadata?.name || 'unknown', + namespace: deployment.metadata?.namespace || 'default', + value: `Desired ${deployment.status?.replicas}, Available ${deployment.status?.availableReplicas}`, + })) +} + +/** + * Get statefulsets with replica mismatches + */ +async function getStatefulSetsWithIssues(): Promise { + const appsApi = k8s.app() + const coreApi = k8s.core() + const namespaces = await coreApi.listNamespace() + const statefulSets: ResourceReport[] = [] + + await Promise.all( + namespaces.items.map(async (ns) => { + const namespace = ns.metadata?.name + if (!namespace) return + + const response = await appsApi.listNamespacedStatefulSet({ namespace }) + response.items.forEach((sts) => { + const replicas = sts.spec?.replicas || 0 + const readyReplicas = sts.status?.readyReplicas || 0 + if (readyReplicas < replicas) { + statefulSets.push({ + kind: 'StatefulSet', + name: sts.metadata?.name || 'unknown', + namespace, + value: `Desired ${replicas}, Ready ${readyReplicas}`, + }) + } + }) + }), + ) + + return statefulSets +} + +/** + * Get nodes that are not ready + */ +async function getNodesWithIssues(): Promise { + const coreApi = k8s.core() + const response = await coreApi.listNode() + + return response.items + .filter((node) => node.status?.conditions?.some((cond) => cond.type === 'Ready' && cond.status !== 'True')) + .map((node) => ({ + kind: 'Node', + name: node.metadata?.name || 'unknown', + namespace: 'N/A', + value: 'Node not Ready', + })) +} + +/** + * Get services with issues (e.g., LoadBalancer without IP) + */ +async function getServicesWithIssues(): Promise { + const coreApi = k8s.core() + const response = await coreApi.listServiceForAllNamespaces() + + return response.items + .map((service) => { + const namespace = service.metadata?.namespace 
|| 'default' + const name = service.metadata?.name || 'unknown' + const type = service.spec?.type || 'ClusterIP' + let issue: string | null = null + + if (type === 'LoadBalancer' && !service.status?.loadBalancer?.ingress) { + issue = 'LoadBalancer IP not assigned' + } + + if (issue) { + return { + kind: 'Service', + name, + namespace, + value: issue, + } + } + return null + }) + .filter((r): r is ResourceReport => r !== null) +} + +/** + * Get PersistentVolumeClaims that are not bound + */ +async function getPVCsWithIssues(): Promise { + const coreApi = k8s.core() + const namespaces = await coreApi.listNamespace() + const pvcs: ResourceReport[] = [] + + await Promise.all( + namespaces.items.map(async (ns) => { + const namespace = ns.metadata?.name + if (!namespace) return + + const response = await coreApi.listNamespacedPersistentVolumeClaim({ namespace }) + response.items.forEach((pvc) => { + if (pvc.status?.phase !== 'Bound') { + const conditions = pvc.status?.conditions?.map((c) => `${c.type}: ${c.message}`).join('; ') || '' + pvcs.push({ + kind: 'PersistentVolumeClaim', + name: pvc.metadata?.name || 'unknown', + namespace, + value: `Phase: ${pvc.status?.phase}${conditions ? `. ${conditions}` : ''}`, + }) + } + }) + }), + ) + + return pvcs +} + +/** + * Get PersistentVolumes with issues + */ +async function getPVsWithIssues(): Promise { + const coreApi = k8s.core() + const response = await coreApi.listPersistentVolume() + + return response.items + .filter((pv) => pv.status?.phase !== 'Available' && pv.status?.phase !== 'Bound') + .map((pv) => ({ + kind: 'PersistentVolume', + name: pv.metadata?.name || 'unknown', + namespace: 'N/A', + value: `Phase: ${pv.status?.phase}`, + })) +} + +/** + * Get ArgoCD Applications with health or sync issues + */ +async function getArgoApplicationsWithIssues(): Promise { + const customApi = k8s.custom() + const applications: ResourceReport[] = [] + + const response = await customApi.listClusterCustomObject({ + group: 'argoproj.io', + version: 'v1alpha1', + plural: 'applications', + }) + + const items = (response as any).items || [] + + items.forEach((app: any) => { + const name = app.metadata?.name || 'unknown' + const namespace = app.metadata?.namespace || 'unknown' + const healthStatus = app.status?.health?.status + const syncStatus = app.status?.sync?.status + const issues: string[] = [] + + if (healthStatus && healthStatus !== 'Healthy') { + const healthMessage = app.status?.health?.message || 'Unknown' + issues.push(`HealthStatus: ${healthStatus} message: ${healthMessage}`) + } + + if (syncStatus && syncStatus !== 'Synced') { + issues.push(`SyncStatus: ${syncStatus}`) + } + + const operationPhase = app.status?.operationState?.phase + if (operationPhase && operationPhase !== 'Succeeded') { + const message = app.status?.operationState?.message || 'Unknown' + issues.push(`Operation: ${operationPhase} - ${message}`) + } + + issues.forEach((issue) => { + applications.push({ + kind: 'Application', + name, + namespace, + value: issue, + }) + }) + }) + + return applications +} + +/** + * Write trace report to ConfigMap + */ +async function writeReportToConfigMap(name: string, namespace: string, report: TraceReport): Promise { + const coreApi = k8s.core() + const reportJson = JSON.stringify(report, null, 2) + + try { + const existingConfigMap = await coreApi.readNamespacedConfigMap({ name, namespace }) + + if (!existingConfigMap.data) { + existingConfigMap.data = {} + } + existingConfigMap.data.report = reportJson + + await 
coreApi.replaceNamespacedConfigMap({ name, namespace, body: existingConfigMap }) + } catch (error) { + if (error instanceof ApiException && error.code === 404) { + await coreApi.createNamespacedConfigMap({ + namespace, + body: { + metadata: { name }, + data: { report: reportJson }, + }, + }) + } else { + throw error + } + } +} + +/** + * Main collect traces function + */ +export async function collectTraces(): Promise { + const d = terminal(`cmd:${cmdName}:collectTraces`) + + try { + d.info('Collecting traces from cluster resources...') + + // Gather all failed resources using allSettled to continue on individual failures + const results = await Promise.allSettled([ + getPodsWithIssues(), + getDeploymentsWithIssues(), + getStatefulSetsWithIssues(), + getNodesWithIssues(), + getServicesWithIssues(), + getPVCsWithIssues(), + getPVsWithIssues(), + getArgoApplicationsWithIssues(), + ]) + + // Process results and collect both resources and errors + const failedResources: ResourceReport[] = [] + const collectionErrors: string[] = [] + + results.forEach((result) => { + if (result.status === 'fulfilled') { + failedResources.push(...result.value) + } else { + const error = result.reason + const errorMessage = error instanceof Error ? error.message : String(error) + + // Log based on error type + if (error instanceof ApiException && (error.code === 404 || error.code === 403)) { + d.info(`Resource collection skipped (expected if not installed): ${errorMessage}`) + } else { + d.warn(`Failed to collect resources: ${errorMessage}`) + } + + collectionErrors.push(errorMessage) + } + }) + + // Generate report + const report: TraceReport = { + timestamp: new Date().toISOString(), + failedResources, + summary: { + total: failedResources.length, + byType: failedResources.reduce( + (acc, r) => ({ + ...acc, + [r.kind]: (acc[r.kind] || 0) + 1, + }), + {} as Record, + ), + }, + ...(collectionErrors.length > 0 && { errors: collectionErrors }), + } + + // Store in ConfigMap + const configMapName = 'apl-traces-report' + const targetNamespace = 'apl-operator' + + if (failedResources.length === 0) { + d.info('No failing resources found. 
Your APL instance seems to be healthy.') + } else { + await writeReportToConfigMap(configMapName, targetNamespace, report) + d.info( + `Trace report stored in ConfigMap ${targetNamespace}/${configMapName} (${failedResources.length} failed resources)`, + ) + } + } catch (error) { + d.error('Failed to collect traces:', error) + throw error + } +} + +export const module = { + command: 'traces', + describe: 'Collect traces of failed resources and store report in ConfigMap', + builder: (parser: Argv): Argv => parser, + + handler: async (argv: BasicArguments): Promise => { + setParsedArgs(argv) + await prepareEnvironment({ skipEnvDirCheck: true, skipDecrypt: true }) + await collectTraces() + }, +} diff --git a/src/common/hf.ts b/src/common/hf.ts index de317cb04f..86b96130eb 100644 --- a/src/common/hf.ts +++ b/src/common/hf.ts @@ -215,7 +215,7 @@ export const deployEssential = async (labelOpts: string[] | null = null) => { } writeFileSync(templateFile, templateOutput) - await $`kubectl apply -f ${templateFile}` + await $`kubectl apply --server-side=true -f ${templateFile}` } return true diff --git a/tests/fixtures/env/apps/kserve.yaml b/tests/fixtures/env/apps/kserve.yaml index af83844b5e..73c29603cb 100644 --- a/tests/fixtures/env/apps/kserve.yaml +++ b/tests/fixtures/env/apps/kserve.yaml @@ -22,8 +22,8 @@ spec: memory: 300Mi inferenceService: limits: - cpu: "1" - memory: "2Gi" + cpu: '1' + memory: '2Gi' requests: - cpu: "1" - memory: "2Gi" \ No newline at end of file + cpu: '1' + memory: '2Gi' diff --git a/tests/fixtures/env/apps/secrets.kubeflow-pipelines.yaml b/tests/fixtures/env/apps/secrets.kubeflow-pipelines.yaml index 14a4331d23..953af4f5c9 100644 --- a/tests/fixtures/env/apps/secrets.kubeflow-pipelines.yaml +++ b/tests/fixtures/env/apps/secrets.kubeflow-pipelines.yaml @@ -3,4 +3,4 @@ spec: rootPassword: mysqlsomesecretvalue name: kubeflow-pipelines metadata: - name: kubeflow-pipelines \ No newline at end of file + name: kubeflow-pipelines diff --git a/tests/fixtures/env/teams/demo/agents/my-agent.yaml b/tests/fixtures/env/teams/demo/agents/my-agent.yaml index 48dd0e0787..d4da7f8519 100644 --- a/tests/fixtures/env/teams/demo/agents/my-agent.yaml +++ b/tests/fixtures/env/teams/demo/agents/my-agent.yaml @@ -1,11 +1,11 @@ apiVersion: akamai.io/v1alpha1 kind: AkamaiAgent metadata: - name: my-agent - namespace: team-demo - labels: - apl.io/teamId: demo + name: my-agent + namespace: team-demo + labels: + apl.io/teamId: demo spec: - foundationModel: llama - systemPrompt: You're a helpful AI assistant - knowledgeBase: demo-kb \ No newline at end of file + foundationModel: llama + systemPrompt: You're a helpful AI assistant + knowledgeBase: demo-kb diff --git a/tests/fixtures/env/teams/demo/databases/pgvector-db.yaml b/tests/fixtures/env/teams/demo/databases/pgvector-db.yaml index 366f7945c4..e50e22a976 100644 --- a/tests/fixtures/env/teams/demo/databases/pgvector-db.yaml +++ b/tests/fixtures/env/teams/demo/databases/pgvector-db.yaml @@ -1,10 +1,10 @@ apiVersion: postgresql.cnpg.io/v1 kind: Database metadata: - name: pgvector-db - namespace: team-demo + name: pgvector-db + namespace: team-demo spec: - cluster: - name: pgvector-cluster - name: knowledge_base - owner: app \ No newline at end of file + cluster: + name: pgvector-cluster + name: knowledge_base + owner: app diff --git a/tests/fixtures/env/teams/demo/knowledgebases/demo-kb.yaml b/tests/fixtures/env/teams/demo/knowledgebases/demo-kb.yaml index ba4228497e..96b295b632 100644 --- a/tests/fixtures/env/teams/demo/knowledgebases/demo-kb.yaml 
+++ b/tests/fixtures/env/teams/demo/knowledgebases/demo-kb.yaml @@ -1,18 +1,18 @@ apiVersion: akamai.io/v1alpha1 kind: AkamaiKnowledgeBase metadata: - name: demo-kb - namespace: team-demo + name: demo-kb + namespace: team-demo spec: - pipelineName: "custom-pipeline" - pipelineParameters: - url: "https://docs.example.com/api" - table_name: "custom_knowledge_base" - embedding_model: "text-embedding-3-small" - embedding_api_base: "https://api.openai.com/v1" - embed_dim: 1536 - embed_batch_size: 100 - db_host: "pgvector-cluster-rw" - secret_name: "openai-secret" - secret_namespace: "team-demo" - db_port: 5432 \ No newline at end of file + pipelineName: 'custom-pipeline' + pipelineParameters: + url: 'https://docs.example.com/api' + table_name: 'custom_knowledge_base' + embedding_model: 'text-embedding-3-small' + embedding_api_base: 'https://api.openai.com/v1' + embed_dim: 1536 + embed_batch_size: 100 + db_host: 'pgvector-cluster-rw' + secret_name: 'openai-secret' + secret_namespace: 'team-demo' + db_port: 5432 diff --git a/values/prometheus-operator/rules/orcs-compliance.yaml b/values/prometheus-operator/rules/orcs-compliance.yaml index 6bcb30d315..e48af0b488 100644 --- a/values/prometheus-operator/rules/orcs-compliance.yaml +++ b/values/prometheus-operator/rules/orcs-compliance.yaml @@ -126,4 +126,4 @@ groups: labels: severity: info component: orcs-compliance - team: platform \ No newline at end of file + team: platform
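
For reference, the report that `collectTraces` stores in the `apl-operator/apl-traces-report` ConfigMap can be read back with the same `@kubernetes/client-node` API used in `src/cmd/traces.ts`. The snippet below is a minimal, hypothetical sketch and not part of this changeset; it assumes a 1.x client where API methods take an options object and resolve to the resource directly, mirroring the `readNamespacedConfigMap({ name, namespace })` call in the diff, and it reuses the `TraceReport` shape defined there.

```typescript
// Hypothetical reader for the report written by collectTraces (sketch, not in this PR).
import { CoreV1Api, KubeConfig } from '@kubernetes/client-node'

interface ResourceReport {
  kind: string
  name: string
  namespace: string
  value: string
}

interface TraceReport {
  timestamp: string
  failedResources: ResourceReport[]
  summary: { total: number; byType: Record<string, number> }
  errors?: string[]
}

async function readTraceReport(): Promise<TraceReport | undefined> {
  const kc = new KubeConfig()
  kc.loadFromDefault()
  const coreApi = kc.makeApiClient(CoreV1Api)

  // Same ConfigMap coordinates as writeReportToConfigMap() in src/cmd/traces.ts.
  const configMap = await coreApi.readNamespacedConfigMap({
    name: 'apl-traces-report',
    namespace: 'apl-operator',
  })

  const raw = configMap.data?.report
  if (!raw) return undefined

  const report = JSON.parse(raw) as TraceReport
  console.log(`Report from ${report.timestamp}: ${report.summary.total} failing resources`)
  for (const [kind, count] of Object.entries(report.summary.byType)) {
    console.log(`  ${kind}: ${count}`)
  }
  return report
}

readTraceReport().catch((err) => {
  // The ConfigMap is only written when failing resources were found,
  // so a 404 here is consistent with a healthy cluster.
  console.error('No trace report available:', err)
  process.exit(1)
})
```

As the tests above assert, the ConfigMap is only created or replaced when failing resources are detected, so its absence is itself the "healthy cluster" signal rather than an error.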