From dec6aed5b34b9f3b6fa10f4f144232c524b1ed03 Mon Sep 17 00:00:00 2001 From: Cas Lubbers Date: Wed, 29 Oct 2025 16:57:10 +0100 Subject: [PATCH 1/7] feat: add troubleshoot command --- src/cmd/apply.ts | 7 + src/cmd/index.ts | 3 + src/cmd/install.ts | 7 + src/cmd/troubleshoot.ts | 333 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 350 insertions(+) create mode 100644 src/cmd/troubleshoot.ts diff --git a/src/cmd/apply.ts b/src/cmd/apply.ts index 439ad000ef..ea0a14f656 100644 --- a/src/cmd/apply.ts +++ b/src/cmd/apply.ts @@ -14,6 +14,7 @@ import { runtimeUpgrade } from '../common/runtime-upgrade' import { applyAsApps } from './apply-as-apps' import { applyTeams } from './apply-teams' import { commit } from './commit' +import { troubleshoot } from './troubleshoot' import { upgrade } from './upgrade' const cmdName = getFilename(__filename) @@ -86,6 +87,12 @@ export const apply = async (): Promise => { await applyAll() } catch (e) { d.error(e) + // Generate troubleshooting report on apply failure + try { + await troubleshoot() + } catch (troubleshootError) { + d.error('Failed to generate troubleshooting report:', troubleshootError) + } d.info(`Retrying in ${retryOptions.maxTimeout} ms`) throw e } diff --git a/src/cmd/index.ts b/src/cmd/index.ts index 45d9d6e45f..eef6be81a8 100644 --- a/src/cmd/index.ts +++ b/src/cmd/index.ts @@ -24,6 +24,7 @@ import { module as statusModule } from './status' import { module as syncModule } from './sync' import { module as templateModule } from './template' import { module as testModule } from './test' +import { module as troubleshootModule } from './troubleshoot' import { module as upgradeModule } from './upgrade' import { module as validateClusterModule } from './validate-cluster' import { module as validateTemplatesModule } from './validate-templates' @@ -55,6 +56,7 @@ export { module as status } from './status' export { module as sync } from './sync' export { module as template } from './template' export { module as test } from './test' +export { module as troubleshoot } from './troubleshoot' export { module as upgrade } from './upgrade' export { module as validateTemplates } from './validate-templates' export { module as validateValues } from './validate-values' @@ -88,6 +90,7 @@ export const commands: CommandModule[] = [ syncModule, templateModule, testModule, + troubleshootModule, validateClusterModule, validateTemplatesModule, validateValuesModule, diff --git a/src/cmd/install.ts b/src/cmd/install.ts index d9ce0dafb2..df43b5fff0 100644 --- a/src/cmd/install.ts +++ b/src/cmd/install.ts @@ -25,6 +25,7 @@ import { printWelcomeMessage, retryIsOAuth2ProxyRunning, } from './commit' +import { troubleshoot } from './troubleshoot' const cmdName = getFilename(__filename) const dir = '/tmp/otomi/' @@ -145,6 +146,12 @@ const install = async (): Promise => { await installAll() } catch (e) { d.error(e) + // Generate troubleshooting report on installation failure + try { + await troubleshoot() + } catch (troubleshootError) { + d.error('Failed to generate troubleshooting report:', troubleshootError) + } d.info(`Retrying in ${retryOptions.maxTimeout} ms`) throw e } diff --git a/src/cmd/troubleshoot.ts b/src/cmd/troubleshoot.ts new file mode 100644 index 0000000000..95365ac9d7 --- /dev/null +++ b/src/cmd/troubleshoot.ts @@ -0,0 +1,333 @@ +import { ApiException } from '@kubernetes/client-node' +import { prepareEnvironment } from 'src/common/cli' +import { terminal } from 'src/common/debug' +import { k8s } from 'src/common/k8s' +import { getFilename } from 'src/common/utils' +import { BasicArguments, setParsedArgs } from 'src/common/yargs' +import { Argv } from 'yargs' + +const cmdName = getFilename(__filename) + +interface ResourceReport { + kind: string + name: string + namespace: string + value: string +} + +/** + * Get pods with issues across all namespaces + */ +async function getPodsWithIssues(): Promise { + const coreApi = k8s.core() + const response = await coreApi.listPodForAllNamespaces() + const pods: ResourceReport[] = [] + + await Promise.all( + response.items.map(async (pod) => { + const namespace = pod.metadata?.namespace || 'default' + const podName = pod.metadata?.name || 'unknown' + const issues: string[] = [] + + // Check for CrashLoopBackOff and other problematic states + if (['CrashLoopBackOff', 'Failed', 'Unknown'].includes(pod.status?.phase || '')) { + issues.push(`Pod status: ${pod.status?.phase}. ${pod.status?.message || ''}`) + } + + // Check for pending pods without node assignment + if (pod.status?.phase === 'Pending' && !pod.spec?.nodeName) { + const events = await coreApi.listNamespacedEvent({ namespace }) + const schedulingEvent = events.items.find( + (event) => event.involvedObject.name === podName && event.reason === 'FailedScheduling', + ) + if (schedulingEvent?.message) { + issues.push(schedulingEvent.message) + } else { + issues.push('Pod is pending without node assignment') + } + } + + // Check container statuses + pod.status?.containerStatuses?.forEach((containerStatus) => { + if (containerStatus.lastState?.terminated?.reason === 'OOMKilled') { + issues.push( + `Container ${containerStatus.name} terminated (${containerStatus.lastState?.terminated?.reason}).`, + ) + } + if (containerStatus.state?.terminated) { + issues.push( + `Container ${containerStatus.name} terminated (${containerStatus.state?.terminated.reason}). ${containerStatus.state?.terminated.message || ''}`, + ) + } + if (containerStatus.state?.waiting?.reason) { + issues.push( + `Container ${containerStatus.name} waiting (${containerStatus.state?.waiting?.reason}). ${containerStatus.state?.waiting?.message || ''}`, + ) + } + }) + + issues.forEach((issue) => { + pods.push({ + kind: 'Pod', + name: podName, + namespace, + value: issue, + }) + }) + }), + ) + + return pods +} + +/** + * Get deployments with replica mismatches + */ +async function getDeploymentsWithIssues(): Promise { + const appsApi = k8s.app() + const response = await appsApi.listDeploymentForAllNamespaces() + + return response.items + .filter((deployment) => deployment.status?.replicas !== deployment.status?.availableReplicas) + .map((deployment) => ({ + kind: 'Deployment', + name: deployment.metadata?.name || 'unknown', + namespace: deployment.metadata?.namespace || 'default', + value: `Desired ${deployment.status?.replicas}, Available ${deployment.status?.availableReplicas}`, + })) +} + +/** + * Get statefulsets with replica mismatches + */ +async function getStatefulSetsWithIssues(): Promise { + const appsApi = k8s.app() + const coreApi = k8s.core() + const namespaces = await coreApi.listNamespace() + const statefulSets: ResourceReport[] = [] + + await Promise.all( + namespaces.items.map(async (ns) => { + const namespace = ns.metadata?.name + if (!namespace) return + + const response = await appsApi.listNamespacedStatefulSet({ namespace }) + response.items.forEach((sts) => { + const replicas = sts.spec?.replicas || 0 + const readyReplicas = sts.status?.readyReplicas || 0 + if (readyReplicas < replicas) { + statefulSets.push({ + kind: 'StatefulSet', + name: sts.metadata?.name || 'unknown', + namespace, + value: `Desired ${replicas}, Ready ${readyReplicas}`, + }) + } + }) + }), + ) + + return statefulSets +} + +/** + * Get nodes that are not ready + */ +async function getNodesWithIssues(): Promise { + const coreApi = k8s.core() + const response = await coreApi.listNode() + + return response.items + .filter((node) => node.status?.conditions?.some((cond) => cond.type === 'Ready' && cond.status !== 'True')) + .map((node) => ({ + kind: 'Node', + name: node.metadata?.name || 'unknown', + namespace: 'N/A', + value: 'Node not Ready', + })) +} + +/** + * Get services with issues (e.g., LoadBalancer without IP) + */ +async function getServicesWithIssues(): Promise { + const coreApi = k8s.core() + const response = await coreApi.listServiceForAllNamespaces() + + return response.items + .map((service) => { + const namespace = service.metadata?.namespace || 'default' + const name = service.metadata?.name || 'unknown' + const type = service.spec?.type || 'ClusterIP' + let issue: string | null = null + + if (type === 'LoadBalancer' && !service.status?.loadBalancer?.ingress) { + issue = 'LoadBalancer IP not assigned' + } + + if (issue) { + return { + kind: 'Service', + name, + namespace, + value: issue, + } + } + return null + }) + .filter((r): r is ResourceReport => r !== null) +} + +/** + * Get PersistentVolumeClaims that are not bound + */ +async function getPVCsWithIssues(): Promise { + const coreApi = k8s.core() + const namespaces = await coreApi.listNamespace() + const pvcs: ResourceReport[] = [] + + await Promise.all( + namespaces.items.map(async (ns) => { + const namespace = ns.metadata?.name + if (!namespace) return + + const response = await coreApi.listNamespacedPersistentVolumeClaim({ namespace }) + response.items.forEach((pvc) => { + if (pvc.status?.phase !== 'Bound') { + const conditions = pvc.status?.conditions?.map((c) => `${c.type}: ${c.message}`).join('; ') || '' + pvcs.push({ + kind: 'PersistentVolumeClaim', + name: pvc.metadata?.name || 'unknown', + namespace, + value: `Phase: ${pvc.status?.phase}${conditions ? `. ${conditions}` : ''}`, + }) + } + }) + }), + ) + + return pvcs +} + +/** + * Get PersistentVolumes with issues + */ +async function getPVsWithIssues(): Promise { + const coreApi = k8s.core() + const response = await coreApi.listPersistentVolume() + + return response.items + .filter((pv) => pv.status?.phase !== 'Available' && pv.status?.phase !== 'Bound') + .map((pv) => ({ + kind: 'PersistentVolume', + name: pv.metadata?.name || 'unknown', + namespace: 'N/A', + value: `Phase: ${pv.status?.phase}`, + })) +} + +/** + * Write troubleshooting report to ConfigMap + */ +async function writeReportToConfigMap( + name: string, + namespace: string, + report: { timestamp: string; failedResources: ResourceReport[]; summary: any }, +): Promise { + const coreApi = k8s.core() + const reportJson = JSON.stringify(report, null, 2) + + try { + // Try to read existing ConfigMap + const existingConfigMap = await coreApi.readNamespacedConfigMap({ name, namespace }) + + // Update existing ConfigMap + if (!existingConfigMap.data) { + existingConfigMap.data = {} + } + existingConfigMap.data.report = reportJson + + await coreApi.replaceNamespacedConfigMap({ name, namespace, body: existingConfigMap }) + } catch (error) { + if (error instanceof ApiException && error.code === 404) { + // ConfigMap doesn't exist, create it + await coreApi.createNamespacedConfigMap({ + namespace, + body: { + metadata: { name }, + data: { report: reportJson }, + }, + }) + } else { + throw error + } + } +} + +/** + * Main troubleshoot function + */ +export async function troubleshoot(): Promise { + const d = terminal(`cmd:${cmdName}:troubleshoot`) + + try { + d.info('Starting troubleshooting scan...') + + // Gather all failed resources + const [pods, deployments, statefulSets, nodes, services, pvcs, pvs] = await Promise.all([ + getPodsWithIssues(), + getDeploymentsWithIssues(), + getStatefulSetsWithIssues(), + getNodesWithIssues(), + getServicesWithIssues(), + getPVCsWithIssues(), + getPVsWithIssues(), + ]) + + const failedResources = [...pods, ...deployments, ...statefulSets, ...nodes, ...services, ...pvcs, ...pvs] + + // Generate report + const report = { + timestamp: new Date().toISOString(), + failedResources, + summary: { + total: failedResources.length, + byType: failedResources.reduce( + (acc, r) => { + acc[r.kind] = (acc[r.kind] || 0) + 1 + return acc + }, + {} as Record, + ), + }, + } + + // Store in ConfigMap + const configMapName = 'apl-troubleshooting-report' + const targetNamespace = 'apl-operator' + + if (failedResources.length === 0) { + d.info('Your APL instance seems to be healthy.') + } else { + await writeReportToConfigMap(configMapName, targetNamespace, report) + d.info( + `Troubleshooting report stored in ConfigMap ${targetNamespace}/${configMapName} (${failedResources.length} failed resources)`, + ) + } + } catch (error) { + d.error('Troubleshooting scan failed:', error) + throw error + } +} + +export const module = { + command: cmdName, + describe: 'Generate troubleshooting report of failed resources and store in ConfigMap', + builder: (parser: Argv): Argv => parser, + + handler: async (argv: BasicArguments): Promise => { + setParsedArgs(argv) + await prepareEnvironment({ skipEnvDirCheck: true, skipDecrypt: true }) + await troubleshoot() + }, +} From b49a5292ed62f1aeff3c0865c0438e959ab5ed6b Mon Sep 17 00:00:00 2001 From: Cas Lubbers Date: Wed, 29 Oct 2025 17:01:40 +0100 Subject: [PATCH 2/7] feat: add troubleshoot command --- src/cmd/troubleshoot.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cmd/troubleshoot.ts b/src/cmd/troubleshoot.ts index 95365ac9d7..cc2be80f14 100644 --- a/src/cmd/troubleshoot.ts +++ b/src/cmd/troubleshoot.ts @@ -293,10 +293,10 @@ export async function troubleshoot(): Promise { summary: { total: failedResources.length, byType: failedResources.reduce( - (acc, r) => { - acc[r.kind] = (acc[r.kind] || 0) + 1 - return acc - }, + (acc, r) => ({ + ...acc, + [r.kind]: (acc[r.kind] || 0) + 1, + }), {} as Record, ), }, From fe812d2aaed54d0e733f437f8bd37446d4802966 Mon Sep 17 00:00:00 2001 From: Cas Lubbers Date: Thu, 30 Oct 2025 09:11:22 +0100 Subject: [PATCH 3/7] feat: add argocd application --- src/cmd/troubleshoot.ts | 75 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 5 deletions(-) diff --git a/src/cmd/troubleshoot.ts b/src/cmd/troubleshoot.ts index cc2be80f14..3ab03adf3b 100644 --- a/src/cmd/troubleshoot.ts +++ b/src/cmd/troubleshoot.ts @@ -226,6 +226,64 @@ async function getPVsWithIssues(): Promise { })) } +/** + * Get ArgoCD Applications with health or sync issues + */ +async function getArgoApplicationsWithIssues(): Promise { + const customApi = k8s.custom() + const applications: ResourceReport[] = [] + + try { + const response = await customApi.listClusterCustomObject({ + group: 'argoproj.io', + version: 'v1alpha1', + plural: 'applications', + }) + + const items = (response as any).items || [] + + items.forEach((app: any) => { + const name = app.metadata?.name || 'unknown' + const namespace = app.metadata?.namespace || 'unknown' + const healthStatus = app.status?.health?.status + const syncStatus = app.status?.sync?.status + const issues: string[] = [] + + if (healthStatus && healthStatus !== 'Healthy') { + const healthMessage = app.status?.health?.message || '' + issues.push(`Health: ${healthStatus}${healthMessage ? ` - ${healthMessage}` : 'Unknown'}`) + } + + if (syncStatus && syncStatus !== 'Synced') { + issues.push(`Sync: ${syncStatus}`) + } + + const operationPhase = app.status?.operationState?.phase + if (operationPhase !== 'Succeeded') { + const message = app.status?.operationState?.message || 'Unknown' + issues.push(`Operation: ${operationPhase} - ${message}`) + } + + issues.forEach((issue) => { + applications.push({ + kind: 'Application', + name, + namespace, + value: issue, + }) + }) + }) + } catch (error) { + // If ArgoCD is not installed or CRD doesn't exist, silently skip + if (error instanceof ApiException && (error.code === 404 || error.code === 403)) { + return [] + } + throw error + } + + return applications +} + /** * Write troubleshooting report to ConfigMap */ @@ -238,10 +296,8 @@ async function writeReportToConfigMap( const reportJson = JSON.stringify(report, null, 2) try { - // Try to read existing ConfigMap const existingConfigMap = await coreApi.readNamespacedConfigMap({ name, namespace }) - // Update existing ConfigMap if (!existingConfigMap.data) { existingConfigMap.data = {} } @@ -250,7 +306,6 @@ async function writeReportToConfigMap( await coreApi.replaceNamespacedConfigMap({ name, namespace, body: existingConfigMap }) } catch (error) { if (error instanceof ApiException && error.code === 404) { - // ConfigMap doesn't exist, create it await coreApi.createNamespacedConfigMap({ namespace, body: { @@ -274,7 +329,7 @@ export async function troubleshoot(): Promise { d.info('Starting troubleshooting scan...') // Gather all failed resources - const [pods, deployments, statefulSets, nodes, services, pvcs, pvs] = await Promise.all([ + const [pods, deployments, statefulSets, nodes, services, pvcs, pvs, argoApps] = await Promise.all([ getPodsWithIssues(), getDeploymentsWithIssues(), getStatefulSetsWithIssues(), @@ -282,9 +337,19 @@ export async function troubleshoot(): Promise { getServicesWithIssues(), getPVCsWithIssues(), getPVsWithIssues(), + getArgoApplicationsWithIssues(), ]) - const failedResources = [...pods, ...deployments, ...statefulSets, ...nodes, ...services, ...pvcs, ...pvs] + const failedResources = [ + ...pods, + ...deployments, + ...statefulSets, + ...nodes, + ...services, + ...pvcs, + ...pvs, + ...argoApps, + ] // Generate report const report = { From 71686153d6d01655bc88fd0a1cc10000ebfd4933 Mon Sep 17 00:00:00 2001 From: Cas Lubbers Date: Thu, 30 Oct 2025 09:31:39 +0100 Subject: [PATCH 4/7] feat: update argocd application --- src/cmd/troubleshoot.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cmd/troubleshoot.ts b/src/cmd/troubleshoot.ts index 3ab03adf3b..b46a42c58e 100644 --- a/src/cmd/troubleshoot.ts +++ b/src/cmd/troubleshoot.ts @@ -250,16 +250,16 @@ async function getArgoApplicationsWithIssues(): Promise { const issues: string[] = [] if (healthStatus && healthStatus !== 'Healthy') { - const healthMessage = app.status?.health?.message || '' - issues.push(`Health: ${healthStatus}${healthMessage ? ` - ${healthMessage}` : 'Unknown'}`) + const healthMessage = app.status?.health?.message || 'Unknown' + issues.push(`HealthStatus: ${healthStatus} message: ${healthMessage}`) } if (syncStatus && syncStatus !== 'Synced') { - issues.push(`Sync: ${syncStatus}`) + issues.push(`SyncStatus: ${syncStatus}`) } const operationPhase = app.status?.operationState?.phase - if (operationPhase !== 'Succeeded') { + if (operationPhase && operationPhase !== 'Succeeded') { const message = app.status?.operationState?.message || 'Unknown' issues.push(`Operation: ${operationPhase} - ${message}`) } From 97689cd6d21dbc345c95cf9c2eed5ac520ea38a2 Mon Sep 17 00:00:00 2001 From: Cas Lubbers Date: Thu, 30 Oct 2025 15:24:19 +0100 Subject: [PATCH 5/7] feat: add tests --- src/cmd/troubleshoot.test.ts | 257 +++++++++++++++++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 src/cmd/troubleshoot.test.ts diff --git a/src/cmd/troubleshoot.test.ts b/src/cmd/troubleshoot.test.ts new file mode 100644 index 0000000000..a3f1c595f5 --- /dev/null +++ b/src/cmd/troubleshoot.test.ts @@ -0,0 +1,257 @@ +import { ApiException } from '@kubernetes/client-node' +import * as k8sModule from 'src/common/k8s' + +// Mock dependencies +jest.mock('src/common/k8s') +jest.mock('src/common/cli', () => ({ + prepareEnvironment: jest.fn(), +})) +jest.mock('src/common/debug', () => ({ + terminal: jest.fn(() => ({ + info: jest.fn(), + error: jest.fn(), + stream: { log: process.stdout, error: process.stderr }, + })), +})) +jest.mock('src/common/utils', () => ({ + getFilename: jest.fn(() => 'troubleshoot'), +})) +jest.mock('src/common/yargs', () => ({ + setParsedArgs: jest.fn(), +})) + +import { troubleshoot } from './troubleshoot' + +class MockApiException extends ApiException { + code: number + constructor(code: number, message: string) { + super(code, message, {}, {}) + this.code = code + } +} + +describe('Troubleshoot Command', () => { + let mockCoreApi: any + let mockAppsApi: any + let mockCustomApi: any + + beforeEach(() => { + mockCoreApi = { + listPodForAllNamespaces: jest.fn(), + listNamespacedEvent: jest.fn(), + listServiceForAllNamespaces: jest.fn(), + listNamespace: jest.fn(), + listNamespacedPersistentVolumeClaim: jest.fn(), + listPersistentVolume: jest.fn(), + listNode: jest.fn(), + readNamespacedConfigMap: jest.fn(), + createNamespacedConfigMap: jest.fn(), + replaceNamespacedConfigMap: jest.fn(), + } + + mockAppsApi = { + listDeploymentForAllNamespaces: jest.fn(), + listNamespacedStatefulSet: jest.fn(), + } + + mockCustomApi = { + listClusterCustomObject: jest.fn(), + } + ;(k8sModule.k8s as any) = { + core: jest.fn(() => mockCoreApi), + app: jest.fn(() => mockAppsApi), + custom: jest.fn(() => mockCustomApi), + } + }) + + afterEach(() => { + jest.clearAllMocks() + }) + + it('should detect all types of failed resources and store in ConfigMap', async () => { + // Mock various failing resources + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'crashed-pod', namespace: 'default' }, + status: { phase: 'CrashLoopBackOff', message: 'Container crashed' }, + }, + { + metadata: { name: 'oom-pod', namespace: 'default' }, + status: { + phase: 'Running', + containerStatuses: [{ name: 'main', lastState: { terminated: { reason: 'OOMKilled' } } }], + }, + }, + ], + }) + + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-deployment', namespace: 'default' }, + status: { replicas: 3, availableReplicas: 1 }, + }, + ], + }) + + mockCoreApi.listNamespace.mockResolvedValue({ + items: [{ metadata: { name: 'default' } }], + }) + + mockAppsApi.listNamespacedStatefulSet.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-sts', namespace: 'default' }, + spec: { replicas: 3 }, + status: { readyReplicas: 0 }, + }, + ], + }) + + mockCoreApi.listNode.mockResolvedValue({ + items: [ + { + metadata: { name: 'node-1' }, + status: { conditions: [{ type: 'Ready', status: 'False' }] }, + }, + ], + }) + + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-lb', namespace: 'default' }, + spec: { type: 'LoadBalancer' }, + status: { loadBalancer: {} }, + }, + ], + }) + + mockCoreApi.listNamespacedPersistentVolumeClaim.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-pvc', namespace: 'default' }, + status: { phase: 'Pending' }, + }, + ], + }) + + mockCoreApi.listPersistentVolume.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-pv' }, + status: { phase: 'Failed' }, + }, + ], + }) + + mockCustomApi.listClusterCustomObject.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-app', namespace: 'argocd' }, + status: { + health: { status: 'Degraded', message: 'Pod not ready' }, + sync: { status: 'OutOfSync' }, + }, + }, + ], + }) + + mockCoreApi.readNamespacedConfigMap.mockRejectedValue(new MockApiException(404, 'Not Found')) + mockCoreApi.createNamespacedConfigMap.mockResolvedValue({}) + + await troubleshoot() + + expect(mockCoreApi.createNamespacedConfigMap).toHaveBeenCalledWith({ + namespace: 'apl-operator', + body: { + metadata: { name: 'apl-troubleshooting-report' }, + data: { report: expect.any(String) }, + }, + }) + + // eslint-disable-next-line prefer-destructuring, @typescript-eslint/no-unsafe-argument + const configMapCall = mockCoreApi.createNamespacedConfigMap.mock.calls[0][0] + const reportData = JSON.parse(configMapCall.body.data.report) + + // Should have all resource types + expect(reportData.failedResources.length).toBeGreaterThan(0) + expect(reportData.summary.byType).toEqual( + expect.objectContaining({ + Pod: expect.any(Number), + Deployment: 1, + StatefulSet: 1, + Node: 1, + Service: 1, + PersistentVolumeClaim: 1, + PersistentVolume: 1, + Application: 2, // Health and Sync issues + }), + ) + expect(reportData.timestamp).toBeDefined() + }) + + it('should report healthy cluster when no issues found', async () => { + // Mock all resources as healthy + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ items: [] }) + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + await troubleshoot() + + // Should not create ConfigMap for healthy cluster + expect(mockCoreApi.createNamespacedConfigMap).not.toHaveBeenCalled() + expect(mockCoreApi.replaceNamespacedConfigMap).not.toHaveBeenCalled() + }) + + it('should update existing ConfigMap instead of creating new one', async () => { + const existingConfigMap = { + metadata: { name: 'apl-troubleshooting-report' }, + data: { report: '{"old": "data"}' }, + } + + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'failed-pod', namespace: 'default' }, + status: { phase: 'Failed' }, + }, + ], + }) + + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + mockCoreApi.readNamespacedConfigMap.mockResolvedValue(existingConfigMap) + mockCoreApi.replaceNamespacedConfigMap.mockResolvedValue({}) + + await troubleshoot() + + expect(mockCoreApi.replaceNamespacedConfigMap).toHaveBeenCalled() + expect(mockCoreApi.createNamespacedConfigMap).not.toHaveBeenCalled() + }) + + it('should gracefully handle ArgoCD not installed', async () => { + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ items: [] }) + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockRejectedValue(new MockApiException(404, 'Not Found')) + + await troubleshoot() + + // Should not throw error + expect(mockCoreApi.listPodForAllNamespaces).toHaveBeenCalled() + }) +}) From 52c4c6bc85115713022c180c2be1316c4ef4ac72 Mon Sep 17 00:00:00 2001 From: Cas Lubbers Date: Thu, 30 Oct 2025 15:42:09 +0100 Subject: [PATCH 6/7] feat: add server side to kubectl apply --- src/common/hf.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/hf.ts b/src/common/hf.ts index de317cb04f..86b96130eb 100644 --- a/src/common/hf.ts +++ b/src/common/hf.ts @@ -215,7 +215,7 @@ export const deployEssential = async (labelOpts: string[] | null = null) => { } writeFileSync(templateFile, templateOutput) - await $`kubectl apply -f ${templateFile}` + await $`kubectl apply --server-side=true -f ${templateFile}` } return true From 789945ea2a51bdba450a87fa364554a6e64151bd Mon Sep 17 00:00:00 2001 From: Cas Lubbers Date: Tue, 4 Nov 2025 14:13:14 +0100 Subject: [PATCH 7/7] feat: change to collect traces --- .cspell.json | 25 +-- .devcontainer/devcontainer.json | 40 ++--- .github/dependabot.yml | 18 +-- .github/stale.yml | 2 +- .github/templates/wiz-admission-control.yaml | 12 +- .../templates/wiz-kubernetes-integration.yaml | 14 +- .github/workflows/dependabot-auto-approve.yml | 2 +- README.md | 3 + SECURITY.md | 1 - adr/index.md | 50 +++--- core.yaml | 4 +- package-lock.json | 35 +--- schemas/Readme.md | 5 +- src/cmd/apply.ts | 10 +- src/cmd/collect.ts | 13 ++ src/cmd/index.ts | 8 +- src/cmd/install.ts | 10 +- .../{troubleshoot.test.ts => traces.test.ts} | 125 ++++++++++++++- src/cmd/{troubleshoot.ts => traces.ts} | 150 ++++++++++-------- tests/fixtures/env/apps/kserve.yaml | 8 +- .../env/apps/secrets.kubeflow-pipelines.yaml | 2 +- .../env/teams/demo/agents/my-agent.yaml | 14 +- .../env/teams/demo/databases/pgvector-db.yaml | 12 +- .../teams/demo/knowledgebases/demo-kb.yaml | 28 ++-- .../rules/orcs-compliance.yaml | 2 +- 25 files changed, 343 insertions(+), 250 deletions(-) create mode 100644 src/cmd/collect.ts rename src/cmd/{troubleshoot.test.ts => traces.test.ts} (60%) rename src/cmd/{troubleshoot.ts => traces.ts} (74%) diff --git a/.cspell.json b/.cspell.json index ec6606a109..1a7f30edfd 100644 --- a/.cspell.json +++ b/.cspell.json @@ -1,27 +1,10 @@ { "version": "0.2", "allowCompoundWords": true, - "enableFiletypes": [ - "dockerfile", - "md", - "yaml" - ], - "enabledLanguageIds": [ - "json", - "jsonc", - "markdown", - "typescript", - "typescriptreact", - "yaml", - "yml" - ], - "ignorePaths": [ - "CHANGELOG.md", - "*.json" - ], - "ignoreRegExpList": [ - "/'s\\b/" - ], + "enableFiletypes": ["dockerfile", "md", "yaml"], + "enabledLanguageIds": ["json", "jsonc", "markdown", "typescript", "typescriptreact", "yaml", "yml"], + "ignorePaths": ["CHANGELOG.md", "*.json"], + "ignoreRegExpList": ["/'s\\b/"], "ignoreWords": [ "adrs", "trivy", diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index c5b55ff8a8..32b7d27565 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,32 +1,32 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: // https://github.com/microsoft/vscode-dev-containers/tree/v0.191.1/containers/docker-existing-dockerfile { - "name": "Existing Dockerfile", + "name": "Existing Dockerfile", - // Sets the run context to one level up instead of the .devcontainer folder. - "context": "..", + // Sets the run context to one level up instead of the .devcontainer folder. + "context": "..", - // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename. - "dockerFile": "../Dockerfile", + // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename. + "dockerFile": "../Dockerfile", - // Set *default* container specific settings.json values on container create. - "settings": {}, - - // Add the IDs of extensions you want installed when the container is created. - "extensions": [] + // Set *default* container specific settings.json values on container create. + "settings": {}, - // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], + // Add the IDs of extensions you want installed when the container is created. + "extensions": [] - // Uncomment the next line to run commands after the container is created - for example installing curl. - // "postCreateCommand": "apt-get update && apt-get install -y curl", + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], - // Uncomment when using a ptrace-based debugger like C++, Go, and Rust - // "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ], + // Uncomment the next line to run commands after the container is created - for example installing curl. + // "postCreateCommand": "apt-get update && apt-get install -y curl", - // Uncomment to use the Docker CLI from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker. - // "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ], + // Uncomment when using a ptrace-based debugger like C++, Go, and Rust + // "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ], - // Uncomment to connect as a non-root user if you've added one. See https://aka.ms/vscode-remote/containers/non-root. - // "remoteUser": "vscode" + // Uncomment to use the Docker CLI from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker. + // "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ], + + // Uncomment to connect as a non-root user if you've added one. See https://aka.ms/vscode-remote/containers/non-root. + // "remoteUser": "vscode" } diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 4f9b2443db..10487663ab 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -18,10 +18,10 @@ updates: groups: npm-dependencies: patterns: - - "*" + - '*' update-types: - - "minor" - - "patch" + - 'minor' + - 'patch' # Docker - package-ecosystem: 'docker' @@ -36,10 +36,10 @@ updates: groups: docker-dependencies: patterns: - - "*" + - '*' update-types: - - "minor" - - "patch" + - 'minor' + - 'patch' ignore: - dependency-name: 'node' versions: ['>=23'] @@ -57,7 +57,7 @@ updates: groups: github-actions-dependencies: patterns: - - "*" + - '*' update-types: - - "minor" - - "patch" + - 'minor' + - 'patch' diff --git a/.github/stale.yml b/.github/stale.yml index 42da5926d4..299eb23d58 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -5,7 +5,7 @@ daysUntilClose: 7 # Issues with these labels will never be considered stale exemptLabels: - pinned -# Set to true to ignore issues with an assignee (defaults to false) +# Set to true to ignore issues with an assignee (defaults to false) exemptAssignees: true # Only mark stale when these labels are found onlyLabels: diff --git a/.github/templates/wiz-admission-control.yaml b/.github/templates/wiz-admission-control.yaml index e3a9406284..596acfbeb7 100644 --- a/.github/templates/wiz-admission-control.yaml +++ b/.github/templates/wiz-admission-control.yaml @@ -1,14 +1,14 @@ wizApiToken: - clientId: "__WIZ_CLIENT_ID__" - clientToken: "__WIZ_CLIENT_TOKEN__" - clientEndpoint: "" + clientId: '__WIZ_CLIENT_ID__' + clientToken: '__WIZ_CLIENT_TOKEN__' + clientEndpoint: '' wiz-kubernetes-connector: enabled: true autoCreateConnector: - connectorName: "" + connectorName: '' webhook: - clusterExternalId: "" + clusterExternalId: '' wiz-admission-controller: enabled: true kubernetesAuditLogsWebhook: - enabled: true \ No newline at end of file + enabled: true diff --git a/.github/templates/wiz-kubernetes-integration.yaml b/.github/templates/wiz-kubernetes-integration.yaml index 38963ff5aa..b6431aec70 100644 --- a/.github/templates/wiz-kubernetes-integration.yaml +++ b/.github/templates/wiz-kubernetes-integration.yaml @@ -1,14 +1,14 @@ global: wizApiToken: - clientId: "__WIZ_CLIENT_ID__" - clientToken: "__WIZ_CLIENT_TOKEN__" - clientEndpoint: "" + clientId: '__WIZ_CLIENT_ID__' + clientToken: '__WIZ_CLIENT_TOKEN__' + clientEndpoint: '' wiz-kubernetes-connector: enabled: true autoCreateConnector: - connectorName: "" - clusterExternalId: "" + connectorName: '' + clusterExternalId: '' wiz-broker: enabled: true @@ -16,7 +16,7 @@ wiz-sensor: enabled: true imagePullSecret: create: false - name: "sensor-image-pull" + name: 'sensor-image-pull' wiz-admission-controller: enabled: true @@ -27,4 +27,4 @@ wiz-admission-controller: imageIntegrityWebhook: enabled: false policies: - - my-image-trust-policy \ No newline at end of file + - my-image-trust-policy diff --git a/.github/workflows/dependabot-auto-approve.yml b/.github/workflows/dependabot-auto-approve.yml index 854aa6d5bd..6824c1dc51 100644 --- a/.github/workflows/dependabot-auto-approve.yml +++ b/.github/workflows/dependabot-auto-approve.yml @@ -16,4 +16,4 @@ jobs: - name: Auto approve dependabot PRs uses: hmarr/auto-approve-action@v4 with: - github-token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index daab25adee..b147c15139 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,15 @@ ## Getting started ### Step 1: [Install the App Platform](https://apl-docs.net/docs/get-started/installation/overview) + Install the App Platform on Linode Kubernetes Engine (LKE) or any other conformant Kubernetes cluster. ### Step 2: [Follow the post installation steps ](https://apl-docs.net/docs/get-started/installation/post-installation-steps) + Configure the App Platform for your use case. ### Step 3: [Explore the App Platform using the Labs](https://apl-docs.net/docs/get-started/labs/overview) + Explore the App Platform with a comprehensive set of hands-on labs. ## Akamai Application Platform supports diff --git a/SECURITY.md b/SECURITY.md index 20977f75ca..4831e8de02 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,4 +6,3 @@ We only want to support forward fixing of our code, so please always upgrade whe A lot of vulnerabilities are automatically closed when we upgrade the open source solutions in our code base. We only make explicit mention of vulnerabilities mentioned to us and or fixed by us to allow for tracking. Read the changelog to see what security related issues have been solved, if any. - diff --git a/adr/index.md b/adr/index.md index 1161080fe3..a56c70aaac 100644 --- a/adr/index.md +++ b/adr/index.md @@ -1,25 +1,25 @@ -# Architectural Decision Log - -This log lists the architectural decisions for apl-core. - - - -- [ADR-2020-10-01](2020-10-01-github-workflow.md) - Our GitHub workflow -- [ADR-2020-10-02](2020-10-02-docker-compose.md) - docker compose -- [ADR-2020-11-06](2020-11-06-bash-style-guide.md) - Bash coding standard -- [ADR-2020-11-06](2020-11-06-konstraint-policy-library.md) - Konstraint library to support Common Rego Language definitions -- [ADR-2021-10-18](2021-10-18-defaults-and-derived.md) - Derived values -- [ADR-2021-10-28](2021-10-28-internal-values.md) - Internal values -- [ADR-2022-03-24](2022-03-24-custom-ca.md) - urice: -- [ADR-2022-04-22](2022-04-22-values-migration.md) - Values migration -- [ADR-2022-04-23](2022-04-23-pre-upgrade.md) - A new otomi pre-upgrade command -- [ADR-2022-05-17](2022-05-17-destroy-upon-uninstall.md) - Extra flags to accomodate destroy upon uninstall -- [ADR-2022-06-07](2022-06-07-ingress-classes.md) - Ingress classes -- [ADR-2022-07-02](2022-07-02-node-affinity.md) - Node affinity -- [ADR-2022-08-26](2022-08-26-other-dns-provider.md) - Other DNS provider - - - -For new ADRs, please use [template.md](.template.md) as basis. -More information on MADR is available at . -General information about architectural decision records is available at . +# Architectural Decision Log + +This log lists the architectural decisions for apl-core. + + + +- [ADR-2020-10-01](2020-10-01-github-workflow.md) - Our GitHub workflow +- [ADR-2020-10-02](2020-10-02-docker-compose.md) - docker compose +- [ADR-2020-11-06](2020-11-06-bash-style-guide.md) - Bash coding standard +- [ADR-2020-11-06](2020-11-06-konstraint-policy-library.md) - Konstraint library to support Common Rego Language definitions +- [ADR-2021-10-18](2021-10-18-defaults-and-derived.md) - Derived values +- [ADR-2021-10-28](2021-10-28-internal-values.md) - Internal values +- [ADR-2022-03-24](2022-03-24-custom-ca.md) - urice: +- [ADR-2022-04-22](2022-04-22-values-migration.md) - Values migration +- [ADR-2022-04-23](2022-04-23-pre-upgrade.md) - A new otomi pre-upgrade command +- [ADR-2022-05-17](2022-05-17-destroy-upon-uninstall.md) - Extra flags to accomodate destroy upon uninstall +- [ADR-2022-06-07](2022-06-07-ingress-classes.md) - Ingress classes +- [ADR-2022-07-02](2022-07-02-node-affinity.md) - Node affinity +- [ADR-2022-08-26](2022-08-26-other-dns-provider.md) - Other DNS provider + + + +For new ADRs, please use [template.md](.template.md) as basis. +More information on MADR is available at . +General information about architectural decision records is available at . diff --git a/core.yaml b/core.yaml index 630ddc2091..0a8f4a07e7 100644 --- a/core.yaml +++ b/core.yaml @@ -36,13 +36,13 @@ k8s: - name: istio-system disableIstioInjection: true labels: - "apl.io/ingress-controller-scope": "true" + 'apl.io/ingress-controller-scope': 'true' - name: ingress # disabling istio sidecar as it does not preserve client ip (yet) # TODO: enable once it does disableIstioInjection: true labels: - "apl.io/ingress-controller-scope": "true" + 'apl.io/ingress-controller-scope': 'true' - name: jaeger app: jaeger - name: jaeger-operator diff --git a/package-lock.json b/package-lock.json index 2aae0c95f6..6100735954 100644 --- a/package-lock.json +++ b/package-lock.json @@ -150,7 +150,6 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -2612,8 +2611,7 @@ "resolved": "https://registry.npmjs.org/@cspell/dict-css/-/dict-css-4.0.18.tgz", "integrity": "sha512-EF77RqROHL+4LhMGW5NTeKqfUd/e4OOv6EDFQ/UQQiFyWuqkEKyEz0NDILxOFxWUEVdjT2GQ2cC7t12B6pESwg==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@cspell/dict-dart": { "version": "2.3.1", @@ -2753,16 +2751,14 @@ "resolved": "https://registry.npmjs.org/@cspell/dict-html/-/dict-html-4.0.12.tgz", "integrity": "sha512-JFffQ1dDVEyJq6tCDWv0r/RqkdSnV43P2F/3jJ9rwLgdsOIXwQbXrz6QDlvQLVvNSnORH9KjDtenFTGDyzfCaA==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@cspell/dict-html-symbol-entities": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/@cspell/dict-html-symbol-entities/-/dict-html-symbol-entities-4.0.4.tgz", "integrity": "sha512-afea+0rGPDeOV9gdO06UW183Qg6wRhWVkgCFwiO3bDupAoyXRuvupbb5nUyqSTsLXIKL8u8uXQlJ9pkz07oVXw==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@cspell/dict-java": { "version": "5.0.12", @@ -2960,8 +2956,7 @@ "resolved": "https://registry.npmjs.org/@cspell/dict-typescript/-/dict-typescript-3.2.3.tgz", "integrity": "sha512-zXh1wYsNljQZfWWdSPYwQhpwiuW0KPW1dSd8idjMRvSD0aSvWWHoWlrMsmZeRl4qM4QCEAjua8+cjflm41cQBg==", "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@cspell/dict-vue": { "version": "3.0.5", @@ -5563,7 +5558,6 @@ "integrity": "sha512-z+j7DixNnfpdToYsOutStDgeRzJSMnbj8T1C/oQjB6Aa+kRfNjs/Fn7W6c8bmlt6mfy3FkgeKBRnDjxQow5dow==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@octokit/auth-token": "^5.0.0", "@octokit/graphql": "^8.1.2", @@ -7188,8 +7182,7 @@ "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/@types/json5": { "version": "0.0.29", @@ -7254,7 +7247,6 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-24.9.2.tgz", "integrity": "sha512-uWN8YqxXxqFMX2RqGOrumsKeti4LlmIMIyV0lgut4jx7KQBcBiW6vkDtIBvHnHIquwNfJhk8v2OtmO8zXWHfPA==", "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -7456,7 +7448,6 @@ "integrity": "sha512-BnOroVl1SgrPLywqxyqdJ4l3S2MsKVLDVxZvjI1Eoe8ev2r3kGDo+PcMihNmDE+6/KjkTubSJnmqGZZjQSBq/g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.46.2", "@typescript-eslint/types": "8.46.2", @@ -7998,7 +7989,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -8860,7 +8850,6 @@ "url": "https://github.com/sponsors/ai" } ], - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001726", "electron-to-chromium": "^1.5.173", @@ -11236,7 +11225,6 @@ "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "env-paths": "^2.2.1", "import-fresh": "^3.3.0", @@ -12581,7 +12569,6 @@ "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -12642,7 +12629,6 @@ "integrity": "sha512-82GZUjRS0p/jganf6q1rEO25VSoHH0hKPCTrgillPjdI/3bgBhAE1QzHrHTizjpRvy6pGAvKjDJtk2pF9NDq8w==", "dev": true, "license": "MIT", - "peer": true, "bin": { "eslint-config-prettier": "bin/cli.js" }, @@ -12777,7 +12763,6 @@ "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -18889,7 +18874,6 @@ "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz", "integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==", "license": "MIT", - "peer": true, "engines": { "node": ">= 10.16.0" } @@ -19955,7 +19939,6 @@ "integrity": "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==", "dev": true, "license": "MIT", - "peer": true, "bin": { "marked": "bin/marked.js" }, @@ -23065,7 +23048,6 @@ "dev": true, "inBundle": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -24179,7 +24161,6 @@ "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.6.2.tgz", "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==", "dev": true, - "peer": true, "bin": { "prettier": "bin/prettier.cjs" }, @@ -25164,7 +25145,6 @@ "integrity": "sha512-phCkJ6pjDi9ANdhuF5ElS10GGdAKY6R1Pvt9lT3SFhOwM4T7QZE7MLpBDbNruUx/Q3gFD92/UOFringGipRqZA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@semantic-release/commit-analyzer": "^13.0.0-beta.1", "@semantic-release/error": "^4.0.0", @@ -27156,7 +27136,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -27360,7 +27339,6 @@ "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@cspotcode/source-map-support": "^0.8.0", "@tsconfig/node10": "^1.0.7", @@ -27644,7 +27622,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -27813,7 +27790,6 @@ "dev": true, "hasInstallScript": true, "license": "MIT", - "peer": true, "dependencies": { "napi-postinstall": "^0.2.4" }, @@ -28309,7 +28285,6 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz", "integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=10.0.0" }, diff --git a/schemas/Readme.md b/schemas/Readme.md index fb2a8dd16f..0ed80cf213 100644 --- a/schemas/Readme.md +++ b/schemas/Readme.md @@ -29,13 +29,13 @@ pip3 install openapi2jsonschema 3. Observe new archive created # Generate missing CRD's + When there is a new CRD added and used in different helm-charts it needs to be added to the `generated-crd-schemas.tar.gz` otherwise the tests from `NODE_ENV=test binzx/otomi validate-templates` will fail. **Prerequsits** Have a Kubernetes cluster available with the correct CRD's you want to add or want to build a new list with - **Steps for adding new CRD to the list:** 1. On line 21 in `gen-missing-crd-schemas.sh` set the correct shorthand for the CRD you want to add. The CRD is selected by a `kubectl get crd | grep $shorthand` @@ -46,10 +46,9 @@ Have a Kubernetes cluster available with the correct CRD's you want to add or wa 6. Generate a new tar.gz from the diretory with: `tar -zcvf ../generated-crd-schemas.tar.gz .` 7. Run the tests `NODE_ENV=test binzx/otomi validate-templates` they shouldn't fail anymore. - **Steps for generating new CRD list:** 1. On line 21 in `gen-missing-crd-schemas.sh` set the correct shorthand for the CRD's you want in your new list. The CRD is selected by a `kubectl get crd | grep $shorthand` 2. Execute the script `gen-missing-crd-schemas.sh` 3. This wil generate a new directory `generated-crd-schemas` in there are your CRD's. Check if they are correct. Or unpack the new `generated-crd-schemas.tar.gz` and check if the correct CRD's are in there -4. Run the tests `NODE_ENV=test binzx/otomi validate-templates` they shouldn't fail anymore. \ No newline at end of file +4. Run the tests `NODE_ENV=test binzx/otomi validate-templates` they shouldn't fail anymore. diff --git a/src/cmd/apply.ts b/src/cmd/apply.ts index ea0a14f656..d2f4b4228b 100644 --- a/src/cmd/apply.ts +++ b/src/cmd/apply.ts @@ -14,7 +14,7 @@ import { runtimeUpgrade } from '../common/runtime-upgrade' import { applyAsApps } from './apply-as-apps' import { applyTeams } from './apply-teams' import { commit } from './commit' -import { troubleshoot } from './troubleshoot' +import { collectTraces } from './traces' import { upgrade } from './upgrade' const cmdName = getFilename(__filename) @@ -87,11 +87,11 @@ export const apply = async (): Promise => { await applyAll() } catch (e) { d.error(e) - // Generate troubleshooting report on apply failure + // Collect traces on apply failure try { - await troubleshoot() - } catch (troubleshootError) { - d.error('Failed to generate troubleshooting report:', troubleshootError) + await collectTraces() + } catch (traceError) { + d.error('Failed to collect traces:', traceError) } d.info(`Retrying in ${retryOptions.maxTimeout} ms`) throw e diff --git a/src/cmd/collect.ts b/src/cmd/collect.ts new file mode 100644 index 0000000000..a336ec60d9 --- /dev/null +++ b/src/cmd/collect.ts @@ -0,0 +1,13 @@ +import { Argv, CommandModule } from 'yargs' +import { module as tracesModule } from './traces' + +export const module: CommandModule = { + command: 'collect ', + describe: 'Collect diagnostic information from the cluster', + builder: (yargs: Argv): Argv => { + return yargs.command(tracesModule as CommandModule).demandCommand(1, 'You must specify a subcommand') + }, + handler: (): void => { + // Handler is not called when subcommands are used + }, +} diff --git a/src/cmd/index.ts b/src/cmd/index.ts index eef6be81a8..c8e286d117 100644 --- a/src/cmd/index.ts +++ b/src/cmd/index.ts @@ -24,7 +24,8 @@ import { module as statusModule } from './status' import { module as syncModule } from './sync' import { module as templateModule } from './template' import { module as testModule } from './test' -import { module as troubleshootModule } from './troubleshoot' +import { module as collectModule } from './collect' +import { module as tracesModule } from './traces' import { module as upgradeModule } from './upgrade' import { module as validateClusterModule } from './validate-cluster' import { module as validateTemplatesModule } from './validate-templates' @@ -56,7 +57,8 @@ export { module as status } from './status' export { module as sync } from './sync' export { module as template } from './template' export { module as test } from './test' -export { module as troubleshoot } from './troubleshoot' +export { module as collect } from './collect' +export { module as traces } from './traces' export { module as upgrade } from './upgrade' export { module as validateTemplates } from './validate-templates' export { module as validateValues } from './validate-values' @@ -90,7 +92,7 @@ export const commands: CommandModule[] = [ syncModule, templateModule, testModule, - troubleshootModule, + collectModule, validateClusterModule, validateTemplatesModule, validateValuesModule, diff --git a/src/cmd/install.ts b/src/cmd/install.ts index df43b5fff0..eff47f3add 100644 --- a/src/cmd/install.ts +++ b/src/cmd/install.ts @@ -25,7 +25,7 @@ import { printWelcomeMessage, retryIsOAuth2ProxyRunning, } from './commit' -import { troubleshoot } from './troubleshoot' +import { collectTraces } from './traces' const cmdName = getFilename(__filename) const dir = '/tmp/otomi/' @@ -146,11 +146,11 @@ const install = async (): Promise => { await installAll() } catch (e) { d.error(e) - // Generate troubleshooting report on installation failure + // Collect traces on installation failure try { - await troubleshoot() - } catch (troubleshootError) { - d.error('Failed to generate troubleshooting report:', troubleshootError) + await collectTraces() + } catch (traceError) { + d.error('Failed to collect traces:', traceError) } d.info(`Retrying in ${retryOptions.maxTimeout} ms`) throw e diff --git a/src/cmd/troubleshoot.test.ts b/src/cmd/traces.test.ts similarity index 60% rename from src/cmd/troubleshoot.test.ts rename to src/cmd/traces.test.ts index a3f1c595f5..f98fdfc2cd 100644 --- a/src/cmd/troubleshoot.test.ts +++ b/src/cmd/traces.test.ts @@ -9,6 +9,7 @@ jest.mock('src/common/cli', () => ({ jest.mock('src/common/debug', () => ({ terminal: jest.fn(() => ({ info: jest.fn(), + warn: jest.fn(), error: jest.fn(), stream: { log: process.stdout, error: process.stderr }, })), @@ -20,7 +21,7 @@ jest.mock('src/common/yargs', () => ({ setParsedArgs: jest.fn(), })) -import { troubleshoot } from './troubleshoot' +import { collectTraces } from './traces' class MockApiException extends ApiException { code: number @@ -30,7 +31,7 @@ class MockApiException extends ApiException { } } -describe('Troubleshoot Command', () => { +describe('Collect Traces Command', () => { let mockCoreApi: any let mockAppsApi: any let mockCustomApi: any @@ -161,12 +162,12 @@ describe('Troubleshoot Command', () => { mockCoreApi.readNamespacedConfigMap.mockRejectedValue(new MockApiException(404, 'Not Found')) mockCoreApi.createNamespacedConfigMap.mockResolvedValue({}) - await troubleshoot() + await collectTraces() expect(mockCoreApi.createNamespacedConfigMap).toHaveBeenCalledWith({ namespace: 'apl-operator', body: { - metadata: { name: 'apl-troubleshooting-report' }, + metadata: { name: 'apl-traces-report' }, data: { report: expect.any(String) }, }, }) @@ -202,7 +203,7 @@ describe('Troubleshoot Command', () => { mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) - await troubleshoot() + await collectTraces() // Should not create ConfigMap for healthy cluster expect(mockCoreApi.createNamespacedConfigMap).not.toHaveBeenCalled() @@ -211,7 +212,7 @@ describe('Troubleshoot Command', () => { it('should update existing ConfigMap instead of creating new one', async () => { const existingConfigMap = { - metadata: { name: 'apl-troubleshooting-report' }, + metadata: { name: 'apl-traces-report' }, data: { report: '{"old": "data"}' }, } @@ -234,7 +235,7 @@ describe('Troubleshoot Command', () => { mockCoreApi.readNamespacedConfigMap.mockResolvedValue(existingConfigMap) mockCoreApi.replaceNamespacedConfigMap.mockResolvedValue({}) - await troubleshoot() + await collectTraces() expect(mockCoreApi.replaceNamespacedConfigMap).toHaveBeenCalled() expect(mockCoreApi.createNamespacedConfigMap).not.toHaveBeenCalled() @@ -249,9 +250,117 @@ describe('Troubleshoot Command', () => { mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) mockCustomApi.listClusterCustomObject.mockRejectedValue(new MockApiException(404, 'Not Found')) - await troubleshoot() + await collectTraces() // Should not throw error expect(mockCoreApi.listPodForAllNamespaces).toHaveBeenCalled() }) + + it('should continue collecting resources when one type fails', async () => { + // Mock pods to fail + mockCoreApi.listPodForAllNamespaces.mockRejectedValue(new Error('API error')) + + // Mock deployments to succeed with issues + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'test-deployment', namespace: 'default' }, + status: { replicas: 3, availableReplicas: 1 }, + }, + ], + }) + + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + mockCoreApi.readNamespacedConfigMap.mockRejectedValue(new MockApiException(404, 'Not Found')) + mockCoreApi.createNamespacedConfigMap.mockResolvedValue({}) + + await collectTraces() + + // Should create ConfigMap with deployment issues + expect(mockCoreApi.createNamespacedConfigMap).toHaveBeenCalled() + + // eslint-disable-next-line prefer-destructuring, @typescript-eslint/no-unsafe-argument + const configMapCall = mockCoreApi.createNamespacedConfigMap.mock.calls[0][0] + const reportData = JSON.parse(configMapCall.body.data.report) + + // Should have deployment in failed resources + expect(reportData.failedResources).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + kind: 'Deployment', + name: 'test-deployment', + }), + ]), + ) + + // Should have error entry + expect(reportData.errors).toEqual(expect.arrayContaining(['API error'])) + }) + + it('should include errors field in report when collection failures occur', async () => { + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ items: [] }) + mockAppsApi.listDeploymentForAllNamespaces.mockRejectedValue(new MockApiException(403, 'Permission denied')) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockRejectedValue(new Error('Connection timeout')) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + await collectTraces() + + // Should not throw error despite multiple failures + expect(mockCoreApi.listPodForAllNamespaces).toHaveBeenCalled() + }) + + it('should not include errors field when all collections succeed', async () => { + mockCoreApi.listPodForAllNamespaces.mockResolvedValue({ + items: [ + { + metadata: { name: 'failed-pod', namespace: 'default' }, + status: { phase: 'Failed' }, + }, + ], + }) + mockAppsApi.listDeploymentForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listNamespace.mockResolvedValue({ items: [] }) + mockCoreApi.listNode.mockResolvedValue({ items: [] }) + mockCoreApi.listServiceForAllNamespaces.mockResolvedValue({ items: [] }) + mockCoreApi.listPersistentVolume.mockResolvedValue({ items: [] }) + mockCustomApi.listClusterCustomObject.mockResolvedValue({ items: [] }) + + mockCoreApi.readNamespacedConfigMap.mockRejectedValue(new MockApiException(404, 'Not Found')) + mockCoreApi.createNamespacedConfigMap.mockResolvedValue({}) + + await collectTraces() + + // eslint-disable-next-line prefer-destructuring, @typescript-eslint/no-unsafe-argument + const configMapCall = mockCoreApi.createNamespacedConfigMap.mock.calls[0][0] + const reportData = JSON.parse(configMapCall.body.data.report) + + // Should not have errors field when all collections succeed + expect(reportData.errors).toBeUndefined() + }) + + it('should handle multiple simultaneous collection failures', async () => { + // Mock multiple resource types to fail + mockCoreApi.listPodForAllNamespaces.mockRejectedValue(new Error('Pods API failed')) + mockAppsApi.listDeploymentForAllNamespaces.mockRejectedValue(new Error('Deployments API failed')) + mockCoreApi.listNamespace.mockRejectedValue(new Error('Namespace API failed')) + mockCoreApi.listNode.mockRejectedValue(new Error('Node API failed')) + mockCoreApi.listServiceForAllNamespaces.mockRejectedValue(new Error('Service API failed')) + mockCoreApi.listPersistentVolume.mockRejectedValue(new Error('PV API failed')) + mockCustomApi.listClusterCustomObject.mockRejectedValue(new Error('ArgoCD API failed')) + + await collectTraces() + + // Should complete without throwing despite all failures + expect(mockCoreApi.listPodForAllNamespaces).toHaveBeenCalled() + // Should not create ConfigMap when no issues found and all failed + expect(mockCoreApi.createNamespacedConfigMap).not.toHaveBeenCalled() + }) }) diff --git a/src/cmd/troubleshoot.ts b/src/cmd/traces.ts similarity index 74% rename from src/cmd/troubleshoot.ts rename to src/cmd/traces.ts index b46a42c58e..898f2cd3a7 100644 --- a/src/cmd/troubleshoot.ts +++ b/src/cmd/traces.ts @@ -15,6 +15,16 @@ interface ResourceReport { value: string } +interface TraceReport { + timestamp: string + failedResources: ResourceReport[] + summary: { + total: number + byType: Record + } + errors?: string[] +} + /** * Get pods with issues across all namespaces */ @@ -25,7 +35,7 @@ async function getPodsWithIssues(): Promise { await Promise.all( response.items.map(async (pod) => { - const namespace = pod.metadata?.namespace || 'default' + const namespace = pod.metadata?.namespace || 'unknown' const podName = pod.metadata?.name || 'unknown' const issues: string[] = [] @@ -233,65 +243,53 @@ async function getArgoApplicationsWithIssues(): Promise { const customApi = k8s.custom() const applications: ResourceReport[] = [] - try { - const response = await customApi.listClusterCustomObject({ - group: 'argoproj.io', - version: 'v1alpha1', - plural: 'applications', - }) + const response = await customApi.listClusterCustomObject({ + group: 'argoproj.io', + version: 'v1alpha1', + plural: 'applications', + }) - const items = (response as any).items || [] + const items = (response as any).items || [] - items.forEach((app: any) => { - const name = app.metadata?.name || 'unknown' - const namespace = app.metadata?.namespace || 'unknown' - const healthStatus = app.status?.health?.status - const syncStatus = app.status?.sync?.status - const issues: string[] = [] + items.forEach((app: any) => { + const name = app.metadata?.name || 'unknown' + const namespace = app.metadata?.namespace || 'unknown' + const healthStatus = app.status?.health?.status + const syncStatus = app.status?.sync?.status + const issues: string[] = [] - if (healthStatus && healthStatus !== 'Healthy') { - const healthMessage = app.status?.health?.message || 'Unknown' - issues.push(`HealthStatus: ${healthStatus} message: ${healthMessage}`) - } + if (healthStatus && healthStatus !== 'Healthy') { + const healthMessage = app.status?.health?.message || 'Unknown' + issues.push(`HealthStatus: ${healthStatus} message: ${healthMessage}`) + } - if (syncStatus && syncStatus !== 'Synced') { - issues.push(`SyncStatus: ${syncStatus}`) - } + if (syncStatus && syncStatus !== 'Synced') { + issues.push(`SyncStatus: ${syncStatus}`) + } - const operationPhase = app.status?.operationState?.phase - if (operationPhase && operationPhase !== 'Succeeded') { - const message = app.status?.operationState?.message || 'Unknown' - issues.push(`Operation: ${operationPhase} - ${message}`) - } + const operationPhase = app.status?.operationState?.phase + if (operationPhase && operationPhase !== 'Succeeded') { + const message = app.status?.operationState?.message || 'Unknown' + issues.push(`Operation: ${operationPhase} - ${message}`) + } - issues.forEach((issue) => { - applications.push({ - kind: 'Application', - name, - namespace, - value: issue, - }) + issues.forEach((issue) => { + applications.push({ + kind: 'Application', + name, + namespace, + value: issue, }) }) - } catch (error) { - // If ArgoCD is not installed or CRD doesn't exist, silently skip - if (error instanceof ApiException && (error.code === 404 || error.code === 403)) { - return [] - } - throw error - } + }) return applications } /** - * Write troubleshooting report to ConfigMap + * Write trace report to ConfigMap */ -async function writeReportToConfigMap( - name: string, - namespace: string, - report: { timestamp: string; failedResources: ResourceReport[]; summary: any }, -): Promise { +async function writeReportToConfigMap(name: string, namespace: string, report: TraceReport): Promise { const coreApi = k8s.core() const reportJson = JSON.stringify(report, null, 2) @@ -320,16 +318,16 @@ async function writeReportToConfigMap( } /** - * Main troubleshoot function + * Main collect traces function */ -export async function troubleshoot(): Promise { - const d = terminal(`cmd:${cmdName}:troubleshoot`) +export async function collectTraces(): Promise { + const d = terminal(`cmd:${cmdName}:collectTraces`) try { - d.info('Starting troubleshooting scan...') + d.info('Collecting traces from cluster resources...') - // Gather all failed resources - const [pods, deployments, statefulSets, nodes, services, pvcs, pvs, argoApps] = await Promise.all([ + // Gather all failed resources using allSettled to continue on individual failures + const results = await Promise.allSettled([ getPodsWithIssues(), getDeploymentsWithIssues(), getStatefulSetsWithIssues(), @@ -340,19 +338,30 @@ export async function troubleshoot(): Promise { getArgoApplicationsWithIssues(), ]) - const failedResources = [ - ...pods, - ...deployments, - ...statefulSets, - ...nodes, - ...services, - ...pvcs, - ...pvs, - ...argoApps, - ] + // Process results and collect both resources and errors + const failedResources: ResourceReport[] = [] + const collectionErrors: string[] = [] + + results.forEach((result) => { + if (result.status === 'fulfilled') { + failedResources.push(...result.value) + } else { + const error = result.reason + const errorMessage = error instanceof Error ? error.message : String(error) + + // Log based on error type + if (error instanceof ApiException && (error.code === 404 || error.code === 403)) { + d.info(`Resource collection skipped (expected if not installed): ${errorMessage}`) + } else { + d.warn(`Failed to collect resources: ${errorMessage}`) + } + + collectionErrors.push(errorMessage) + } + }) // Generate report - const report = { + const report: TraceReport = { timestamp: new Date().toISOString(), failedResources, summary: { @@ -365,34 +374,35 @@ export async function troubleshoot(): Promise { {} as Record, ), }, + ...(collectionErrors.length > 0 && { errors: collectionErrors }), } // Store in ConfigMap - const configMapName = 'apl-troubleshooting-report' + const configMapName = 'apl-traces-report' const targetNamespace = 'apl-operator' if (failedResources.length === 0) { - d.info('Your APL instance seems to be healthy.') + d.info('No failing resources found. Your APL instance seems to be healthy.') } else { await writeReportToConfigMap(configMapName, targetNamespace, report) d.info( - `Troubleshooting report stored in ConfigMap ${targetNamespace}/${configMapName} (${failedResources.length} failed resources)`, + `Trace report stored in ConfigMap ${targetNamespace}/${configMapName} (${failedResources.length} failed resources)`, ) } } catch (error) { - d.error('Troubleshooting scan failed:', error) + d.error('Failed to collect traces:', error) throw error } } export const module = { - command: cmdName, - describe: 'Generate troubleshooting report of failed resources and store in ConfigMap', + command: 'traces', + describe: 'Collect traces of failed resources and store report in ConfigMap', builder: (parser: Argv): Argv => parser, handler: async (argv: BasicArguments): Promise => { setParsedArgs(argv) await prepareEnvironment({ skipEnvDirCheck: true, skipDecrypt: true }) - await troubleshoot() + await collectTraces() }, } diff --git a/tests/fixtures/env/apps/kserve.yaml b/tests/fixtures/env/apps/kserve.yaml index af83844b5e..73c29603cb 100644 --- a/tests/fixtures/env/apps/kserve.yaml +++ b/tests/fixtures/env/apps/kserve.yaml @@ -22,8 +22,8 @@ spec: memory: 300Mi inferenceService: limits: - cpu: "1" - memory: "2Gi" + cpu: '1' + memory: '2Gi' requests: - cpu: "1" - memory: "2Gi" \ No newline at end of file + cpu: '1' + memory: '2Gi' diff --git a/tests/fixtures/env/apps/secrets.kubeflow-pipelines.yaml b/tests/fixtures/env/apps/secrets.kubeflow-pipelines.yaml index 14a4331d23..953af4f5c9 100644 --- a/tests/fixtures/env/apps/secrets.kubeflow-pipelines.yaml +++ b/tests/fixtures/env/apps/secrets.kubeflow-pipelines.yaml @@ -3,4 +3,4 @@ spec: rootPassword: mysqlsomesecretvalue name: kubeflow-pipelines metadata: - name: kubeflow-pipelines \ No newline at end of file + name: kubeflow-pipelines diff --git a/tests/fixtures/env/teams/demo/agents/my-agent.yaml b/tests/fixtures/env/teams/demo/agents/my-agent.yaml index 48dd0e0787..d4da7f8519 100644 --- a/tests/fixtures/env/teams/demo/agents/my-agent.yaml +++ b/tests/fixtures/env/teams/demo/agents/my-agent.yaml @@ -1,11 +1,11 @@ apiVersion: akamai.io/v1alpha1 kind: AkamaiAgent metadata: - name: my-agent - namespace: team-demo - labels: - apl.io/teamId: demo + name: my-agent + namespace: team-demo + labels: + apl.io/teamId: demo spec: - foundationModel: llama - systemPrompt: You're a helpful AI assistant - knowledgeBase: demo-kb \ No newline at end of file + foundationModel: llama + systemPrompt: You're a helpful AI assistant + knowledgeBase: demo-kb diff --git a/tests/fixtures/env/teams/demo/databases/pgvector-db.yaml b/tests/fixtures/env/teams/demo/databases/pgvector-db.yaml index 366f7945c4..e50e22a976 100644 --- a/tests/fixtures/env/teams/demo/databases/pgvector-db.yaml +++ b/tests/fixtures/env/teams/demo/databases/pgvector-db.yaml @@ -1,10 +1,10 @@ apiVersion: postgresql.cnpg.io/v1 kind: Database metadata: - name: pgvector-db - namespace: team-demo + name: pgvector-db + namespace: team-demo spec: - cluster: - name: pgvector-cluster - name: knowledge_base - owner: app \ No newline at end of file + cluster: + name: pgvector-cluster + name: knowledge_base + owner: app diff --git a/tests/fixtures/env/teams/demo/knowledgebases/demo-kb.yaml b/tests/fixtures/env/teams/demo/knowledgebases/demo-kb.yaml index ba4228497e..96b295b632 100644 --- a/tests/fixtures/env/teams/demo/knowledgebases/demo-kb.yaml +++ b/tests/fixtures/env/teams/demo/knowledgebases/demo-kb.yaml @@ -1,18 +1,18 @@ apiVersion: akamai.io/v1alpha1 kind: AkamaiKnowledgeBase metadata: - name: demo-kb - namespace: team-demo + name: demo-kb + namespace: team-demo spec: - pipelineName: "custom-pipeline" - pipelineParameters: - url: "https://docs.example.com/api" - table_name: "custom_knowledge_base" - embedding_model: "text-embedding-3-small" - embedding_api_base: "https://api.openai.com/v1" - embed_dim: 1536 - embed_batch_size: 100 - db_host: "pgvector-cluster-rw" - secret_name: "openai-secret" - secret_namespace: "team-demo" - db_port: 5432 \ No newline at end of file + pipelineName: 'custom-pipeline' + pipelineParameters: + url: 'https://docs.example.com/api' + table_name: 'custom_knowledge_base' + embedding_model: 'text-embedding-3-small' + embedding_api_base: 'https://api.openai.com/v1' + embed_dim: 1536 + embed_batch_size: 100 + db_host: 'pgvector-cluster-rw' + secret_name: 'openai-secret' + secret_namespace: 'team-demo' + db_port: 5432 diff --git a/values/prometheus-operator/rules/orcs-compliance.yaml b/values/prometheus-operator/rules/orcs-compliance.yaml index 6bcb30d315..e48af0b488 100644 --- a/values/prometheus-operator/rules/orcs-compliance.yaml +++ b/values/prometheus-operator/rules/orcs-compliance.yaml @@ -126,4 +126,4 @@ groups: labels: severity: info component: orcs-compliance - team: platform \ No newline at end of file + team: platform