tracebloc · saadqbal · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/client/Chart.yaml b/client/Chart.yaml
@@ -2,8 +2,8 @@ apiVersion: v2
 name: client
 description: A unified Helm chart for tracebloc on AKS, EKS, bare-metal, and OpenShift
 type: application
-version: 1.0.8
-appVersion: "1.0.8"
+version: 1.1.0
+appVersion: "1.1.0"
 keywords:
   - tracebloc
   - kubernetes

@@ -34,13 +34,18 @@ spec:
           allowPrivilegeEscalation: false
           capabilities:
             drop: ["ALL"]
+        # The old 256Mi request was too tight under load: on a busy cluster the
+        # Service Bus client + pod-event watcher push RSS past it within minutes,
+        # the pod becomes the OOM-killer's preferred target on the node, and
+        # mysqld dies as collateral. 512Mi request / 1Gi limit covers steady state
+        # with headroom; revisit if heap profiling shows the working set growing.
         resources:
           requests:
-            cpu: 100m
-            memory: 256Mi
+            cpu: {{ .Values.resources.jobsManager.requests.cpu | default "100m" | quote }}
+            memory: {{ .Values.resources.jobsManager.requests.memory | default "512Mi" | quote }}
           limits:
-            cpu: 500m
-            memory: 512Mi
+            cpu: {{ .Values.resources.jobsManager.limits.cpu | default "1000m" | quote }}
+            memory: {{ .Values.resources.jobsManager.limits.memory | default "1Gi" | quote }}
         volumeMounts:
           - name: shared-volume
             mountPath: "/data/shared"
@@ -92,11 +97,11 @@ spec:
             drop: ["ALL"]
         resources:
           requests:
-            cpu: 50m
-            memory: 128Mi
+            cpu: {{ .Values.resources.podsMonitor.requests.cpu | default "50m" | quote }}
+            memory: {{ .Values.resources.podsMonitor.requests.memory | default "256Mi" | quote }}
           limits:
-            cpu: 200m
-            memory: 256Mi
+            cpu: {{ .Values.resources.podsMonitor.limits.cpu | default "500m" | quote }}
+            memory: {{ .Values.resources.podsMonitor.limits.memory | default "512Mi" | quote }}
         volumeMounts:
         - name: logs-volume
           mountPath: "/data/logs"

@@ -1,3 +1,4 @@
+{{- if .Values.podDisruptionBudget.jobsManager.create }}
 apiVersion: policy/v1
 kind: PodDisruptionBudget
 metadata:
@@ -6,7 +7,12 @@ metadata:
   labels:
     {{- include "tracebloc.labels" . | nindent 4 }}
 spec:
-  maxUnavailable: 1
+  # Single-replica deployment: maxUnavailable: 1 is a no-op (the only pod can
+  # always go away). minAvailable: 1 actually blocks voluntary disruptions
+  # while replicas == 1. Set podDisruptionBudget.jobsManager.create=false when
+  # running a multi-replica jobs-manager that is managed externally.
+  minAvailable: 1
   selector:
     matchLabels:
       app: manager
+{{- end }}
@@ -19,6 +19,16 @@ spec:
         app: mysql-client
     spec:
       terminationGracePeriodSeconds: 60
+      {{- /*
+        Reference the PriorityClass whenever a name is set, regardless of
+        priorityClass.create. Operators on GitOps / shared-platform setups
+        manage the PriorityClass out-of-band (create: false) but still want
+        the pod to reference it — gating on `create` would silently strip
+        the eviction/preemption protection this chart's mysql tuning relies on.
+      */}}
+      {{- with .Values.priorityClass.name }}
+      priorityClassName: {{ . | quote }}
+      {{- end }}
       securityContext:
         fsGroup: 999
         fsGroupChangePolicy: "OnRootMismatch"
@@ -66,32 +76,55 @@ spec:
             - ALL
           seccompProfile:
             type: RuntimeDefault
+        # requests.memory == limits.memory pins the cgroup memory budget so mysqld
+        # is an unlikely victim of node-level OOM (QoS stays Burstable, not
+        # Guaranteed, while no CPU limit is set). CPU limit is intentionally omitted:
+        # throttled CPU causes InnoDB lock-wait timeouts (mysqld can't service
+        # heartbeats) and is a far more common cause of mysql crashes than a noisy
+        # neighbour. PriorityClass + PodDisruptionBudget cover the eviction angle instead.
         resources:
           requests:
-            memory: 256Mi
-            cpu: 100m
+            memory: {{ .Values.resources.mysql.requests.memory | default "1Gi" | quote }}
+            cpu: {{ .Values.resources.mysql.requests.cpu | default "250m" | quote }}
           limits:
-            memory: 1Gi
-            cpu: 1000m
+            memory: {{ .Values.resources.mysql.limits.memory | default "1Gi" | quote }}
+            {{- /*
+              CPU limit is intentionally unset by default (see comment block
+              above). Schema permits it, so honour an explicit override here
+              instead of silently dropping it. Operators flipping this on
+              should know what they're doing.
+            */}}
+            {{- with .Values.resources.mysql.limits.cpu }}
+            cpu: {{ . | quote }}
+            {{- end }}
         ports:
         - containerPort: 3306
           name: mysql-client
+        # timeoutSeconds defaults to 1; under CPU contention `mysqladmin ping`
+        # routinely needs >1s to even spawn, which kubelet reports as a probe
+        # failure and kills the container. 5s (3s for readiness) survives transient
+        # CPU pressure; liveness still catches a real hang in ~50s (5 x 10s period).
         startupProbe:
           exec:
             command: ["mysqladmin", "ping", "-h", "localhost"]
           initialDelaySeconds: 10
           periodSeconds: 5
+          timeoutSeconds: 5
           failureThreshold: 24
         livenessProbe:
           exec:
             command: ["mysqladmin", "ping", "-h", "localhost"]
           initialDelaySeconds: 0
           periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 5
         readinessProbe:
           exec:
             command: ["mysqladmin", "ping", "-h", "localhost"]
           initialDelaySeconds: 0
           periodSeconds: 5
+          timeoutSeconds: 3
+          failureThreshold: 3
         volumeMounts:
         - name: mysql-persistent-storage
           mountPath: /var/lib/mysql/

@@ -1,12 +1,21 @@
+{{- if .Values.podDisruptionBudget.mysql.create }}
 apiVersion: policy/v1
 kind: PodDisruptionBudget
 metadata:
   name: mysql-client
   namespace: {{ .Release.Namespace }}
   labels:
     {{- include "tracebloc.labels" . | nindent 4 }}
+    app: mysql-client
 spec:
+  # Single-replica deployment; minAvailable: 1 blocks *voluntary* disruptions
+  # (node drains, surge upgrades, cluster-autoscaler scale-downs) while there
+  # is only one mysql pod. Involuntary kills (kernel OOM, node failure) still
+  # happen; memory parity and the PriorityClass only make OOM kills and
+  # evictions less likely. Set podDisruptionBudget.mysql.create=false on
+  # multi-replica setups managed externally.
   minAvailable: 1
   selector:
     matchLabels:
       app: mysql-client
+{{- end }}
@@ -0,0 +1,19 @@
+{{- if .Values.priorityClass.create }}
+apiVersion: scheduling.k8s.io/v1
+kind: PriorityClass
+metadata:
+  name: {{ .Values.priorityClass.name | quote }}
+  labels:
+    {{- include "tracebloc.labels" . | nindent 4 }}
+  annotations:
+    # PriorityClass is cluster-scoped, so we never let helm uninstall yank it —
+    # other releases may reference it (with create=false). A re-install under the
+    # same release name and namespace adopts the kept object.
+    helm.sh/resource-policy: keep
+value: {{ .Values.priorityClass.value | int }}
+globalDefault: false
+description: >-
+  Tracebloc data-plane workloads (mysql-client). Sits above default user
+  workloads so the scheduler preempts noisy training jobs to keep mysql
+  running, but stays below system-cluster-critical (2,000,000,000) so it
+  cannot starve cluster-essential pods.
+{{- end }}
diff --git a/client/tests/jobs_manager_test.yaml b/client/tests/jobs_manager_test.yaml
@@ -57,6 +57,19 @@ tests:
           path: spec.template.spec.containers[0].resources.requests
       - isNotEmpty:
           path: spec.template.spec.containers[0].resources.limits
+      # v1.1.0 defaults after the prod OOM investigation: 512Mi/1Gi for api
+      # (containers[0]) and 256Mi/512Mi for the second container (containers[1]).
+      - equal:
+          path: spec.template.spec.containers[0].resources.requests.memory
+          value: 512Mi
+      - equal:
+          path: spec.template.spec.containers[0].resources.limits.memory
+          value: 1Gi
+      - equal:
+          path: spec.template.spec.containers[1].resources.requests.memory
+          value: 256Mi
+      - equal:
+          path: spec.template.spec.containers[1].resources.limits.memory
+          value: 512Mi
 
   - it: should reference secret via helper
     asserts:

diff --git a/client/tests/mysql_test.yaml b/client/tests/mysql_test.yaml
@@ -35,15 +35,78 @@ tests:
           path: spec.template.spec.containers[0].livenessProbe.exec.command[0]
           value: mysqladmin
 
-  - it: should have resource limits
+  - it: should have memory parity (requests == limits) and no cpu limit on mysql
     template: templates/mysql-deployment.yaml
     asserts:
       - equal:
           path: spec.template.spec.containers[0].resources.requests.memory
-          value: 256Mi
+          value: 1Gi
       - equal:
           path: spec.template.spec.containers[0].resources.limits.memory
           value: 1Gi
+      - equal:
+          path: spec.template.spec.containers[0].resources.requests.cpu
+          value: 250m
+      # CPU limit deliberately unset — InnoDB lock-wait timeouts under throttling.
+      - notExists:
+          path: spec.template.spec.containers[0].resources.limits.cpu
+
+  - it: "liveness probe should tolerate CPU pressure (timeoutSeconds 5)"
+    template: templates/mysql-deployment.yaml
+    asserts:
+      - equal:
+          path: spec.template.spec.containers[0].livenessProbe.timeoutSeconds
+          value: 5
+      - equal:
+          path: spec.template.spec.containers[0].livenessProbe.failureThreshold
+          value: 5
+
+  - it: should reference the data-plane PriorityClass when enabled
+    template: templates/mysql-deployment.yaml
+    asserts:
+      - equal:
+          path: spec.template.spec.priorityClassName
+          value: tracebloc-data-plane
+
+  - it: should keep priorityClassName when create=false (externally managed PriorityClass)
+    template: templates/mysql-deployment.yaml
+    set:
+      priorityClass:
+        create: false
+        name: tracebloc-data-plane
+    asserts:
+      - equal:
+          path: spec.template.spec.priorityClassName
+          value: tracebloc-data-plane
+
+  - it: should drop priorityClassName only when name is empty
+    template: templates/mysql-deployment.yaml
+    set:
+      priorityClass:
+        # Schema rejects create=true with empty name (no point templating a
+        # nameless PriorityClass), so disable creation as well to model the
+        # "no PriorityClass at all" path.
+        create: false
+        name: ""
+    asserts:
+      - notExists:
+          path: spec.template.spec.priorityClassName
+
+  - it: should render an explicit mysql cpu limit when operator opts in
+    template: templates/mysql-deployment.yaml
+    set:
+      resources:
+        mysql:
+          limits:
+            cpu: "1500m"
+            memory: "2Gi"
+    asserts:
+      - equal:
+          path: spec.template.spec.containers[0].resources.limits.cpu
+          value: 1500m
+      - equal:
+          path: spec.template.spec.containers[0].resources.limits.memory
+          value: 2Gi
 
   - it: should create ConfigMap with MySQL config
     template: templates/mysql-configmap.yaml

diff --git a/client/tests/priority_class_pdb_test.yaml b/client/tests/priority_class_pdb_test.yaml
@@ -0,0 +1,98 @@
+suite: PriorityClass and PodDisruptionBudget
+templates:
+  - templates/priority-class.yaml
+  - templates/mysql-pdb.yaml
+  - templates/jobs-manager-pdb.yaml
+set:
+  clientId: "test-id"
+  clientPassword: "test"
+tests:
+  - it: should create the data-plane PriorityClass by default
+    template: templates/priority-class.yaml
+    asserts:
+      - isKind:
+          of: PriorityClass
+      - equal:
+          path: metadata.name
+          value: tracebloc-data-plane
+      - equal:
+          path: value
+          value: 1000000
+      - equal:
+          path: metadata.annotations["helm.sh/resource-policy"]
+          value: keep
+      - equal:
+          path: globalDefault
+          value: false
+
+  - it: should respect a custom PriorityClass name and value
+    template: templates/priority-class.yaml
+    set:
+      priorityClass:
+        create: true
+        name: tb-prio
+        value: 500000
+    asserts:
+      - equal:
+          path: metadata.name
+          value: tb-prio
+      - equal:
+          path: value
+          value: 500000
+
+  - it: should not render PriorityClass when create is false
+    template: templates/priority-class.yaml
+    set:
+      priorityClass:
+        create: false
+    asserts:
+      - hasDocuments:
+          count: 0
+
+  - it: schema should reject create=true with an empty name
+    template: templates/priority-class.yaml
+    set:
+      priorityClass:
+        create: true
+        name: ""
+    asserts:
+      - failedTemplate:
+          errorPattern: "name: String length must be greater than or equal to 1"
+
+  - it: should create the mysql PDB by default
+    template: templates/mysql-pdb.yaml
+    asserts:
+      - isKind:
+          of: PodDisruptionBudget
+      - equal:
+          path: metadata.name
+          value: mysql-client
+      - equal:
+          path: spec.minAvailable
+          value: 1
+      - equal:
+          path: spec.selector.matchLabels.app
+          value: mysql-client
+
+  - it: should not render mysql PDB when disabled
+    template: templates/mysql-pdb.yaml
+    set:
+      podDisruptionBudget:
+        mysql:
+          create: false
+    asserts:
+      - hasDocuments:
+          count: 0
+
+  - it: jobs-manager PDB should use minAvailable (not maxUnavailable) to actually protect a 1-replica deployment
+    templates:
+      - templates/jobs-manager-pdb.yaml
+    template: templates/jobs-manager-pdb.yaml
+    asserts:
+      - isKind:
+          of: PodDisruptionBudget
+      - equal:
+          path: spec.minAvailable
+          value: 1
+      - notExists:
+          path: spec.maxUnavailable