Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 86 additions & 7 deletions apps/supervisor/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,16 @@ const Env = z
KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods
// Large machine affinity settings - large-* presets prefer a dedicated pool
KUBERNETES_LARGE_MACHINE_AFFINITY_ENABLED: BoolEnv.default(false),
KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_KEY: z.string().trim().min(1).default("node.cluster.x-k8s.io/machinepool"),
KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_VALUE: z.string().trim().min(1).default("large-machines"),
KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_KEY: z
.string()
.trim()
.min(1)
.default("node.cluster.x-k8s.io/machinepool"),
KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_VALUE: z
.string()
.trim()
.min(1)
.default("large-machines"),
KUBERNETES_LARGE_MACHINE_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(100),

// Project affinity settings - pods from the same project prefer the same node
Expand All @@ -137,11 +145,82 @@ const Env = z
.default("kubernetes.io/hostname"),

// Schedule affinity settings - runs from schedule trees prefer a dedicated pool
KUBERNETES_SCHEDULE_AFFINITY_ENABLED: BoolEnv.default(false),
KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_KEY: z.string().trim().min(1).default("node.cluster.x-k8s.io/machinepool"),
KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE: z.string().trim().min(1).default("scheduled-runs"),
KUBERNETES_SCHEDULE_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(80),
KUBERNETES_SCHEDULE_ANTI_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(20),
KUBERNETES_SCHEDULED_RUN_AFFINITY_ENABLED: BoolEnv.default(false),
KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_KEY: z
.string()
.trim()
.min(1)
.default("node.cluster.x-k8s.io/machinepool"),
KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_VALUE: z
.string()
.trim()
.min(1)
.default("scheduled-runs"),
KUBERNETES_SCHEDULED_RUN_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(80),
KUBERNETES_SCHEDULED_RUN_ANTI_AFFINITY_WEIGHT: z.coerce
.number()
.int()
.min(1)
.max(100)
.default(20),

// Scheduled-run toleration settings - scheduled runs tolerate taints on the dedicated pool
// The value is a comma-separated list; blank entries are ignored. Each entry is one of:
//   key=value:effect  -> operator "Equal"
//   key:effect        -> operator "Exists" (no value)
// effect must be NoSchedule, NoExecute or PreferNoSchedule
KUBERNETES_SCHEDULED_RUN_TOLERATIONS: z
  .string()
  .transform((raw, ctx) => {
    const allowedEffects = ["NoSchedule", "NoExecute", "PreferNoSchedule"];

    // Registers a custom validation issue and hands back zod's failure sentinel.
    const reject = (message: string) => {
      ctx.addIssue({ code: z.ZodIssueCode.custom, message });
      return z.NEVER;
    };

    // Converts one trimmed, non-empty entry into a toleration object.
    const parseEntry = (entry: string) => {
      // The effect is everything after the LAST ":" in the entry.
      const effectSep = entry.lastIndexOf(":");
      if (effectSep < 0) {
        return reject(`Invalid toleration format (missing effect): "${entry}"`);
      }

      const effect = entry.slice(effectSep + 1);
      if (!allowedEffects.includes(effect)) {
        return reject(
          `Invalid toleration effect "${effect}" in "${entry}". Must be one of: ${allowedEffects.join(", ")}`
        );
      }

      // Whatever precedes the effect is either "key" or "key=value"; split on the FIRST "=".
      const selector = entry.slice(0, effectSep);
      const valueSep = selector.indexOf("=");
      const hasValue = valueSep !== -1;
      const key = hasValue ? selector.slice(0, valueSep) : selector;

      if (key === "") {
        return reject(`Invalid toleration format (empty key): "${entry}"`);
      }

      return hasValue
        ? {
            key,
            operator: "Equal" as const,
            value: selector.slice(valueSep + 1),
            effect,
          }
        : { key, operator: "Exists" as const, effect };
    };

    // Every entry is parsed (no short-circuit) so each malformed one gets its own issue.
    return raw
      .split(",")
      .map((piece) => piece.trim())
      .filter((piece) => piece !== "")
      .map((piece) => parseEntry(piece));
  })
  .optional(),

// Placement tags settings
PLACEMENT_TAGS_ENABLED: BoolEnv.default(false),
Expand Down
23 changes: 16 additions & 7 deletions apps/supervisor/src/workloadManager/kubernetes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
spec: {
...this.addPlacementTags(this.#defaultPodSpec, opts.placementTags),
affinity: this.#getAffinity(opts),
tolerations: this.#getScheduleTolerations(this.#isScheduledRun(opts)),
terminationGracePeriodSeconds: 60 * 60,
containers: [
{
Expand Down Expand Up @@ -485,7 +486,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
}

#getScheduleNodeAffinityRules(isScheduledRun: boolean): k8s.V1NodeAffinity | undefined {
if (!env.KUBERNETES_SCHEDULE_AFFINITY_ENABLED || !env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE) {
if (!env.KUBERNETES_SCHEDULED_RUN_AFFINITY_ENABLED || !env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_VALUE) {
return undefined;
}

Expand All @@ -494,13 +495,13 @@ export class KubernetesWorkloadManager implements WorkloadManager {
return {
preferredDuringSchedulingIgnoredDuringExecution: [
{
weight: env.KUBERNETES_SCHEDULE_AFFINITY_WEIGHT,
weight: env.KUBERNETES_SCHEDULED_RUN_AFFINITY_WEIGHT,
preference: {
matchExpressions: [
{
key: env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_KEY,
key: env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_KEY,
operator: "In",
values: [env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE],
values: [env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_VALUE],
},
],
},
Expand All @@ -513,13 +514,13 @@ export class KubernetesWorkloadManager implements WorkloadManager {
return {
preferredDuringSchedulingIgnoredDuringExecution: [
{
weight: env.KUBERNETES_SCHEDULE_ANTI_AFFINITY_WEIGHT,
weight: env.KUBERNETES_SCHEDULED_RUN_ANTI_AFFINITY_WEIGHT,
preference: {
matchExpressions: [
{
key: env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_KEY,
key: env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_KEY,
operator: "NotIn",
values: [env.KUBERNETES_SCHEDULE_AFFINITY_POOL_LABEL_VALUE],
values: [env.KUBERNETES_SCHEDULED_RUN_AFFINITY_POOL_LABEL_VALUE],
},
],
},
Expand All @@ -528,6 +529,14 @@ export class KubernetesWorkloadManager implements WorkloadManager {
};
}

/**
 * Returns the tolerations to put on a pod for a scheduled run.
 *
 * @param isScheduledRun - whether the pod being created is for a scheduled run
 * @returns the tolerations from `env.KUBERNETES_SCHEDULED_RUN_TOLERATIONS` when
 *   the run is scheduled and that list is non-empty; otherwise `undefined`
 */
#getScheduleTolerations(isScheduledRun: boolean): k8s.V1Toleration[] | undefined {
  const configured = env.KUBERNETES_SCHEDULED_RUN_TOLERATIONS;

  // Unset and empty lists are treated the same: no tolerations are returned.
  if (isScheduledRun && configured && configured.length > 0) {
    return configured;
  }

  return undefined;
}

#getProjectPodAffinity(projectId: string): k8s.V1PodAffinity | undefined {
if (!env.KUBERNETES_PROJECT_AFFINITY_ENABLED) {
return undefined;
Expand Down
Loading