Skip to content

Commit

Permalink
helm charts (#55)
Browse files Browse the repository at this point in the history
* adding helm chart

Signed-off-by: ssundaresha2 <sandeep_sundaresha@intuit.com>

* Templatize image tag

image tag is templatized.

Signed-off-by: ssundaresha2 <sandeep_sundaresha@intuit.com>

* Updating cluster role for pod-reaper

Signed-off-by: ssundaresha2 <sandeep_sundaresha@intuit.com>

* Adding missing conditions for nodereaper and pdbreaper

Signed-off-by: ssundaresha2 <sandeep_sundaresha@intuit.com>

Co-authored-by: ssundaresha2 <sandeep_sundaresha@intuit.com>
  • Loading branch information
sandeeps83 and ssundaresha2 committed Sep 3, 2021
1 parent fc4eb61 commit 7a30831
Show file tree
Hide file tree
Showing 11 changed files with 354 additions and 1 deletion.
23 changes: 23 additions & 0 deletions examples/helm-chart/governor/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# OS-generated metadata files
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
6 changes: 6 additions & 0 deletions examples/helm-chart/governor/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Chart metadata for the governor Helm chart.
apiVersion: v2
name: governor
description: A Helm chart for governor. https://github.com/keikoproj/governor
type: application
version: 0.1.0
# NOTE(review): "aplha" looks like a misspelling of "alpha". The "image.version"
# helper in templates/_helpers.tpl matches on this string to select image tag
# 0.3.0, so check that helper before correcting the spelling here.
appVersion: "0.3.0-aplha"
9 changes: 9 additions & 0 deletions examples/helm-chart/governor/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{{- /*
image.version: resolve the tag used for the keikoproj/governor image.

Precedence:
  1. .Values.reaper.imageVersion, when it is set.
  2. "0.3.0" when the chart appVersion is the 0.3.0 alpha release. Both the
     historical misspelling "0.3.0-aplha" and the corrected "0.3.0-alpha" are
     accepted, so fixing the typo in Chart.yaml does not silently switch the
     image to "latest".
  3. "latest" otherwise.

The rendered value is surrounded by whitespace; callers pipe it through `trim`.
*/ -}}
{{- define "image.version" }}
{{- if .Values.reaper.imageVersion }}
{{ .Values.reaper.imageVersion }}
{{- else if or (eq .Chart.AppVersion "0.3.0-aplha") (eq .Chart.AppVersion "0.3.0-alpha") }}
0.3.0
{{- else }}
latest
{{- end }}
{{- end }}
53 changes: 53 additions & 0 deletions examples/helm-chart/governor/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{{- if .Values.reaper.podreaper }}
---
# Permissions for the pod-reaper ServiceAccount; rendered only when
# .Values.reaper.podreaper is set.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: pod-reaper
rules:
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
      - delete
      - list
  - apiGroups:
      - ""
    resources:
      - namespaces
    verbs:
      - list
{{- end }}
{{- if .Values.reaper.pdbreaper }}
---
# Permissions for the pdb-reaper ServiceAccount; rendered only when
# .Values.reaper.pdbreaper is set.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: pdb-reaper
rules:
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - list
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - create
  - apiGroups:
      - policy
    resources:
      - poddisruptionbudgets
    verbs:
      - list
      - delete
{{- end }}
{{- if .Values.reaper.nodereaper }}
---
# Permissions for the node-reaper ServiceAccount; rendered only when
# .Values.reaper.nodereaper is set.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: node-reaper
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - pods
    verbs:
      - get
      - list
      - patch
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - get
      - list
      - create
  - apiGroups:
      - ""
    resources:
      - pods/eviction
    verbs:
      - create
  - apiGroups:
      - batch
    resources:
      - cronjobs
    verbs:
      - get
      - patch
  - apiGroups:
      - extensions
      - apps
    resources:
      - daemonsets
    verbs:
      - get
{{- end }}
44 changes: 44 additions & 0 deletions examples/helm-chart/governor/templates/clusterrolebinding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{{- if .Values.reaper.podreaper }}
---
# Binds the pod-reaper ClusterRole to the pod-reaper ServiceAccount in the
# release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: pod-reaper
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pod-reaper
subjects:
  - kind: ServiceAccount
    name: pod-reaper
    namespace: {{ .Release.Namespace }}
{{- end }}
{{- if .Values.reaper.pdbreaper }}
---
# Binds the pdb-reaper ClusterRole to the pdb-reaper ServiceAccount in the
# release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: pdb-reaper
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pdb-reaper
subjects:
  - kind: ServiceAccount
    name: pdb-reaper
    namespace: {{ .Release.Namespace }}
{{- end }}
{{- if .Values.reaper.nodereaper }}
---
# Binds the node-reaper ClusterRole to the node-reaper ServiceAccount in the
# release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: node-reaper
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: node-reaper
subjects:
  - kind: ServiceAccount
    name: node-reaper
    namespace: {{ .Release.Namespace }}
{{- end }}
62 changes: 62 additions & 0 deletions examples/helm-chart/governor/templates/node-reaper.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{{- if .Values.reaper.nodereaper }}
{{- $node := .Values.reaper.nodereaper }}
# CronJob that runs `governor reap node`. Rendered only when
# .Values.reaper.nodereaper is set; every tunable below is read from it.
kind: CronJob
apiVersion: batch/v1
metadata:
  namespace: {{ .Release.Namespace }}
  name: node-reaper
spec:
  schedule: "{{ $node.schedule }}"
  # How overlapping runs are handled
  concurrencyPolicy: {{ $node.concurrencyPolicy }}
  startingDeadlineSeconds: {{ $node.startingDeadlineSeconds }}
  successfulJobsHistoryLimit: {{ $node.successfulJobsHistoryLimit }}
  failedJobsHistoryLimit: {{ $node.failedJobsHistoryLimit }}
  jobTemplate:
    spec:
      backoffLimit: 0
      # Upper bound on how long a single run may stay active
      activeDeadlineSeconds: {{ $node.activeDeadlineSeconds }}
      template:
        spec:
          restartPolicy: Never
          serviceAccountName: node-reaper
          containers:
            - name: node-reaper
              image: keikoproj/governor:{{ include "image.version" . | trim }}
              env:
                - name: POD_NAME
                  valueFrom:
                    fieldRef:
                      fieldPath: metadata.name
                - name: POD_NAMESPACE
                  valueFrom:
                    fieldRef:
                      fieldPath: metadata.namespace
                # The pod's own node name, exposed as NODE_NAME - required for reapOld
                - name: NODE_NAME
                  valueFrom:
                    fieldRef:
                      fieldPath: spec.nodeName
              args:
                - governor
                - reap
                - node
                - --dry-run={{ $node.dryrun }}
                - --region={{ $node.region }}
                - --reap-after={{ $node.reapafter }}
                - --soft-reap={{ $node.softreap }}
                - --reap-unknown={{ $node.reapunknown }}
                - --reap-unready={{ $node.reapunready }}
                - --reap-throttle={{ $node.reapthreshold }}
                - --reap-ghost={{ $node.reapghost }}
                - --reap-unjoined={{ $node.reapunjoined }}
                - --reap-unjoined-threshold-minutes={{ $node.reapunjoinedthreshold }}
                - --reap-unjoined-tag-key={{ $node.reapunjoinedkey }}
                - --reap-unjoined-tag-value={{ $node.reapunjoinedtagvalue }}
                {{- if $node.reapold }}
                # Old-node reaping flags are emitted only when reapold is set
                - --reap-old
                - --reap-old-threshold-minutes={{ $node.reapoldthreshold }}
                - --reap-old-throttle={{ $node.reapoldthrottle }}
                {{- end }}
                - --max-kill-nodes={{ $node.maxkillnodes }}
{{- end }}
33 changes: 33 additions & 0 deletions examples/helm-chart/governor/templates/pdb-reaper.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{{- if .Values.reaper.pdbreaper }}
{{- $pdb := .Values.reaper.pdbreaper }}
# CronJob that runs `governor reap pdb`. Rendered only when
# .Values.reaper.pdbreaper is set; every tunable below is read from it.
kind: CronJob
apiVersion: batch/v1
metadata:
  namespace: {{ .Release.Namespace }}
  name: pdb-reaper
spec:
  # Cron expression controlling how often the scan runs
  schedule: "{{ $pdb.schedule }}"
  # How overlapping runs are handled
  concurrencyPolicy: {{ $pdb.concurrencyPolicy }}
  startingDeadlineSeconds: {{ $pdb.startingDeadlineSeconds }}
  successfulJobsHistoryLimit: {{ $pdb.successfulJobsHistoryLimit }}
  failedJobsHistoryLimit: {{ $pdb.failedJobsHistoryLimit }}
  jobTemplate:
    spec:
      backoffLimit: 0
      # Upper bound on how long a single run may stay active
      activeDeadlineSeconds: {{ $pdb.activeDeadlineSeconds }}
      template:
        spec:
          restartPolicy: Never
          serviceAccountName: pdb-reaper
          containers:
            - name: pdb-reaper
              image: keikoproj/governor:{{ include "image.version" . | trim }}
              args:
                - governor
                - reap
                - pdb
                # Reap PDBs blocking when there are pods in crashloop backoff with N > 3 restarts
                - --reap-crashloop={{ $pdb.reapcrashloop }}
{{- end }}
33 changes: 33 additions & 0 deletions examples/helm-chart/governor/templates/pod-reaper.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{{- if .Values.reaper.podreaper }}
{{- $pod := .Values.reaper.podreaper }}
# CronJob that runs `governor reap pod`. Rendered only when
# .Values.reaper.podreaper is set; tunables below are read from it.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: pod-reaper
  namespace: {{ .Release.Namespace }}
spec:
  # Cron expression controlling how often the scan runs
  schedule: "{{ $pod.schedule }}"
  failedJobsHistoryLimit: {{ $pod.failedJobsHistoryLimit }}
  successfulJobsHistoryLimit: {{ $pod.successfulJobsHistoryLimit }}
  # Configurable like the pdb-reaper and node-reaper CronJobs; falls back to
  # Forbid (disallow concurrent executions) when podreaper.concurrencyPolicy
  # is not set, which matches the previously hard-coded value.
  concurrencyPolicy: {{ $pod.concurrencyPolicy | default "Forbid" }}
  startingDeadlineSeconds: {{ $pod.startingDeadlineSeconds }}
  jobTemplate:
    spec:
      # Terminate long running pods
      activeDeadlineSeconds: {{ $pod.activeDeadlineSeconds }}
      backoffLimit: 0
      template:
        spec:
          serviceAccountName: pod-reaper
          restartPolicy: Never
          containers:
            - name: pod-reaper
              image: keikoproj/governor:{{ include "image.version" . | trim }}
              args:
                - governor
                - reap
                - pod
                - --reap-after={{ $pod.reapafter }}
                - --soft-reap={{ $pod.softreap }}
{{- end }}
23 changes: 23 additions & 0 deletions examples/helm-chart/governor/templates/serviceAccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{- if .Values.reaper.podreaper }}
---
# Identity the pod-reaper CronJob runs as.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: {{ .Release.Namespace }}
  name: pod-reaper
{{- end }}
{{- if .Values.reaper.pdbreaper }}
---
# Identity the pdb-reaper CronJob runs as.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: {{ .Release.Namespace }}
  name: pdb-reaper
{{- end }}
{{- if .Values.reaper.nodereaper }}
---
# Identity the node-reaper CronJob runs as.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: {{ .Release.Namespace }}
  name: node-reaper
{{- end }}
64 changes: 64 additions & 0 deletions examples/helm-chart/governor/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Default values for governor.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
#
# Each reaper (podreaper, pdbreaper, nodereaper) is rendered only when its
# section below is present and non-empty.
reaper:
  # The image version is derived from the app version. However, if you wish to
  # have a different one, it can be overridden here. Empty means "not overridden".
  imageVersion: ""
  podreaper:
    schedule: "*/10 * * * *"
    failedJobsHistoryLimit: 3
    successfulJobsHistoryLimit: 3
    startingDeadlineSeconds: 900
    # Terminate long running pods
    activeDeadlineSeconds: 1800
    # Duration (in minutes) in Termination after which a pod is considered reapable
    reapafter: 10
    # Only reap pods without containers running
    softreap: true
  pdbreaper:
    schedule: "*/10 * * * *"
    failedJobsHistoryLimit: 3
    successfulJobsHistoryLimit: 3
    concurrencyPolicy: Forbid
    startingDeadlineSeconds: 900
    # Terminate long running pods
    activeDeadlineSeconds: 1800
    # Reap PDBs blocking when there are pods in crashloop backoff with N > 3 restarts
    reapcrashloop: true
  nodereaper:
    schedule: "*/10 * * * *"
    failedJobsHistoryLimit: 3
    successfulJobsHistoryLimit: 3
    concurrencyPolicy: Forbid
    startingDeadlineSeconds: 900
    # Terminate long running pods
    activeDeadlineSeconds: 1800
    dryrun: true
    # Only operate in us-west-2 (full AWS region name; passed as --region)
    region: us-west-2
    # Consider nodes reapable after 10 minutes in a bad state
    reapafter: 10
    # Only reap nodes without active pods
    softreap: true
    # Unknown nodes are reapable
    reapunknown: true
    # NotReady nodes are reapable
    reapunready: true
    # Wait 180 seconds after each unhealthy node termination (passed as --reap-throttle)
    reapthreshold: 180
    # Nodes with an invalid/incorrect instance-id are reapable
    reapghost: true
    # Nodes which fail to join the cluster are reapable
    reapunjoined: true
    # Nodes should join the cluster within 15 minutes
    reapunjoinedthreshold: 15
    # Tag key and value used to identify an unjoined node, this is an EC2 tag that should
    # exist on the instances as they are launched (part of ASG), be careful not to get this
    # wrong as it can cause termination of other instances if they are not detected as nodes
    # in the cluster
    reapunjoinedkey: KubernetesCluster
    reapunjoinedtagvalue: my-cluster-name
    # Old nodes are reapable; the two settings below are only passed when reapold is true
    reapold: true
    reapoldthreshold: 43200
    reapoldthrottle: 3600
    # Kill a maximum of 1 nodes per run considering throttle wait times
    maxkillnodes: 1
5 changes: 4 additions & 1 deletion examples/pod-reaper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ metadata:
name: pod-reaper
rules:
- apiGroups: [""]
resources: ["pods", ]
resources: ["pods"]
verbs: ["get", "delete", "list"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["list"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
Expand Down

0 comments on commit 7a30831

Please sign in to comment.