4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
.terraform
.terraform.lock.hcl
terraform.tfstate
terraform.tfstate.backup
182 changes: 182 additions & 0 deletions cluster_autoscaler.yaml
@@ -0,0 +1,182 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
  name: cluster-autoscaler
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cluster-autoscaler
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
rules:
  - apiGroups: [""]
    resources: ["events", "endpoints"]
    verbs: ["create", "patch"]
  - apiGroups: [""]
    resources: ["pods/eviction"]
    verbs: ["create"]
  - apiGroups: [""]
    resources: ["pods/status"]
    verbs: ["update"]
  - apiGroups: [""]
    resources: ["endpoints"]
    resourceNames: ["cluster-autoscaler"]
    verbs: ["get", "update"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["watch", "list", "get", "patch", "update"]
  - apiGroups: [""]
    resources:
      - "pods"
      - "services"
      - "replicationcontrollers"
      - "persistentvolumeclaims"
      - "persistentvolumes"
      - "namespaces"
    verbs: ["watch", "list", "get"]
  - apiGroups: ["extensions"]
    resources: ["replicasets", "daemonsets"]
    verbs: ["watch", "list", "get"]
  - apiGroups: ["policy"]
    resources: ["poddisruptionbudgets"]
    verbs: ["watch", "list"]
  - apiGroups: ["apps"]
    resources: ["statefulsets", "replicasets", "daemonsets"]
    verbs: ["watch", "list", "get"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses", "csinodes", "csistoragecapacities", "csidrivers"]
    verbs: ["watch", "list", "get"]
  - apiGroups: ["batch", "extensions"]
    resources: ["jobs"]
    verbs: ["get", "list", "watch", "patch"]
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["create"]
  - apiGroups: ["coordination.k8s.io"]
    resourceNames: ["cluster-autoscaler"]
    resources: ["leases"]
    verbs: ["get", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
rules:
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["create", "list", "watch"]
  - apiGroups: [""]
    resources: ["configmaps"]
    resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"]
    verbs: ["delete", "get", "update", "watch"]

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cluster-autoscaler
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-autoscaler
subjects:
  - kind: ServiceAccount
    name: cluster-autoscaler
    namespace: kube-system

---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: cluster-autoscaler
subjects:
  - kind: ServiceAccount
    name: cluster-autoscaler
    namespace: kube-system

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    app: cluster-autoscaler
spec:
  replicas: 3
  selector:
    matchLabels:
      app: cluster-autoscaler
  template:
    metadata:
      labels:
        app: cluster-autoscaler
      annotations:
        prometheus.io/scrape: 'true'
        prometheus.io/port: '8085'
    spec:
      serviceAccountName: cluster-autoscaler
      containers:
        - image: ${image}
          name: cluster-autoscaler
          resources:
            limits:
              cpu: 100m
              memory: 300Mi
            requests:
              cpu: 100m
              memory: 300Mi
          command:
            - ./cluster-autoscaler
            - --v=4
            - --stderrthreshold=info
            - --cloud-provider=oci-oke
            - --max-node-provision-time=25m
${node_pools_settings}
            - --scale-down-delay-after-add=10m
            - --scale-down-unneeded-time=10m
            - --unremovable-node-recheck-timeout=5m
            - --balance-similar-node-groups
            - --balancing-ignore-label=displayName
            - --balancing-ignore-label=hostname
            - --balancing-ignore-label=internal_addr
            - --balancing-ignore-label=oci.oraclecloud.com/fault-domain
          imagePullPolicy: "Always"
          env:
            - name: OKE_USE_INSTANCE_PRINCIPAL
              value: "true"
            - name: OCI_SDK_APPEND_USER_AGENT
              value: "oci-oke-cluster-autoscaler"
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: coredns-pdb
  namespace: kube-system
spec:
  maxUnavailable: 1
  selector:
    matchLabels:
      k8s-app: kube-dns
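
The ${image} and ${node_pools_settings} placeholders in the manifest above suggest it is rendered as a Terraform template rather than applied verbatim. A minimal sketch of that wiring with Terraform's built-in templatefile() function follows; the local name, var.node_pool_ids, the node counts, and the image reference are illustrative assumptions, not part of this PR:

locals {
  # Hypothetical rendering of the manifest; adjust to the real variables in
  # this stack.
  cluster_autoscaler_manifest = templatefile("${path.module}/cluster_autoscaler.yaml", {
    # Illustrative image reference only; use the autoscaler build that matches
    # the cluster's Kubernetes version (see the OKE cluster-autoscaler docs).
    image = "iad.ocir.io/oracle/oci-cluster-autoscaler:1.26.2-7"

    # One --nodes=<min>:<max>:<node-pool-ocid> argument per autoscaled pool,
    # indented to line up with the other command arguments in the Deployment.
    node_pools_settings = join("\n", [
      for id in var.node_pool_ids : "            - --nodes=1:10:${id}"
    ])
  })
}
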
46 changes: 0 additions & 46 deletions datasources.tf
@@ -43,36 +43,6 @@ data "oci_identity_availability_domains" "ADs" {
  compartment_id = var.tenancy_ocid
}

# data "oci_containerengine_cluster_kube_config" "KubeConfig" {
#   cluster_id    = oci_containerengine_cluster.oci_oke_cluster.id
#   token_version = var.cluster_kube_config_token_version
# }


# locals {
#   gpu = {
#     sources = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.sources : s if length(regexall(".*Gen2-GPU.*", s.source_name)) > 0]
#     shapes = {
#       BM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall("BM[.]GPU.*", s)) > 0]
#       VM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall("VM[.]GPU.*", s)) > 0]
#     }
#   }
#   arm = {
#     sources = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.sources : s if length(regexall(".*aarch64.*", s.source_name)) > 0]
#     shapes = {
#       BM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall("BM[.]Standard[.]A1.*", s)) > 0]
#       VM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall("VM[.]Standard[.]A1.*", s)) > 0]
#     }
#   }
#   x86 = {
#     sources = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.sources : s if length(regexall(".*(aarch64|Gen2-GPU).*", s.source_name)) == 0]
#     shapes = {
#       BM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall(".*(GPU|A1).*", s)) == 0 && length(regexall("BM.*", s)) > 0]
#       VM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall(".*(GPU|A1).*", s)) == 0 && length(regexall("VM.*", s)) > 0]
#     }
#   }
# }

data "oci_limits_limit_definitions" "limit_def" {
compartment_id = var.tenancy_ocid
service_name = "compute"
Expand Down Expand Up @@ -104,22 +74,6 @@ data "oci_core_shapes" "valid_shapes" {
availability_domain = data.oci_identity_availability_domains.ADs.availability_domains[count.index].name
}

# output valid_shapes {
#   value = data.oci_core_shapes.valid_shapes.*.shapes
# }

# output "shape_ad_availability" {
#   value = local.shape_ad_availability
# }

# output "limits" {
#   value = local.availability_map
# }

# output "limits_definitions" {
#   value = local.limits_definitions
# }

resource "random_string" "deploy_id" {
length = 4
special = false
87 changes: 69 additions & 18 deletions helm_cert_manager.tf
@@ -8,24 +8,75 @@ locals {
  enable_cert_manager = var.enable_flink ? true : var.enable_cert_manager
}

resource "helm_release" "cert_manager" {
count = local.enable_cert_manager ? 1 : 0
name = "cert-manager"
repository = "https://charts.jetstack.io"
chart = "cert-manager"
version = "1.8.2"
namespace = "cert-manager"
create_namespace = true
wait = true # wait to allow the webhook be properly configured

set {
name = "installCRDs"
value = true
# resource "helm_release" "cert_manager" {
# count = local.enable_cert_manager ? 1 : 0
# name = "cert-manager"
# repository = "https://charts.jetstack.io"
# chart = "cert-manager"
# version = "1.8.2"
# namespace = "cert-manager"
# create_namespace = true
# wait = true # wait to allow the webhook be properly configured

# set {
# name = "installCRDs"
# value = true
# }

# set {
# name = "webhook.timeoutSeconds"
# value = "30"
# }
# depends_on = [oci_containerengine_cluster.oci_oke_cluster]
# }


resource "null_resource" "cert_manager" {
count = local.enable_cert_manager ? 1 : 0

provisioner "local-exec" {
command = "mkdir -p ~/.kube/ && oci ce cluster create-kubeconfig --cluster-id $CLUSTER_ID --file ~/.kube/config --region us-sanjose-1 --token-version 2.0.0 --kube-endpoint $ENDPOINT_TYPE"

environment = {
CLUSTER_ID = oci_containerengine_cluster.oci_oke_cluster.id
ENDPOINT_TYPE = var.is_endpoint_public ? "PUBLIC_ENDPOINT" : "PRIVATE_ENDPOINT"
}
}

provisioner "local-exec" {
command = "helm repo add cert-manager https://charts.jetstack.io"
}


provisioner "local-exec" {
command = "kubectl create ns cert-manager"
}

provisioner "local-exec" {
command = "helm install cert-manager cert-manager/cert-manager -n cert-manager --version 1.8.2 --set installCRDs=true --set webhook.timeoutSeconds=30 --wait"
}

provisioner "local-exec" {
when = destroy
command = "helm uninstall cert-manager -n cert-manager"
on_failure = continue
}

set {
name = "webhook.timeoutSeconds"
value = "30"
provisioner "local-exec" {
when = destroy
command = "helm repo remove cert-manager"
on_failure = continue
}
depends_on = [oci_containerengine_cluster.oci_oke_cluster]
}

provisioner "local-exec" {
when = destroy
command = "kubectl delete ns cert-manager"
on_failure = continue
}

depends_on = [
oci_containerengine_cluster.oci_oke_cluster,
oci_containerengine_node_pool.oci_oke_node_pool
]

}
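
The kubeconfig step above switches between public and private API endpoints via var.is_endpoint_public, which this diff references but does not declare. A plausible declaration, assumed here purely for illustration:

variable "is_endpoint_public" {
  # Assumed shape of the declaration; only the reference appears in this diff.
  description = "Whether to generate the kubeconfig against the cluster's PUBLIC_ENDPOINT or PRIVATE_ENDPOINT."
  type        = bool
  default     = true
}

Note that these provisioners also assume the oci CLI, helm, and kubectl are installed wherever terraform apply runs, and that the kubeconfig region is hardcoded to us-sanjose-1.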