diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5def054
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.terraform
+.terraform.lock.hcl
+terraform.tfstate
+terraform.tfstate.backup
diff --git a/cluster_autoscaler.yaml b/cluster_autoscaler.yaml
new file mode 100644
index 0000000..5737ec0
--- /dev/null
+++ b/cluster_autoscaler.yaml
@@ -0,0 +1,182 @@
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+  name: cluster-autoscaler
+  namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: cluster-autoscaler
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+rules:
+  - apiGroups: [""]
+    resources: ["events", "endpoints"]
+    verbs: ["create", "patch"]
+  - apiGroups: [""]
+    resources: ["pods/eviction"]
+    verbs: ["create"]
+  - apiGroups: [""]
+    resources: ["pods/status"]
+    verbs: ["update"]
+  - apiGroups: [""]
+    resources: ["endpoints"]
+    resourceNames: ["cluster-autoscaler"]
+    verbs: ["get", "update"]
+  - apiGroups: [""]
+    resources: ["nodes"]
+    verbs: ["watch", "list", "get", "patch", "update"]
+  - apiGroups: [""]
+    resources:
+      - "pods"
+      - "services"
+      - "replicationcontrollers"
+      - "persistentvolumeclaims"
+      - "persistentvolumes"
+      - "namespaces"
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["extensions"]
+    resources: ["replicasets", "daemonsets"]
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["policy"]
+    resources: ["poddisruptionbudgets"]
+    verbs: ["watch", "list"]
+  - apiGroups: ["apps"]
+    resources: ["statefulsets", "replicasets", "daemonsets"]
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["storage.k8s.io"]
+    resources: ["storageclasses", "csinodes", "csistoragecapacities", "csidrivers"]
+    verbs: ["watch", "list", "get"]
+  - apiGroups: ["batch", "extensions"]
+    resources: ["jobs"]
+    verbs: ["get", "list", "watch", "patch"]
+  - apiGroups: ["coordination.k8s.io"]
+    resources: ["leases"]
+    verbs: ["create"]
+  - apiGroups: ["coordination.k8s.io"]
+    resourceNames: ["cluster-autoscaler"]
+    resources: ["leases"]
+    verbs: ["get", "update"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: cluster-autoscaler
+  namespace: kube-system
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+rules:
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["create","list","watch"]
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"]
+    verbs: ["delete", "get", "update", "watch"]
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: cluster-autoscaler
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: cluster-autoscaler
+subjects:
+  - kind: ServiceAccount
+    name: cluster-autoscaler
+    namespace: kube-system
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: cluster-autoscaler
+  namespace: kube-system
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: cluster-autoscaler
+subjects:
+  - kind: ServiceAccount
+    name: cluster-autoscaler
+    namespace: kube-system
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: cluster-autoscaler
+  namespace: kube-system
+  labels:
+    app: cluster-autoscaler
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: cluster-autoscaler
+  template:
+    metadata:
+      labels:
+        app: cluster-autoscaler
+      annotations:
+        prometheus.io/scrape: 'true'
+        prometheus.io/port: '8085'
+    spec:
+      serviceAccountName: cluster-autoscaler
+      containers:
+        - image: ${image}
+          name: cluster-autoscaler
+          resources:
+            limits:
+              cpu: 100m
+              memory: 300Mi
+            requests:
+              cpu: 100m
+              memory: 300Mi
+          command:
+            - ./cluster-autoscaler
+            - --v=4
+            - --stderrthreshold=info
+            - --cloud-provider=oci-oke
+            - --max-node-provision-time=25m
+            ${node_pools_settings}
+            - --scale-down-delay-after-add=10m
+            - --scale-down-unneeded-time=10m
+            - --unremovable-node-recheck-timeout=5m
+            - --balance-similar-node-groups
+            - --balancing-ignore-label=displayName
+            - --balancing-ignore-label=hostname
+            - --balancing-ignore-label=internal_addr
+            - --balancing-ignore-label=oci.oraclecloud.com/fault-domain
+          imagePullPolicy: "Always"
+          env:
+            - name: OKE_USE_INSTANCE_PRINCIPAL
+              value: "true"
+            - name: OCI_SDK_APPEND_USER_AGENT
+              value: "oci-oke-cluster-autoscaler"
+---
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: coredns-pdb
+  namespace: kube-system
+spec:
+  maxUnavailable: 1
+  selector:
+    matchLabels:
+      k8s-app: kube-dns
\ No newline at end of file
diff --git a/datasources.tf b/datasources.tf
index 930a4f4..69e26b8 100644
--- a/datasources.tf
+++ b/datasources.tf
@@ -43,36 +43,6 @@ data "oci_identity_availability_domains" "ADs" {
   compartment_id = var.tenancy_ocid
 }
 
-# data "oci_containerengine_cluster_kube_config" "KubeConfig" {
-#   cluster_id    = oci_containerengine_cluster.oci_oke_cluster.id
-#   token_version = var.cluster_kube_config_token_version
-# }
-
-
-# locals {
-#   gpu = {
-#     sources = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.sources : s if length(regexall(".*Gen2-GPU.*", s.source_name)) > 0]
-#     shapes = {
-#       BM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall("BM[.]GPU.*", s)) > 0]
-#       VM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall("VM[.]GPU.*", s)) > 0]
-#     }
-#   }
-#   arm = {
-#     sources = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.sources : s if length(regexall(".*aarch64.*", s.source_name)) > 0]
-#     shapes = {
-#       BM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall("BM[.]Standard[.]A1.*", s)) > 0]
-#       VM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall("VM[.]Standard[.]A1.*", s)) > 0]
-#     }
-#   }
-#   x86 = {
-#     sources = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.sources : s if length(regexall(".*(aarch64|Gen2-GPU).*", s.source_name)) == 0]
-#     shapes = {
-#       BM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall(".*(GPU|A1).*", s)) == 0 && length(regexall("BM.*", s)) > 0]
-#       VM = [for s in data.oci_containerengine_node_pool_option.oci_oke_node_pool_option.shapes : s if length(regexall(".*(GPU|A1).*", s)) == 0 && length(regexall("VM.*", s)) > 0]
-#     }
-#   }
-# }
-
 data "oci_limits_limit_definitions" "limit_def" {
   compartment_id = var.tenancy_ocid
   service_name   = "compute"
@@ -104,22 +74,6 @@ data "oci_core_shapes" "valid_shapes" {
   availability_domain = data.oci_identity_availability_domains.ADs.availability_domains[count.index].name
 }
 
-# output valid_shapes {
-#   value = data.oci_core_shapes.valid_shapes.*.shapes
-# }
-
-# output "shape_ad_availability" {
-#   value = local.shape_ad_availability
-# }
-
-# output "limits" {
-#   value = local.availability_map
-# }
-
-# output "limits_definitions" {
-#   value = local.limits_definitions
-# }
-
 resource "random_string" "deploy_id" {
   length  = 4
   special = false
diff --git a/helm_cert_manager.tf b/helm_cert_manager.tf
index 6a3632e..aab83e4 100644
--- a/helm_cert_manager.tf
+++ b/helm_cert_manager.tf
@@ -8,24 +8,75 @@ locals {
   enable_cert_manager = var.enable_flink ? true : var.enable_cert_manager
 }
 
-resource "helm_release" "cert_manager" {
-  count            = local.enable_cert_manager ? 1 : 0
-  name             = "cert-manager"
-  repository       = "https://charts.jetstack.io"
-  chart            = "cert-manager"
-  version          = "1.8.2"
-  namespace        = "cert-manager"
-  create_namespace = true
-  wait             = true # wait to allow the webhook be properly configured
-
-  set {
-    name  = "installCRDs"
-    value = true
+# resource "helm_release" "cert_manager" {
+#   count            = local.enable_cert_manager ? 1 : 0
+#   name             = "cert-manager"
+#   repository       = "https://charts.jetstack.io"
+#   chart            = "cert-manager"
+#   version          = "1.8.2"
+#   namespace        = "cert-manager"
+#   create_namespace = true
+#   wait             = true # wait to allow the webhook be properly configured
+
+#   set {
+#     name  = "installCRDs"
+#     value = true
+#   }
+
+#   set {
+#     name  = "webhook.timeoutSeconds"
+#     value = "30"
+#   }
+#   depends_on = [oci_containerengine_cluster.oci_oke_cluster]
+# }
+
+
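+# ORM-friendly replacement for the helm_release kept above for reference: a
+# null_resource drives the oci CLI, kubectl, and helm directly. Sketch
+# assumption: all three binaries are on the PATH of the host that runs
+# terraform apply, and ~/.kube/config may be overwritten there.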
+resource "null_resource" "cert_manager" {
+  count = local.enable_cert_manager ? 1 : 0
+
+  provisioner "local-exec" {
+    command = "mkdir -p ~/.kube/ && oci ce cluster create-kubeconfig --cluster-id $CLUSTER_ID --file ~/.kube/config --region ${local.cluster_region} --token-version 2.0.0 --kube-endpoint $ENDPOINT_TYPE"
+
+    environment = {
+      CLUSTER_ID    = oci_containerengine_cluster.oci_oke_cluster.id
+      ENDPOINT_TYPE = var.is_endpoint_public ? "PUBLIC_ENDPOINT" : "PRIVATE_ENDPOINT"
+    }
+  }
+
+  provisioner "local-exec" {
+    command = "helm repo add cert-manager https://charts.jetstack.io"
+  }
+
+
+  provisioner "local-exec" {
+    command = "kubectl create ns cert-manager"
+  }
+
+  provisioner "local-exec" {
+    command = "helm install cert-manager cert-manager/cert-manager -n cert-manager --version 1.8.2 --set installCRDs=true --set webhook.timeoutSeconds=30 --wait"
+  }
+
+  provisioner "local-exec" {
+    when       = destroy
+    command    = "helm uninstall cert-manager -n cert-manager"
+    on_failure = continue
   }
-  set {
-    name  = "webhook.timeoutSeconds"
-    value = "30"
+  provisioner "local-exec" {
+    when       = destroy
+    command    = "helm repo remove cert-manager"
+    on_failure = continue
   }
-  depends_on = [oci_containerengine_cluster.oci_oke_cluster]
-}
\ No newline at end of file
+
+  provisioner "local-exec" {
+    when       = destroy
+    command    = "kubectl delete ns cert-manager"
+    on_failure = continue
+  }
+
+  depends_on = [
+    oci_containerengine_cluster.oci_oke_cluster,
+    oci_containerengine_node_pool.oci_oke_node_pool
+  ]
+
+}
diff --git a/helm_flink.tf b/helm_flink.tf
index d69eb64..b3e36f1 100644
--- a/helm_flink.tf
+++ b/helm_flink.tf
@@ -1,17 +1,68 @@
 ## Copyright © 2022, Oracle and/or its affiliates.
 ## All rights reserved. The Universal Permissive License (UPL), Version 1.0 as shown at http://oss.oracle.com/licenses/upl
 
-resource "helm_release" "flink_operator" {
-  count             = var.enable_flink ? 1 : 0
-  name              = "flink-operator"
-  repository        = "https://downloads.apache.org/flink/flink-kubernetes-operator-1.3.1/"
-  chart             = "flink-kubernetes-operator"
-  namespace         = "flink"
-  create_namespace  = true
-  wait              = true
+# resource "helm_release" "flink_operator" {
+#   count             = var.enable_flink ? 1 : 0
+#   name              = "flink-operator"
+#   repository        = "https://downloads.apache.org/flink/flink-kubernetes-operator-1.3.1/"
+#   chart             = "flink-kubernetes-operator"
+#   namespace         = "flink"
+#   create_namespace  = true
+#   wait              = true
+
+#   depends_on = [
+#     oci_containerengine_cluster.oci_oke_cluster,
+#     helm_release.cert_manager
+#   ]
+# }
+
+
+resource "null_resource" "flink_operator" {
+  count = var.enable_flink ? 1 : 0
+
+  provisioner "local-exec" {
+    command = "mkdir -p ~/.kube/ && oci ce cluster create-kubeconfig --cluster-id $CLUSTER_ID --file ~/.kube/config --region ${local.cluster_region} --token-version 2.0.0 --kube-endpoint $ENDPOINT_TYPE"
+
+    environment = {
+      CLUSTER_ID    = oci_containerengine_cluster.oci_oke_cluster.id
+      ENDPOINT_TYPE = var.is_endpoint_public ? "PUBLIC_ENDPOINT" : "PRIVATE_ENDPOINT"
+    }
+  }
+
+  provisioner "local-exec" {
+    command = "helm repo add flink-kubernetes-operator https://downloads.apache.org/flink/flink-kubernetes-operator-1.3.1/"
+  }
+
+  provisioner "local-exec" {
+    command = "kubectl create ns flink"
+  }
+
+  provisioner "local-exec" {
+    command = "helm install flink-operator flink-kubernetes-operator/flink-kubernetes-operator -n flink --wait"
+  }
+
+  provisioner "local-exec" {
+    when       = destroy
+    command    = "helm uninstall flink-operator -n flink"
+    on_failure = continue
+  }
+
+  provisioner "local-exec" {
+    when       = destroy
+    command    = "helm repo remove flink-kubernetes-operator"
+    on_failure = continue
+  }
+
+  provisioner "local-exec" {
+    when       = destroy
+    command    = "kubectl delete ns flink"
+    on_failure = continue
+  }
 
   depends_on = [
     oci_containerengine_cluster.oci_oke_cluster,
-    helm_release.cert_manager
+    oci_containerengine_node_pool.oci_oke_node_pool,
+    null_resource.cert_manager
   ]
+
 }
diff --git a/helm_metrics.tf b/helm_metrics.tf
index 4093919..f2bbf04 100644
--- a/helm_metrics.tf
+++ b/helm_metrics.tf
@@ -5,18 +5,48 @@ locals {
   enable_metrics_server = var.np1_enable_autoscaler || var.np2_enable_autoscaler || var.np3_enable_autoscaler ? true : var.enable_metrics_server
 }
 
-resource "helm_release" "metrics_server" {
-  count      = local.enable_metrics_server ? 1 : 0
-  name       = "metrics-server"
-  repository = "https://kubernetes-sigs.github.io/metrics-server/"
-  chart      = "metrics-server"
-  namespace  = "kube-system"
-  version    = "3.8.2"
-  wait       = false
-
-  set {
-    name  = "replicas"
-    value = "3"
+# resource "helm_release" "metrics_server" {
+#   count      = local.enable_metrics_server ? 1 : 0
+#   name       = "metrics-server"
+#   repository = "https://kubernetes-sigs.github.io/metrics-server/"
+#   chart      = "metrics-server"
+#   namespace  = "kube-system"
+#   version    = "3.8.2"
+#   wait       = false
+
+#   set {
+#     name  = "replicas"
+#     value = "3"
+#   }
+#   depends_on = [oci_containerengine_cluster.oci_oke_cluster]
+# }
+
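+# Same local-exec bootstrap as cert-manager; `helm upgrade --install` keeps
+# re-runs idempotent, and metrics-server lands in the existing kube-system
+# namespace, so no namespace has to be created here.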
"PUBLIC_ENDPOINT" : "PRIVATE_ENDPOINT" + } + } + + provisioner "local-exec" { + command = "helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/" } - depends_on = [oci_containerengine_cluster.oci_oke_cluster] + + provisioner "local-exec" { + command = "helm upgrade --install metrics-server metrics-server/metrics-server -n kube-system --set replicas=1" + } + + provisioner "local-exec" { + when = destroy + command = "helm uninstall metrics-server -n kube-system" + on_failure = continue + } + + depends_on = [oci_containerengine_node_pool.oci_oke_node_pool] } + diff --git a/k8s_autoscaler.tf b/k8s_autoscaler.tf index ba8de9c..e7a41ef 100644 --- a/k8s_autoscaler.tf +++ b/k8s_autoscaler.tf @@ -17,285 +17,374 @@ locals { k8s_minor_version = regex("^\\d+", replace(local.kubernetes_version, "v1.", "")) } -resource "kubernetes_service_account" "cluster_autoscaler_sa" { - count = local.cluster_autoscaler_enabled ? 1 : 0 +# resource "kubernetes_service_account" "cluster_autoscaler_sa" { +# count = local.cluster_autoscaler_enabled ? 1 : 0 - metadata { - name = "cluster-autoscaler" - namespace = "kube-system" - labels = { - k8s-addon = "cluster-autoscaler.addons.k8s.io" - k8s-app = "cluster-autoscaler" - } - } - automount_service_account_token = true # workaround to support ORM deprecated terraform providers +# metadata { +# name = "cluster-autoscaler" +# namespace = "kube-system" +# labels = { +# k8s-addon = "cluster-autoscaler.addons.k8s.io" +# k8s-app = "cluster-autoscaler" +# } +# } +# automount_service_account_token = false - depends_on = [oci_containerengine_node_pool.oci_oke_node_pool] -} +# depends_on = [oci_containerengine_node_pool.oci_oke_node_pool] +# } -resource "kubernetes_cluster_role" "cluster_autoscaler_cr" { - count = local.cluster_autoscaler_enabled ? 1 : 0 +# resource "kubernetes_secret" "cluster_autoscaler_sa_token" { +# count = local.cluster_autoscaler_enabled ? 
 
-  metadata {
-    name = "cluster-autoscaler"
-    labels = {
-      k8s-addon = "cluster-autoscaler.addons.k8s.io"
-      k8s-app   = "cluster-autoscaler"
-    }
-  }
+#   metadata {
+#     name      = "cluster-autoscaler-token"
+#     namespace = "kube-system"
+#     annotations = {
+#       "kubernetes.io/service-account.name" = "cluster-autoscaler"
+#     }
+#   }
+#   type = "kubernetes.io/service-account-token"
 
-  rule {
-    api_groups = [""]
-    resources  = ["events", "endpoints"]
-    verbs      = ["create", "patch"]
-  }
-  rule {
-    api_groups = [""]
-    resources  = ["pods/eviction"]
-    verbs      = ["create"]
-  }
-  rule {
-    api_groups = [""]
-    resources  = ["pods/status"]
-    verbs      = ["update"]
-  }
-  rule {
-    api_groups     = [""]
-    resource_names = ["cluster-autoscaler"]
-    resources      = ["endpoints"]
-    verbs          = ["get", "update"]
-  }
-  rule {
-    api_groups = [""]
-    resources  = ["nodes"]
-    verbs      = ["watch", "list", "get", "patch", "update"]
-  }
-  rule {
-    api_groups = [""]
-    resources  = ["pods", "services", "replicationcontrollers", "persistentvolumeclaims", "persistentvolumes", "namespaces"]
-    verbs      = ["watch", "list", "get"]
-  }
-  rule {
-    api_groups = ["extensions"]
-    resources  = ["replicasets", "daemonsets"]
-    verbs      = ["watch", "list", "get"]
-  }
-  rule {
-    api_groups = ["policy"]
-    resources  = ["poddisruptionbudgets"]
-    verbs      = ["watch", "list"]
-  }
-  rule {
-    api_groups = ["apps"]
-    resources  = ["statefulsets", "replicasets", "daemonsets"]
-    verbs      = ["watch", "list", "get"]
-  }
-  rule {
-    api_groups = ["storage.k8s.io"]
-    resources  = ["storageclasses", "csinodes", "csistoragecapacities", "csidrivers"]
-    verbs      = ["watch", "list", "get"]
-  }
-  rule {
-    api_groups = ["batch", "extensions"]
-    resources  = ["jobs"]
-    verbs      = ["get", "list", "watch", "patch"]
-  }
-  rule {
-    api_groups = ["coordination.k8s.io"]
-    resources  = ["leases"]
-    verbs      = ["create"]
-  }
-  rule {
-    api_groups     = ["coordination.k8s.io"]
-    resource_names = ["cluster-autoscaler"]
-    resources      = ["leases"]
-    verbs          = ["get", "update"]
-  }
+#   depends_on = [kubernetes_service_account.cluster_autoscaler_sa, oci_containerengine_node_pool.oci_oke_node_pool]
+# }
 
-  depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
-}
+# resource "kubernetes_cluster_role" "cluster_autoscaler_cr" {
+#   count = local.cluster_autoscaler_enabled ? 1 : 0
 
-resource "kubernetes_role" "cluster_autoscaler_role" {
-  count = local.cluster_autoscaler_enabled ? 1 : 0
 
+#   metadata {
+#     name = "cluster-autoscaler"
+#     labels = {
+#       k8s-addon = "cluster-autoscaler.addons.k8s.io"
+#       k8s-app   = "cluster-autoscaler"
+#     }
+#   }
 
-  metadata {
-    name      = "cluster-autoscaler"
-    namespace = "kube-system"
-    labels = {
-      k8s-addon = "cluster-autoscaler.addons.k8s.io"
-      k8s-app   = "cluster-autoscaler"
-    }
-  }
+#   rule {
+#     api_groups = [""]
+#     resources  = ["events", "endpoints"]
+#     verbs      = ["create", "patch"]
+#   }
+#   rule {
+#     api_groups = [""]
+#     resources  = ["pods/eviction"]
+#     verbs      = ["create"]
+#   }
+#   rule {
+#     api_groups = [""]
+#     resources  = ["pods/status"]
+#     verbs      = ["update"]
+#   }
+#   rule {
+#     api_groups     = [""]
+#     resource_names = ["cluster-autoscaler"]
+#     resources      = ["endpoints"]
+#     verbs          = ["get", "update"]
+#   }
+#   rule {
+#     api_groups = [""]
+#     resources  = ["nodes"]
+#     verbs      = ["watch", "list", "get", "patch", "update"]
+#   }
+#   rule {
+#     api_groups = [""]
+#     resources  = ["pods", "services", "replicationcontrollers", "persistentvolumeclaims", "persistentvolumes", "namespaces"]
+#     verbs      = ["watch", "list", "get"]
+#   }
+#   rule {
+#     api_groups = ["extensions"]
+#     resources  = ["replicasets", "daemonsets"]
+#     verbs      = ["watch", "list", "get"]
+#   }
+#   rule {
+#     api_groups = ["policy"]
+#     resources  = ["poddisruptionbudgets"]
+#     verbs      = ["watch", "list"]
+#   }
+#   rule {
+#     api_groups = ["apps"]
+#     resources  = ["statefulsets", "replicasets", "daemonsets"]
+#     verbs      = ["watch", "list", "get"]
+#   }
+#   rule {
+#     api_groups = ["storage.k8s.io"]
+#     resources  = ["storageclasses", "csinodes", "csistoragecapacities", "csidrivers"]
+#     verbs      = ["watch", "list", "get"]
+#   }
+#   rule {
+#     api_groups = ["batch", "extensions"]
+#     resources  = ["jobs"]
+#     verbs      = ["get", "list", "watch", "patch"]
+#   }
+#   rule {
+#     api_groups = ["coordination.k8s.io"]
+#     resources  = ["leases"]
+#     verbs      = ["create"]
+#   }
+#   rule {
+#     api_groups     = ["coordination.k8s.io"]
+#     resource_names = ["cluster-autoscaler"]
+#     resources      = ["leases"]
+#     verbs          = ["get", "update"]
+#   }
 
-  rule {
-    api_groups = [""]
-    resources  = ["configmaps"]
-    verbs      = ["create", "list", "watch"]
-  }
-  rule {
-    api_groups     = [""]
-    resource_names = ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"]
-    resources      = ["configmaps"]
-    verbs          = ["delete", "get", "update", "watch"]
-  }
+#   depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
+# }
 
-  depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
-}
+# resource "kubernetes_role" "cluster_autoscaler_role" {
+#   count = local.cluster_autoscaler_enabled ? 1 : 0
 
-resource "kubernetes_cluster_role_binding" "cluster_autoscaler_crb" {
-  count = local.cluster_autoscaler_enabled ? 1 : 0
-  metadata {
-    name = "cluster-autoscaler"
-    labels = {
-      k8s-addon = "cluster-autoscaler.addons.k8s.io"
-      k8s-app   = "cluster-autoscaler"
-    }
-  }
+#   metadata {
+#     name      = "cluster-autoscaler"
+#     namespace = "kube-system"
+#     labels = {
+#       k8s-addon = "cluster-autoscaler.addons.k8s.io"
+#       k8s-app   = "cluster-autoscaler"
+#     }
+#   }
 
-  role_ref {
-    api_group = "rbac.authorization.k8s.io"
-    kind      = "ClusterRole"
-    name      = "cluster-autoscaler"
-  }
-  subject {
-    kind      = "ServiceAccount"
-    name      = "cluster-autoscaler"
-    namespace = "kube-system"
-  }
+#   rule {
+#     api_groups = [""]
+#     resources  = ["configmaps"]
+#     verbs      = ["create", "list", "watch"]
+#   }
+#   rule {
+#     api_groups     = [""]
+#     resource_names = ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"]
+#     resources      = ["configmaps"]
+#     verbs          = ["delete", "get", "update", "watch"]
+#   }
 
-  depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
-}
+#   depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
+# }
 
-resource "kubernetes_role_binding" "cluster_autoscaler_rb" {
-  count = local.cluster_autoscaler_enabled ? 1 : 0
+# resource "kubernetes_cluster_role_binding" "cluster_autoscaler_crb" {
+#   count = local.cluster_autoscaler_enabled ? 1 : 0
+#   metadata {
+#     name = "cluster-autoscaler"
+#     labels = {
+#       k8s-addon = "cluster-autoscaler.addons.k8s.io"
+#       k8s-app   = "cluster-autoscaler"
+#     }
+#   }
 
-  metadata {
-    name      = "cluster-autoscaler"
-    namespace = "kube-system"
-    labels = {
-      k8s-addon = "cluster-autoscaler.addons.k8s.io"
-      k8s-app   = "cluster-autoscaler"
-    }
-  }
+#   role_ref {
+#     api_group = "rbac.authorization.k8s.io"
+#     kind      = "ClusterRole"
+#     name      = "cluster-autoscaler"
+#   }
+#   subject {
+#     kind      = "ServiceAccount"
+#     name      = "cluster-autoscaler"
+#     namespace = "kube-system"
+#   }
 
-  role_ref {
-    api_group = "rbac.authorization.k8s.io"
-    kind      = "Role"
-    name      = "cluster-autoscaler"
-  }
-  subject {
-    kind      = "ServiceAccount"
-    name      = "cluster-autoscaler"
-    namespace = "kube-system"
-  }
+#   depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
+# }
 
-  depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
+# resource "kubernetes_role_binding" "cluster_autoscaler_rb" {
+#   count = local.cluster_autoscaler_enabled ? 1 : 0
+
+#   metadata {
+#     name      = "cluster-autoscaler"
+#     namespace = "kube-system"
+#     labels = {
+#       k8s-addon = "cluster-autoscaler.addons.k8s.io"
+#       k8s-app   = "cluster-autoscaler"
+#     }
+#   }
+
+#   role_ref {
+#     api_group = "rbac.authorization.k8s.io"
+#     kind      = "Role"
+#     name      = "cluster-autoscaler"
+#   }
+#   subject {
+#     kind      = "ServiceAccount"
+#     name      = "cluster-autoscaler"
+#     namespace = "kube-system"
+#   }
+
+#   depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
+# }
+
+# resource "kubernetes_deployment" "cluster_autoscaler_deployment" {
+#   count = local.cluster_autoscaler_enabled ? 1 : 0
+
+#   metadata {
+#     name      = "cluster-autoscaler"
+#     namespace = "kube-system"
+#     labels = {
+#       app = "cluster-autoscaler"
+#     }
+#   }
+
+#   spec {
+#     replicas = 3
+
+#     selector {
+#       match_labels = {
+#         app = "cluster-autoscaler"
+#       }
+#     }
+
+#     template {
+#       metadata {
+#         labels = {
+#           app = "cluster-autoscaler"
+#         }
+#         annotations = {
+#           "prometheus.io/scrape" = true
+#           "prometheus.io/port"   = 8085
+#         }
+#       }
+
+#       spec {
+#         service_account_name = "cluster-autoscaler"
+
+#         container {
+#           image = local.cluster_autoscaler_image
+#           name  = "cluster-autoscaler"
+
+#           resources {
+#             limits = {
+#               cpu    = "100m"
+#               memory = "300Mi"
+#             }
+#             requests = {
+#               cpu    = "100m"
+#               memory = "300Mi"
+#             }
+#           }
+#           command = compact([
+#             "./cluster-autoscaler",
+#             "--v=${local.cluster_autoscaler_log_level_verbosity}",
+#             "--stderrthreshold=info",
+#             "--cloud-provider=${local.cluster_autoscaler_cloud_provider}",
+#             "--max-node-provision-time=${local.cluster_autoscaler_max_node_provision_time}",
+#             "--nodes=${var.np1_autoscaler_min_nodes}:${var.np1_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[0].id}",
+#             var.node_pool_count >= 2 ? "--nodes=${var.np2_autoscaler_min_nodes}:${var.np2_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[1].id}" : "",
+#             var.node_pool_count >= 3 ? "--nodes=${var.np3_autoscaler_min_nodes}:${var.np3_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[2].id}" : "",
+#             "--scale-down-delay-after-add=${local.cluster_autoscaler_scale_down_delay_after_add}",
+#             "--scale-down-unneeded-time=${local.cluster_autoscaler_scale_down_unneeded_time}",
+#             "--unremovable-node-recheck-timeout=${local.cluster_autoscaler_unremovable_node_recheck_timeout}",
+#             "--balance-similar-node-groups",
+#             "--balancing-ignore-label=displayName",
+#             "--balancing-ignore-label=hostname",
+#             "--balancing-ignore-label=internal_addr",
+#             "--balancing-ignore-label=oci.oraclecloud.com/fault-domain"
+#           ])
+#           image_pull_policy = "Always"
+#           env {
+#             name  = "OKE_USE_INSTANCE_PRINCIPAL"
+#             value = "true"
+#           }
+#           env {
+#             name  = "OCI_SDK_APPEND_USER_AGENT"
+#             value = "oci-oke-cluster-autoscaler"
+#           }
+#         }
+#       }
+#     }
+#   }
+
+#   depends_on = [
+#     oci_containerengine_node_pool.oci_oke_node_pool,
+#     helm_release.metrics_server
+#   ]
+# }
+
+# resource "kubernetes_pod_disruption_budget_v1" "core_dns_pod_disruption_budget" {
+#   count = local.cluster_autoscaler_enabled ? 1 : 0
+
+#   metadata {
+#     name      = "coredns-pdb"
+#     namespace = "kube-system"
+#     labels = {
+#       k8s-app = "cluster-autoscaler"
+#     }
+#   }
+#   spec {
+#     max_unavailable = "1"
+#     selector {
+#       match_labels = {
+#         k8s-app = "kube-dns"
+#       }
+#     }
+#   }
+
+#   depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
+# }
+
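+# Renders cluster_autoscaler.yaml with templatefile(): ${image} is the
+# autoscaler image and ${node_pools_settings} expands to one
+# --nodes=<min>:<max>:<pool-id> flag per configured node pool. The second and
+# third join() entries carry the indentation of the command list in the
+# template so the rendered manifest stays valid YAML.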
" - --nodes=${var.np3_autoscaler_min_nodes}:${var.np3_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[2].id}" : "", + ])) + }) + filename = "cluster_autoscaler_rendered.yaml" } -resource "kubernetes_deployment" "cluster_autoscaler_deployment" { +resource "null_resource" "cluster_autoscaler" { count = local.cluster_autoscaler_enabled ? 1 : 0 - metadata { - name = "cluster-autoscaler" - namespace = "kube-system" - labels = { - app = "cluster-autoscaler" + provisioner "local-exec" { + command = "mkdir -p ~/.kube/ && oci ce cluster create-kubeconfig --cluster-id $CLUSTER_ID --file ~/.kube/config --region us-sanjose-1 --token-version 2.0.0 --kube-endpoint $ENDPOINT_TYPE" + + environment = { + CLUSTER_ID = oci_containerengine_cluster.oci_oke_cluster.id + ENDPOINT_TYPE = var.is_endpoint_public ? "PUBLIC_ENDPOINT" : "PRIVATE_ENDPOINT" } } - spec { - replicas = 3 - - selector { - match_labels = { - app = "cluster-autoscaler" - } - } + provisioner "local-exec" { + command = "kubectl apply -f cluster_autoscaler_rendered.yaml" + } - template { - metadata { - labels = { - app = "cluster-autoscaler" - } - annotations = { - "prometheus.io/scrape" = true - "prometheus.io/port" = 8085 - } - } - - spec { - service_account_name = "cluster-autoscaler" - - container { - image = local.cluster_autoscaler_image - name = "cluster-autoscaler" - - resources { - limits = { - cpu = "100m" - memory = "300Mi" - } - requests = { - cpu = "100m" - memory = "300Mi" - } - } - command = compact([ - "./cluster-autoscaler", - "--v=${local.cluster_autoscaler_log_level_verbosity}", - "--stderrthreshold=info", - "--cloud-provider=${local.cluster_autoscaler_cloud_provider}", - "--max-node-provision-time=${local.cluster_autoscaler_max_node_provision_time}", - "--nodes=${var.np1_autoscaler_min_nodes}:${var.np1_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[0].id}", - var.node_pool_count >= 2 ? "--nodes=${var.np2_autoscaler_min_nodes}:${var.np2_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[1].id}" : "", - var.node_pool_count >= 3 ? "--nodes=${var.np3_autoscaler_min_nodes}:${var.np3_autoscaler_max_nodes}:${oci_containerengine_node_pool.oci_oke_node_pool[1].id}" : "", - "--scale-down-delay-after-add=${local.cluster_autoscaler_scale_down_delay_after_add}", - "--scale-down-unneeded-time=${local.cluster_autoscaler_scale_down_unneeded_time}", - "--unremovable-node-recheck-timeout=${local.cluster_autoscaler_unremovable_node_recheck_timeout}", - "--balance-similar-node-groups", - "--balancing-ignore-label=displayName", - "--balancing-ignore-label=hostname", - "--balancing-ignore-label=internal_addr", - "--balancing-ignore-label=oci.oraclecloud.com/fault-domain" - ]) - image_pull_policy = "Always" - env { - name = "OKE_USE_INSTANCE_PRINCIPAL" - value = "true" - } - env { - name = "OCI_SDK_APPEND_USER_AGENT" - value = "oci-oke-cluster-autoscaler" - } - } - } - } + provisioner "local-exec" { + when = destroy + command = "cp ./config ~/.kube/ && kubectl delete -f cluster_autoscaler_rendered.yaml" + on_failure = continue } depends_on = [ oci_containerengine_node_pool.oci_oke_node_pool, - helm_release.metrics_server + local_file.cluster_autoscaler_yaml ] } -resource "kubernetes_pod_disruption_budget_v1" "core_dns_pod_disruption_budget" { - count = local.cluster_autoscaler_enabled ? 
+resource "null_resource" "cluster_autoscaler_sa_token" {
 
-  metadata {
-    name      = "coredns-pdb"
-    namespace = "kube-system"
-    labels = {
-      k8s-app = "cluster-autoscaler"
+  count = local.cluster_autoscaler_enabled && parseint(local.k8s_minor_version, 10) > 24 ? 1 : 0
+
+  provisioner "local-exec" {
+    command = "mkdir -p ~/.kube/ && oci ce cluster create-kubeconfig --cluster-id $CLUSTER_ID --file ~/.kube/config --region ${local.cluster_region} --token-version 2.0.0 --kube-endpoint $ENDPOINT_TYPE"
+
+    environment = {
+      CLUSTER_ID    = oci_containerengine_cluster.oci_oke_cluster.id
+      ENDPOINT_TYPE = var.is_endpoint_public ? "PUBLIC_ENDPOINT" : "PRIVATE_ENDPOINT"
     }
   }
-  spec {
-    max_unavailable = "1"
-    selector {
-      match_labels = {
-        k8s-app = "kube-dns"
-      }
-    }
+
+  provisioner "local-exec" {
+    command = "kubectl apply -f sa-token.yaml"
+  }
+
+  provisioner "local-exec" {
+    when       = destroy
+    command    = "ls -lah ~/.kube/"
+    on_failure = continue
+  }
+
+  provisioner "local-exec" {
+    when       = destroy
+    command    = "kubectl delete -f sa-token.yaml"
+    on_failure = continue
   }
 
   depends_on = [oci_containerengine_node_pool.oci_oke_node_pool]
-}
+}
\ No newline at end of file
diff --git a/oke_node_pools.tf b/oke_node_pools.tf
index 7c23db1..6864c6d 100644
--- a/oke_node_pools.tf
+++ b/oke_node_pools.tf
@@ -94,4 +94,13 @@ resource "oci_containerengine_node_pool" "oci_oke_node_pool" {
     }
   }
   defined_tags = local.node_pools[count.index]["tags"]
+
+  lifecycle {
+    # Pool size is managed by the cluster autoscaler at runtime; ignore it so
+    # terraform apply does not scale pools back. node_config_details is a single block.
+    ignore_changes = [
+      node_config_details[0].size
+    ]
+  }
+
 }
diff --git a/providers.tf b/providers.tf
index 2a5d2ec..0045cdc 100644
--- a/providers.tf
+++ b/providers.tf
@@ -1,6 +1,14 @@
 # Copyright (c) 2022 Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at http://oss.oracle.com/licenses/upl.
 
+
+locals {
+  cluster_endpoint       = yamldecode(data.oci_containerengine_cluster_kube_config.oke.content)["clusters"][0]["cluster"]["server"]
+  cluster_ca_certificate = base64decode(yamldecode(data.oci_containerengine_cluster_kube_config.oke.content)["clusters"][0]["cluster"]["certificate-authority-data"])
+  cluster_id             = yamldecode(data.oci_containerengine_cluster_kube_config.oke.content)["users"][0]["user"]["exec"]["args"][4]
+  cluster_region         = yamldecode(data.oci_containerengine_cluster_kube_config.oke.content)["users"][0]["user"]["exec"]["args"][6]
+}
+
 terraform {
   required_version = ">= 1.0"
   required_providers {
@@ -9,16 +17,16 @@ terraform {
       version = ">= 4.85.0" # https://registry.terraform.io/providers/hashicorp/oci/4.85.0
     }
-    kubernetes = {
-      source  = "hashicorp/kubernetes"
-      version = "2.13.0" # Latest version as January 2022 = 2.7.1. Using 2.2.0 (May, 2021) for ORM compatibility (12 releases behind)
-      # https://registry.terraform.io/providers/hashicorp/kubernetes/2.13.0
-    }
-    helm = {
-      source  = "hashicorp/helm"
-      version = "2.1.0" # Latest version as January 2022 = 2.4.1. Using 2.1.0 (March, 2021) for ORM compatibility (7 releases behind)
-      # https://registry.terraform.io/providers/hashicorp/helm/2.1.0
-    }
+    # kubernetes = {
+    #   source  = "hashicorp/kubernetes"
+    #   version = "2.18.1"
+    #   # https://registry.terraform.io/providers/hashicorp/kubernetes/2.18.1
+    # }
+    # helm = {
+    #   source  = "hashicorp/helm"
+    #   version = "2.1.0" # Latest version as January 2022 = 2.4.1. Using 2.1.0 (March, 2021) for ORM compatibility (7 releases behind)
+    #   # https://registry.terraform.io/providers/hashicorp/helm/2.1.0
+    # }
     tls = {
       source  = "hashicorp/tls"
       version = "3.1.0" # Latest version as January 2022 = 3.1.0.
@@ -65,33 +73,27 @@ provider "oci" {
 
 # New configuration to avoid Terraform Kubernetes provider interpolation. https://registry.terraform.io/providers/hashicorp/kubernetes/2.2.0/docs#stacking-with-managed-kubernetes-cluster-resources
 # Currently need to uncheck to refresh (--refresh=false) when destroying or else the terraform destroy will fail
-# https://docs.cloud.oracle.com/en-us/iaas/Content/ContEng/Tasks/contengdownloadkubeconfigfile.htm#notes
-provider "kubernetes" {
-  host                   = local.cluster_endpoint
-  cluster_ca_certificate = local.cluster_ca_certificate
-  exec {
-    api_version = "client.authentication.k8s.io/v1beta1"
-    args        = ["ce", "cluster", "generate-token", "--cluster-id", local.cluster_id, "--region", local.cluster_region]
-    command     = "oci"
-  }
-}
+# # https://docs.cloud.oracle.com/en-us/iaas/Content/ContEng/Tasks/contengdownloadkubeconfigfile.htm#notes
+# provider "kubernetes" {
+#   host                   = local.cluster_endpoint
+#   cluster_ca_certificate = local.cluster_ca_certificate
+#   exec {
+#     api_version = "client.authentication.k8s.io/v1beta1"
+#     args        = ["ce", "cluster", "generate-token", "--cluster-id", local.cluster_id, "--region", local.cluster_region]
+#     command     = "oci"
+#   }
+# }
 
-# https://docs.cloud.oracle.com/en-us/iaas/Content/ContEng/Tasks/contengdownloadkubeconfigfile.htm#notes
-provider "helm" {
-  kubernetes {
-    host                   = local.cluster_endpoint
-    cluster_ca_certificate = local.cluster_ca_certificate
-    exec {
-      api_version = "client.authentication.k8s.io/v1beta1"
-      args        = ["ce", "cluster", "generate-token", "--cluster-id", local.cluster_id, "--region", local.cluster_region]
-      command     = "oci"
-    }
-  }
-}
+# # https://docs.cloud.oracle.com/en-us/iaas/Content/ContEng/Tasks/contengdownloadkubeconfigfile.htm#notes
+# provider "helm" {
+#   kubernetes {
+#     host                   = local.cluster_endpoint
+#     cluster_ca_certificate = local.cluster_ca_certificate
 
-locals {
-  cluster_endpoint       = yamldecode(data.oci_containerengine_cluster_kube_config.oke.content)["clusters"][0]["cluster"]["server"]
-  cluster_ca_certificate = base64decode(yamldecode(data.oci_containerengine_cluster_kube_config.oke.content)["clusters"][0]["cluster"]["certificate-authority-data"])
-  cluster_id             = yamldecode(data.oci_containerengine_cluster_kube_config.oke.content)["users"][0]["user"]["exec"]["args"][4]
-  cluster_region         = yamldecode(data.oci_containerengine_cluster_kube_config.oke.content)["users"][0]["user"]["exec"]["args"][6]
-}
\ No newline at end of file
+#     exec {
+#       api_version = "client.authentication.k8s.io/v1beta1"
+#       args        = ["ce", "cluster", "generate-token", "--cluster-id", local.cluster_id, "--region", local.cluster_region]
+#       command     = "oci"
+#     }
+#   }
+# }
diff --git a/sa-token.yaml b/sa-token.yaml
new file mode 100644
index 0000000..f546df1
--- /dev/null
+++ b/sa-token.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  annotations:
+    kubernetes.io/service-account.name: cluster-autoscaler
+  labels:
+    k8s-addon: cluster-autoscaler.addons.k8s.io
+    k8s-app: cluster-autoscaler
+  name: cluster-autoscaler
+  namespace: kube-system
+type: kubernetes.io/service-account-token
\ No newline at end of file