Merged
2 changes: 1 addition & 1 deletion docker/cluster-cleaner/Dockerfile
@@ -1,6 +1,6 @@
FROM python:3-slim-buster

ADD https://storage.googleapis.com/kubernetes-release/release/v1.13.3/bin/linux/amd64/kubectl /usr/bin
ADD https://dl.k8s.io/release/v1.34.0/bin/linux/amd64/kubectl /usr/bin
RUN chmod +x /usr/bin/kubectl

COPY scripts/* /
4 changes: 2 additions & 2 deletions docker/cluster-cleaner/Makefile
@@ -1,4 +1,4 @@
IMAGE_VERSION=0.15
IMAGE_VERSION=0.18

.PHONY: all
all: build push install
@@ -18,7 +18,7 @@ install: build push
kubectl create namespace cluster-cleaner || true
helm template . \
--set cleanerVersion=$(IMAGE_VERSION) \
--set namespace=cluster-cleaner\
--set namespace=cluster-cleaner \
--set cleanerNamespace=cluster-cleaner > cluster-cleaner.yaml
kubectl apply -f cluster-cleaner.yaml
rm cluster-cleaner.yaml
23 changes: 15 additions & 8 deletions docker/cluster-cleaner/scripts/clean-failed-namespaces.sh
@@ -1,21 +1,24 @@
#!/usr/bin/env sh

touch error.log
Collaborator

Do we see/dump this error.log somewhere?

Contributor Author

yes, on stdout; the tail is on the line below

Contributor Author

it's just a hack to see error messages from expressions like var=$(command ...); when the command fails, we otherwise don't see the reason
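
A minimal sketch of that pattern, assuming a POSIX sh environment (the pod listing is only illustrative, not part of this PR):

```sh
#!/usr/bin/env sh

# Collect stderr in a file and stream it to stdout from a background tail,
# so errors raised inside $(...) substitutions are not silently lost.
touch error.log
tail -F error.log &

# Without the 2>error.log redirect, a failure here would leave "pods"
# empty with no visible reason in the job output.
pods=$(kubectl get pods -o name 2>error.log || true)
echo "found: ${pods}"
```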

tail -F error.log &

delete_resources_safely() {
resource_type="$1"
namespace="$2"

echo "Attempting normal deletion of $resource_type in $namespace..."
kubectl delete "${resource_type}" --all -n "${namespace}" --wait=true --timeout=10s || true
kubectl delete "${resource_type}" --all -n "${namespace}" --wait=true --timeout=10s 2>error.log|| true

# Check if any resources are still stuck
# Let's not fail here and continue deletion
resources=$(kubectl get "$resource_type" -n "${namespace}" --no-headers -o custom-columns=":metadata.name" 2>/dev/null || true)
resources=$(kubectl get "$resource_type" -n "${namespace}" --no-headers -o custom-columns=":metadata.name" 2>error.log || true)

for resource in ${resources}; do
echo "${resource_type}/${resource} is still present, force deleting..."

kubectl patch "${resource_type}" "${resource}" -n "${namespace}" -p '{"metadata":{"finalizers":null}}' --type=merge || true
kubectl delete "${resource_type}" "${resource}" -n "${namespace}" --force --grace-period=0 || true
kubectl patch "${resource_type}" "${resource}" -n "${namespace}" -p '{"metadata":{"finalizers":null}}' --type=merge 2>error.log || true
kubectl delete "${resource_type}" "${resource}" -n "${namespace}" --force --grace-period=0 2>error.log || true
done
}

@@ -29,11 +29,12 @@ if [ -z ${LABELS+x} ]; then
exit 1
fi


echo "Deleting namespaces for evg tasks that are older than ${DELETE_OLDER_THAN_AMOUNT} ${DELETE_OLDER_THAN_UNIT} with label ${LABELS}"
echo "Which are:"
kubectl get namespace -l "${LABELS}" -o name
for namespace in $(kubectl get namespace -l "${LABELS}" -o name); do
creation_time=$(kubectl get "${namespace}" -o jsonpath='{.metadata.creationTimestamp}' 2>/dev/null || echo "")
for namespace in $(kubectl get namespace -l "${LABELS}" -o name 2>error.log); do
creation_time=$(kubectl get "${namespace}" -o jsonpath='{.metadata.creationTimestamp}' 2>error.log || echo "")

if [ -z "$creation_time" ]; then
echo "Namespace ${namespace} does not exist or has no creation timestamp, skipping."
@@ -49,14 +53,17 @@ for namespace in $(kubectl get namespace -l "${LABELS}" -o name); do

echo "Deleting ${namespace_name}"

csrs_in_namespace=$(kubectl get csr -o name | grep "${namespace_name}" || true)
csrs_in_namespace=$(kubectl get csr -o name 2>error.log | grep "${namespace_name}" 2>/dev/null || true)
if [ -n "${csrs_in_namespace}" ]; then
kubectl delete "${csrs_in_namespace}"
kubectl delete "${csrs_in_namespace}" 2>error.log
fi

delete_resources_safely "mdb" "${namespace_name}"
delete_resources_safely "mdbu" "${namespace_name}"
delete_resources_safely "mdbc" "${namespace_name}"
delete_resources_safely "mdbmc" "${namespace_name}"
delete_resources_safely "om" "${namespace_name}"
delete_resources_safely "clustermongodbroles" "${namespace_name}"

echo "Attempting to delete namespace: ${namespace_name}"

11 changes: 0 additions & 11 deletions docker/cluster-cleaner/scripts/clean-ops-manager.sh

This file was deleted.

9 changes: 0 additions & 9 deletions docker/cluster-cleaner/scripts/construction-site.sh

This file was deleted.

23 changes: 0 additions & 23 deletions docker/cluster-cleaner/scripts/delete-old-builder-pods.sh

This file was deleted.

28 changes: 0 additions & 28 deletions docker/cluster-cleaner/templates/job.yaml
@@ -84,34 +84,6 @@ spec:
- name: LABELS
value: "evg=task"

# Clean old builder pods
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: cluster-cleaner-delete-builder-pods
namespace: {{ .Values.cleanerNamespace }}
spec:
# Runs every hour
schedule: "0 * * * *"
jobTemplate:
spec:
template:
spec:
serviceAccountName: cluster-cleaner
restartPolicy: Never

containers:
- name: cluster-cleaner
image: 268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/cluster-cleaner:{{ .Values.cleanerVersion }}
imagePullPolicy: Always
command: ["./delete-old-builder-pods.sh"]
env:
- name: DELETE_OLDER_THAN_UNIT
value: "minutes"
- name: DELETE_OLDER_THAN_AMOUNT
value: "20"

# Clean old certificates
---
apiVersion: batch/v1
48 changes: 0 additions & 48 deletions docker/cluster-cleaner/templates/ops_manager_cleaner_job.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion scripts/evergreen/e2e/e2e.sh
@@ -123,7 +123,7 @@ echo "TEST_NAME is set to: ${TEST_NAME}"

delete_operator "${NAMESPACE}"

# We'll have the task running for the alloca ted time, minus the time it took us
# We'll have the task running for the allocated time, minus the time it took us
# to get all the way here, assuming configuring and deploying the operator can
# take a bit of time. This is needed because Evergreen kills the process *AND*
# Docker containers running on the host when it hits a timeout. Under these
46 changes: 35 additions & 11 deletions scripts/funcs/kubernetes
@@ -98,15 +98,15 @@ create_image_registries_secret() {
context=$1
namespace=$2
secret_name=$3

# Detect the correct config file path based on container runtime
local config_file
local temp_config_file=""
if command -v podman &> /dev/null && (podman info &> /dev/null || sudo podman info &> /dev/null); then
# For Podman, use root's auth.json since minikube uses sudo podman
config_file="/root/.config/containers/auth.json"
echo "Using Podman config: ${config_file}"

# Create a temporary copy that the current user can read
temp_config_file=$(mktemp)
sudo cp "${config_file}" "${temp_config_file}"
@@ -117,7 +117,7 @@ create_image_registries_secret() {
config_file="${HOME}/.docker/config.json"
echo "Using Docker config: ${config_file}"
fi

# shellcheck disable=SC2154
if kubectl --context "${context}" get namespace "${namespace}"; then
kubectl --context "${context}" -n "${namespace}" delete secret "${secret_name}" --ignore-not-found
@@ -127,7 +127,7 @@ create_image_registries_secret() {
else
echo "Skipping creating pull secret in ${context}/${namespace}. The namespace doesn't exist yet."
fi

# Clean up temporary file
if [[ -n "${temp_config_file}" ]] && [[ -f "${temp_config_file}" ]]; then
rm -f "${temp_config_file}"
@@ -156,6 +156,26 @@ create_image_registries_secret() {
fi
}

force_delete_all_resources_from_namespace() {
resource_type="$1"
namespace="$2"

echo "Attempting normal deletion of ${resource_type} in ${namespace}..."
kubectl delete "${resource_type}" --all -n "${namespace}" --wait=true --timeout=10s || true

# Check if any resources are still stuck
echo "Checking if any resources are still stuck:"
kubectl get "${resource_type}" -n "${namespace}" --no-headers -o custom-columns=":metadata.name" || true
resources=$(kubectl get "${resource_type}" -n "${namespace}" --no-headers -o custom-columns=":metadata.name" 2>/dev/null || true)

for resource in ${resources}; do
echo "${resource_type}/${resource} is still present, force deleting..."

kubectl patch "${resource_type}" "${resource}" -n "${namespace}" -p '{"metadata":{"finalizers":null}}' --type=merge || true
kubectl delete "${resource_type}" "${resource}" -n "${namespace}" --force --grace-period=0 || true
done
}

reset_namespace() {
context=$1
namespace=$2
@@ -166,19 +186,23 @@ reset_namespace() {

set +e

helm uninstall --kube-context="${context}" mongodb-kubernetes-operator || true &
helm uninstall --kube-context="${context}" mongodb-kubernetes-operator-multi-cluster || true &

# Cleans the namespace. Note that fine-grained cleanup is performed instead of just deleting the namespace, as it takes
# considerably less time
title "Cleaning Kubernetes resources in context: ${context}"

ensure_namespace "${namespace}"

kubectl delete --context "${context}" mdb --all -n "${namespace}" || true
kubectl delete --context "${context}" mdbu --all -n "${namespace}" || true
kubectl delete --context "${context}" mdbmc --all -n "${namespace}" || true
kubectl delete --context "${context}" om --all -n "${namespace}" || true
force_delete_all_resources_from_namespace "mdb" "${namespace}"
force_delete_all_resources_from_namespace "mdbu" "${namespace}"
force_delete_all_resources_from_namespace "mdbc" "${namespace}"
force_delete_all_resources_from_namespace "mdbmc" "${namespace}"
force_delete_all_resources_from_namespace "om" "${namespace}"
force_delete_all_resources_from_namespace "clustermongodbroles" "${namespace}"

echo "Sleeping to allow the operator to perform cleanups"
sleep 10
helm uninstall --kube-context="${context}" mongodb-kubernetes-operator || true &
helm uninstall --kube-context="${context}" mongodb-kubernetes-operator-multi-cluster || true &

# Openshift variant runs all tests sequentially. In order to avoid clashes between tests, we need to wait till
# the namespace is gone. This triggers OpenShift Project deletion, which is a "Namespace on Steroids", and it takes