diff --git a/.evergreen-functions.yml b/.evergreen-functions.yml
index f5766d127..daac22d9a 100644
--- a/.evergreen-functions.yml
+++ b/.evergreen-functions.yml
@@ -297,7 +297,6 @@ functions:
       params:
         env:
           SKIP_MINIKUBE_SETUP: ${skip_minikube_setup!|false}
-          SKIP_INSTALL_REQUIREMENTS: ${skip_install_python_requirements!|true}
         working_dir: src/github.com/mongodb/mongodb-kubernetes
         add_to_path:
           - ${workdir}/bin
diff --git a/scripts/dev/recreate_python_venv.sh b/scripts/dev/recreate_python_venv.sh
index bc86bd455..4b8df991d 100755
--- a/scripts/dev/recreate_python_venv.sh
+++ b/scripts/dev/recreate_python_venv.sh
@@ -118,14 +118,8 @@ PYENV_VERSION="${PYTHON_VERSION}" python -m venv venv
 source venv/bin/activate
 pip install --upgrade pip
 
-skip_requirements="${SKIP_INSTALL_REQUIREMENTS:-false}"
-if [[ "${skip_requirements}" != "true" ]]; then
-  echo "Installing requirements.txt..."
-  pip install -r requirements.txt
-else
-  echo "Skipping requirements.txt installation."
-  pip install requests
-fi
+echo "Installing requirements.txt..."
+pip install -r requirements.txt
 
 echo "Python venv was recreated successfully."
 echo "Using Python: $(which python) ($(python --version))" >&2
diff --git a/scripts/dev/setup_ibm_container_runtime.sh b/scripts/dev/setup_ibm_container_runtime.sh
index afd544679..c750939a8 100755
--- a/scripts/dev/setup_ibm_container_runtime.sh
+++ b/scripts/dev/setup_ibm_container_runtime.sh
@@ -1,43 +1,74 @@
 #!/usr/bin/env bash
-set -Eeou pipefail
+set -Eeoux pipefail
 
-echo "Cleaning DNF cache..."
-sudo dnf clean all && sudo rm -r /var/cache/dnf
+echo "Setting up IBM container runtime (rootful podman for minikube)"
 
-echo "Installing/upgrading crun..."
-sudo dnf upgrade -y crun --disableplugin=subscription-manager || \
-sudo dnf install -y crun --disableplugin=subscription-manager || \
-sudo yum upgrade -y crun --disableplugin=subscription-manager || \
-sudo yum install -y crun --disableplugin=subscription-manager
-
-if ! crun --version &>/dev/null; then
-  echo "❌ crun installation failed"
-  exit 1
+# Install crun if not present (OCI runtime for cgroup v2)
+if ! command -v crun &>/dev/null; then
+  echo "Installing crun..."
+  sudo dnf install -y crun --disableplugin=subscription-manager 2>/dev/null || \
+    sudo yum install -y crun --disableplugin=subscription-manager 2>/dev/null || \
+    echo "Warning: Could not install crun"
+else
+  echo "crun already installed: $(crun --version | head -1)"
 fi
 
-current_version=$(crun --version | head -n1)
-echo "✅ Using crun: ${current_version}"
+# Clean up stale container state (safe for shared CI machines)
+cleanup_stale_state() {
+  echo "Cleaning up stale container state..."
 
-# Clean up any existing conflicting configurations
-echo "Cleaning up existing container configurations..."
-rm -f ~/.config/containers/containers.conf 2>/dev/null || true
-sudo rm -f /root/.config/containers/containers.conf 2>/dev/null || true
-sudo rm -f /etc/containers/containers.conf 2>/dev/null || true
+  # Skip if minikube is running
+  if command -v minikube &>/dev/null && minikube status &>/dev/null 2>&1; then
+    echo " Minikube running - skipping cleanup"
+    return 0
+  fi
 
-crun_path=$(which crun)
-echo "Using crun path: ${crun_path}"
+  # Kill orphaned root conmon processes (PPID=1 means orphaned)
+  for pid in $(sudo pgrep conmon 2>/dev/null); do
+    ppid=$(ps -o ppid= -p "$pid" 2>/dev/null | tr -d ' ')
+    if [[ "$ppid" == "1" ]]; then
+      echo " Killing orphaned conmon $pid"
+      sudo kill -9 "$pid" 2>/dev/null || true
+    fi
+  done
 
-config="[containers]
-cgroup_manager = \"cgroupfs\"
+  # Clean stale lock files
+  sudo find /run/crun -name "*.lock" -mmin +60 -delete 2>/dev/null || true
 
-[engine]
-runtime = \"crun\""
+  # Prune exited containers and dangling volumes
+  sudo podman container prune -f 2>/dev/null || true
+  sudo podman volume prune -f 2>/dev/null || true
+}
 
-mkdir -p ~/.config/containers
-echo "${config}" > ~/.config/containers/containers.conf
+cleanup_stale_state
 
-sudo mkdir -p /root/.config/containers
-echo "${config}" | sudo tee /root/.config/containers/containers.conf >/dev/null
+# Test sudo podman (used by minikube in rootful mode)
+echo "Testing sudo podman..."
+if ! sudo podman run --rm docker.io/library/alpine:latest echo "sudo podman works" 2>/dev/null; then
+  echo "Sudo podman not working, resetting..."
+  sudo podman system reset --force 2>/dev/null || true
+  sleep 1
+
+  if sudo podman run --rm docker.io/library/alpine:latest echo "sudo podman works" 2>/dev/null; then
+    echo "Sudo podman working after reset"
+  else
+    echo "Warning: Sudo podman still not working"
+  fi
+else
+  echo "Sudo podman working"
+fi
+
+# Configure root-level podman
+sudo mkdir -p /etc/containers
+sudo tee /etc/containers/containers.conf > /dev/null << 'EOF'
+[containers]
+cgroup_manager = "systemd"
+
+[engine]
+runtime = "crun"
+EOF
 
-echo "✅ Configured crun"
+echo "Container runtime setup complete"
+echo " crun: $(crun --version 2>/dev/null | head -1 || echo 'not found')"
+echo " podman: $(sudo podman --version)"
diff --git a/scripts/evergreen/setup_minikube_host.sh b/scripts/evergreen/setup_minikube_host.sh
index 0e0e02427..41ece4280 100755
--- a/scripts/evergreen/setup_minikube_host.sh
+++ b/scripts/evergreen/setup_minikube_host.sh
@@ -40,7 +40,6 @@ run_setup_step() {
 
 # Setup Python environment (needed for AWS CLI pip installation)
 export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
-export SKIP_INSTALL_REQUIREMENTS=${SKIP_INSTALL_REQUIREMENTS:-true}
 run_setup_step "Python Virtual Environment" "scripts/dev/recreate_python_venv.sh"
 run_setup_step "AWS CLI Setup" "scripts/evergreen/setup_aws.sh"
 
diff --git a/scripts/funcs/install b/scripts/funcs/install
index d55b638d0..b4a960703 100644
--- a/scripts/funcs/install
+++ b/scripts/funcs/install
@@ -46,7 +46,9 @@ download_and_install_binary() {
   mkdir -p "${dir}"
 
   echo "Downloading ${url}"
-  curl --retry 5 --retry-delay 3 --retry-all-errors --fail --show-error --max-time 180 --silent -L "${url}" -o "${bin}"
+  # Use longer timeout (10 min) for large binaries like minikube (~140MB) on slow IBM networks
+  # Add -C - for resume capability in case of partial downloads
+  curl --retry 5 --retry-delay 10 --retry-all-errors --fail --show-error --max-time 600 -C - -L "${url}" -o "${bin}"
   chmod +x "${bin}"
   mv "${bin}" "${dir}"
   echo "Installed ${bin} to ${dir}"
diff --git a/scripts/minikube/setup_minikube.sh b/scripts/minikube/setup_minikube.sh
index ee969d092..09fe9e6cb 100755
--- a/scripts/minikube/setup_minikube.sh
+++ b/scripts/minikube/setup_minikube.sh
@@ -4,6 +4,7 @@
 
 source scripts/dev/set_env_context.sh
 source scripts/funcs/install
+
 set -Eeou pipefail
 
 set_limits() {
@@ -78,31 +79,19 @@ setup_local_registry_and_custom_image() {
     echo "✅ Local registry already running"
   fi
 
-  # Configure podman to trust local registry (both user and root level for minikube)
+  # Configure podman to trust local registry (rootful only since minikube uses sudo podman)
   echo "Configuring registries.conf to trust local registry..."
-
-  # User-level config
-  mkdir -p ~/.config/containers
-  cat > ~/.config/containers/registries.conf << 'EOF'
-[[registry]]
-location = "localhost:5000"
-insecure = true
-EOF
-
-  # Root-level config (since minikube uses sudo podman)
   sudo mkdir -p /root/.config/containers
   sudo tee /root/.config/containers/registries.conf << 'EOF' >/dev/null
 [[registry]]
 location = "localhost:5000"
 insecure = true
 EOF
+  echo "✅ Registry configuration created"
 
-  echo "✅ Registry configuration created for both user and root"
-  custom_image_tag="localhost:5000/kicbase:v0.0.47"
-
-  # Determine image tag
-  custom_image_tag="localhost:5000/kicbase:v0.0.47"
-  if curl -s http://localhost:5000/v2/kicbase/tags/list | grep -q "v0.0.47"; then
+  # Use kicbase v0.0.48 to match minikube v1.37.0 default
+  custom_image_tag="localhost:5000/kicbase:v0.0.48"
+  if curl -s http://localhost:5000/v2/kicbase/tags/list | grep -q "v0.0.48"; then
     echo "Custom kicbase image already exists in local registry"
     return 0
   fi
@@ -113,7 +102,7 @@
   # Build custom kicbase image
   mkdir -p "${PROJECT_DIR:-.}/scripts/minikube/kicbase"
   cat > "${PROJECT_DIR:-.}/scripts/minikube/kicbase/Dockerfile" << 'EOF'
-FROM gcr.io/k8s-minikube/kicbase:v0.0.47
+FROM gcr.io/k8s-minikube/kicbase:v0.0.48
 RUN if [ "$(uname -m)" = "ppc64le" ]; then \
     CRICTL_VERSION="v1.28.0" && \
     curl -L "https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-ppc64le.tar.gz" \
@@ -129,6 +118,21 @@ EOF
     echo "Failed to build custom image"
     return 1
   }
+
+  # Ensure registry is still running before push (may have been cleaned up during build)
+  if ! curl -s --max-time 2 http://localhost:5000/v2/_catalog >/dev/null 2>&1; then
+    echo "Registry not responding, restarting..."
+    sudo podman rm -f registry 2>/dev/null || true
+    sudo podman run -d -p 5000:5000 --name registry --restart=always docker.io/library/registry:2
+    for _ in {1..10}; do
+      if curl -s --max-time 2 http://localhost:5000/v2/_catalog >/dev/null 2>&1; then
+        echo "Registry restarted successfully"
+        break
+      fi
+      sleep 1
+    done
+  fi
+
   sudo podman push "${custom_image_tag}" --tls-verify=false || {
     echo "Failed to push to registry"
     return 1
@@ -139,9 +143,20 @@ EOF
   return 0
 }
 
-# Start minikube with podman driver
+# Start minikube with podman driver (rootful mode for reliable networking)
 start_minikube_cluster() {
-  echo ">>> Starting minikube cluster with podman driver..."
+  echo ">>> Starting minikube cluster with podman driver (rootful mode)..."
+
+  # IDEMPOTENT: If minikube is already running and healthy, skip setup
+  if "${PROJECT_DIR:-.}/bin/minikube" status &>/dev/null; then
+    echo "✅ Minikube is already running - verifying health..."
+ if "${PROJECT_DIR:-.}/bin/minikube" kubectl -- get nodes &>/dev/null; then + echo "✅ Minikube cluster is healthy - skipping setup" + return 0 + else + echo "⚠️ Minikube running but unhealthy - will recreate" + fi + fi # Clean up any existing minikube state to avoid cached configuration issues echo "Cleaning up any existing minikube state..." @@ -153,19 +168,27 @@ start_minikube_cluster() { echo "Ensuring clean minikube state..." "${PROJECT_DIR:-.}/bin/minikube" delete 2>/dev/null || true - local start_args=("--driver=podman") + # Clean up stale podman volumes + echo "Cleaning up stale podman volumes..." + sudo podman volume rm -f minikube 2>/dev/null || true + sudo podman network rm -f minikube 2>/dev/null || true + + # Use rootful podman - rootless has iptables/CNI issues on ppc64le and s390x + local start_args=("--driver=podman" "--container-runtime=containerd" "--rootless=false") start_args+=("--cpus=4" "--memory=8g") if [[ "${ARCH}" == "ppc64le" ]]; then echo "Using custom kicbase image for ppc64le with crictl..." - start_args+=("--base-image=localhost:5000/kicbase:v0.0.47") + start_args+=("--base-image=localhost:5000/kicbase:v0.0.48") start_args+=("--insecure-registry=localhost:5000") + # Use bridge CNI for ppc64le - kindnet doesn't have ppc64le images + start_args+=("--cni=bridge") + elif [[ "${ARCH}" == "s390x" ]]; then + # Use bridge CNI for s390x to avoid potential image availability issues + start_args+=("--cni=bridge") fi - # Use default bridge CNI to avoid Docker Hub rate limiting issues - # start_args+=("--cni=bridge") - echo "Starting minikube with args: ${start_args[*]}" if "${PROJECT_DIR:-.}/bin/minikube" start "${start_args[@]}"; then echo "✅ Minikube started successfully" @@ -194,14 +217,6 @@ else exit 1 fi -if [[ "${ARCH}" == "ppc64le" ]]; then - echo "" - echo ">>> Note: crictl will be patched into the minikube container after startup" -else - echo "" - echo ">>> Using standard kicbase image (crictl included for x86_64/aarch64/s390x)" -fi - # Start the minikube cluster start_minikube_cluster