From 3afb970fea4cbf5e8d3f35104850b8afbc0670ad Mon Sep 17 00:00:00 2001 From: Robert Houghton Date: Thu, 2 Jan 2020 16:09:37 -0800 Subject: [PATCH] GEODE-7590 - Use GCESysprep as last-step in Windows Packer build. (#4522) * Fix instance cleanup per zone. * delete_instance.sh deletes in all zones just in case. * Ask for zones, don't assume. * Use Packer release 1.4.5, not latest prerelease. * Need reboot before running GCESysprep in case of windows update. --- ci/images/alpine-tools/Dockerfile | 4 ++-- .../windows-packer.json | 21 +++++++------------ ci/pipelines/geode-build/jinja.template.yml | 6 +++--- ci/scripts/create_instance.sh | 21 ++++++++++++++----- ci/scripts/delete_instance.sh | 10 ++++----- 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/ci/images/alpine-tools/Dockerfile b/ci/images/alpine-tools/Dockerfile index bc08fc22314f..80ccb873140e 100644 --- a/ci/images/alpine-tools/Dockerfile +++ b/ci/images/alpine-tools/Dockerfile @@ -16,8 +16,8 @@ FROM openjdk:8-jdk-alpine COPY --from=google/cloud-sdk:alpine /google-cloud-sdk /google-cloud-sdk -COPY --from=hashicorp/packer:latest /bin/packer /usr/local/bin/packer -COPY --from=hashicorp/packer:1.3.5 /bin/packer /usr/local/bin/packer135 +#COPY --from=hashicorp/packer:latest /bin/packer /usr/local/bin/packer +COPY --from=hashicorp/packer:1.4.5 /bin/packer /usr/local/bin/packer ENV PATH /google-cloud-sdk/bin:$PATH RUN apk --no-cache add \ bash \ diff --git a/ci/images/google-windows-geode-builder/windows-packer.json b/ci/images/google-windows-geode-builder/windows-packer.json index f3580a8ce1d5..926842311a1b 100644 --- a/ci/images/google-windows-geode-builder/windows-packer.json +++ b/ci/images/google-windows-geode-builder/windows-packer.json @@ -43,9 +43,6 @@ "Install-WindowsFeature Containers" ] }, - { - "type": "windows-restart" - }, { "type": "powershell", "inline": [ @@ -60,9 +57,6 @@ "Stop-Service wuauserv" ] }, - { - "type": "windows-restart" - }, { "type": "powershell", "inline": [ @@ -71,9
+65,6 @@ "Invoke-WebRequest https://chocolatey.org/install.ps1 -UseBasicParsing | Invoke-Expression" ] }, - { - "type": "windows-restart" - }, { "type": "powershell", "inline": [ @@ -103,9 +94,6 @@ "(Get-Content \"C:\\Program Files\\OpenSSH-Win64\\sshd_config_default\") -replace '(Match Group administrators)', '#$1' -replace '(\\s*AuthorizedKeysFile.*)', '#$1' | Out-File \"C:\\Program Files\\OpenSSH-Win64\\sshd_config_default\" -encoding UTF8" ] }, - { - "type": "windows-restart" - }, { "type": "powershell", "inline": [ @@ -129,7 +117,14 @@ ] }, { - "type": "windows-restart" + "type": "windows-restart", + "restart_timeout": "30m" + }, + { + "type": "powershell", + "inline": [ + "GCESysprep -NoShutDown" + ] } ] } diff --git a/ci/pipelines/geode-build/jinja.template.yml b/ci/pipelines/geode-build/jinja.template.yml index a7249ab68303..da6e399246fc 100644 --- a/ci/pipelines/geode-build/jinja.template.yml +++ b/ci/pipelines/geode-build/jinja.template.yml @@ -281,7 +281,7 @@ jobs: - name: attempts-log path: new - name: instance-data - timeout: 15m + timeout: 20m attempts: 5 on_failure: task: delete_instance @@ -560,7 +560,7 @@ jobs: - name: instance-data - name: attempts-log path: new - timeout: 15m + timeout: 20m attempts: 5 - task: rsync_code_up image: alpine-tools-image @@ -648,7 +648,7 @@ jobs: path: instance-data - name: attempts-log path: new - timeout: 15m + timeout: 20m attempts: 5 - do: - task: rsync_code_up-{{java_test_version.name}} diff --git a/ci/scripts/create_instance.sh b/ci/scripts/create_instance.sh index 7a9c7fef3117..9ec7d61a44ed 100755 --- a/ci/scripts/create_instance.sh +++ b/ci/scripts/create_instance.sh @@ -90,7 +90,7 @@ cp old/attempts new/ echo attempt >> new/attempts attempts=$(cat new/attempts | wc -l) -PERMITTED_ZONES=(us-central1-a us-central1-b us-central1-c us-central1-f) +PERMITTED_ZONES=($(gcloud compute zones list --filter="name~'us-central.*'" --format=json | jq -r .[].name)) if [ $attempts -eq 1 ]; then ZONE=${MY_ZONE} else @@ -99,7 
+99,7 @@ fi echo "Deploying to zone ${ZONE}" # Ensure no existing instance with this name in any zone -for KILL_ZONE in "${PERMITTED_ZONES}"; do +for KILL_ZONE in $(echo ${PERMITTED_ZONES[*]}); do gcloud compute instances delete ${INSTANCE_NAME} --zone=${KILL_ZONE} --quiet &>/dev/null || true done @@ -127,7 +127,6 @@ INSTANCE_INFORMATION=$(gcloud compute --project=${GCP_PROJECT} instances create --labels="${LABELS}" \ --tags="heavy-lifter" \ --scopes="default,storage-rw" \ - --metadata="disable-agent-updates=true" \ --format=json) CREATE_RC=$? @@ -161,13 +160,25 @@ if [[ -z "${WINDOWS_PREFIX}" ]]; then else # Set up ssh access for Windows systems echo -n "Setting windows password via gcloud." + INSTANCE_AGENT_READY_LINE="GCEWindowsAgent: GCE Agent Started" + INSTANCE_SETUP_FINSHED_LINE="GCEInstanceSetup: Instance setup finished" + SCRAPE_COMMAND_INSTANCE_READY="gcloud compute instances get-serial-port-output ${INSTANCE_NAME} --zone=${ZONE} | grep \"${INSTANCE_AGENT_READY_LINE}\" | wc -l" + SCRAPE_COMMAND_SETUP_FINSHED="gcloud compute instances get-serial-port-output ${INSTANCE_NAME} --zone=${ZONE} | grep \"${INSTANCE_SETUP_FINSHED_LINE}\" | wc -l" + while true; do + # Check that the instance agent has started at least 2x (first boot, plus activation) + # and that the "GCEInstanceSetup" script completed + echo -n "Waiting for startup scripts and windows activation to complete" + while [[ 2 -ne $(eval ${SCRAPE_COMMAND_INSTANCE_READY} 2> /dev/null) ]] || [[ 1 -ne $(eval ${SCRAPE_COMMAND_SETUP_FINSHED} 2> /dev/null) ]]; do + echo -n . + sleep 5 + done + echo "" + # Get a password PASSWORD=$( yes | gcloud beta compute reset-windows-password ${INSTANCE_NAME} --user=geode --zone=${ZONE} --format json | jq -r .password ) if [[ -n "${PASSWORD}" ]]; then break; fi - echo -n . 
- sleep 5 done ssh-keygen -N "" -f ${SSHKEY_FILE} diff --git a/ci/scripts/delete_instance.sh b/ci/scripts/delete_instance.sh index 341ffafa0bfa..c61608798618 100755 --- a/ci/scripts/delete_instance.sh +++ b/ci/scripts/delete_instance.sh @@ -30,11 +30,11 @@ done SCRIPTDIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" INSTANCE_NAME="$(cat instance-data/instance-name)" -ZONE="$(cat instance-data/zone)" - +PERMITTED_ZONES=($(gcloud compute zones list --filter="name~'us-central.*'" --format=json | jq -r .[].name)) echo 'StrictHostKeyChecking no' >> /etc/ssh/ssh_config -gcloud compute instances delete ${INSTANCE_NAME} \ - --zone=${ZONE} \ - --quiet || true +# Ensure no existing instance with this name in any zone +for KILL_ZONE in $(echo ${PERMITTED_ZONES[*]}); do + gcloud compute instances delete ${INSTANCE_NAME} --zone=${KILL_ZONE} --quiet &>/dev/null || true +done