[CI] Add bootnode checking CI jobs (#6889)

* Add check_bootnode script and github action
* fix mktemp for linux machines
* Update check_bootnodes.sh show logs to see what's going wrong
* fix ephemeral ports and fetch polkadot
* fix check-bootnodes.yml
* increase node spawn holdoff
* disable fail-fast
* refactor, separate out check_bootnodes and make it posix-compliant
* add new job for detecting new bootnodes
* fix check-bootnodes.yml
* only check all bootnodes on release
* Add test bad bootnode REVERT ME before merging PR. Should cause the test to fail, then when we remove it, we should succeed. Sadly doesn't account for a new successful bootnode, should ask if we have one we can use for testing.
* fix paths
* fix paths and git... hopefully
* this better work...
* fix
* test
* last test
* Revert "Add test bad bootnode" This reverts commit 540dd97.
* Update check_bootnodes.sh
* optimisations Begin polling the RPC node right after spawning, allowing us to break early on detecting peers
* increase holdoff to 5 seconds
* dont delete chainspec til we kill the node
* Update check-bootnodes.yml
* Remove checking bootnodes on pushing of this branch
---------
Co-authored-by: parity-processbot <>
paritytech · Mar 21, 2023 · 6efc49f · 6efc49f
1 parent 5dc0704
commit 6efc49f
Show file tree

Hide file tree

Showing 5 changed files with 226 additions and 0 deletions.
diff --git a/.github/workflows/check-bootnodes.yml b/.github/workflows/check-bootnodes.yml
@@ -0,0 +1,31 @@
+# checks all networks we care about (kusama, polkadot, westend) and ensures
+# the bootnodes in their respective chainspecs are contactable
+
+name: Check all bootnodes
+on:
+  push:
+    branches:
+      # Catches v1.2.3 and v1.2.3-rc1
+      - release-v[0-9]+.[0-9]+.[0-9]+*
+
+jobs:
+  check_bootnodes:
+    strategy:
+      # Do not cancel the other runtimes' jobs when one of them fails, so each
+      # runtime reports its own result
+      fail-fast: false
+      # One job per runtime; the value selects the chainspec passed to the check script below
+      matrix:
+        runtime: [westend, kusama, polkadot]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout sources
+        uses: actions/checkout@v3
+      # Ask the GitHub API for the latest paritytech/polkadot release, download the asset
+      # named "polkadot" to /usr/local/bin/polkadot, make it executable and print its version
+      - name: Install polkadot
+        shell: bash
+        run: |
+          curl -L "$(curl -s https://api.github.com/repos/paritytech/polkadot/releases/latest \
+          | jq -r '.assets | .[] | select(.name == "polkadot").browser_download_url')" \
+          | sudo tee /usr/local/bin/polkadot > /dev/null
+          sudo chmod +x /usr/local/bin/polkadot
+          polkadot --version
+      # Run the bootnode check against this runtime's chainspec from the checked-out sources
+      - name: Check ${{ matrix.runtime }} bootnodes
+        shell: bash
+        run: scripts/ci/github/check_bootnodes.sh node/service/chain-specs/${{ matrix.runtime }}.json
diff --git a/.github/workflows/check-new-bootnodes.yml b/.github/workflows/check-new-bootnodes.yml
@@ -0,0 +1,28 @@
+# If a chainspec file is updated with new bootnodes, we check to make sure those bootnodes are contactable
+
+name: Check new bootnodes
+on:
+  pull_request:
+    # Only run for pull requests that touch a chainspec file
+    paths:
+      - 'node/service/chain-specs/*.json'
+
+jobs:
+  check_bootnodes:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout sources
+        uses: actions/checkout@v3
+        with:
+          # Fetch the full history instead of a shallow clone; presumably needed so that
+          # origin/master, which check_new_bootnodes.sh reads the old chainspec from, is available
+          fetch-depth: 0
+      # Ask the GitHub API for the latest paritytech/polkadot release, download the asset
+      # named "polkadot" to /usr/local/bin/polkadot, make it executable and print its version
+      - name: Install polkadot
+        shell: bash
+        run: |
+          curl -L "$(curl -s https://api.github.com/repos/paritytech/polkadot/releases/latest \
+          | jq -r '.assets | .[] | select(.name == "polkadot").browser_download_url')" \
+          | sudo tee /usr/local/bin/polkadot > /dev/null
+          sudo chmod +x /usr/local/bin/polkadot
+          polkadot --version
+      - name: Check new bootnodes
+        shell: bash
+        run: |
+          scripts/ci/github/check_new_bootnodes.sh
diff --git a/scripts/ci/common/lib.sh b/scripts/ci/common/lib.sh
@@ -139,3 +139,57 @@ has_runtime_changes() {
     return 1
   fi
 }
+
+# given a bootnode and the path to a chainspec file, this function will create a new chainspec file
+# with only the bootnode specified and test whether that bootnode provides peers
+# The optional third argument is the index of the bootnode in the list of bootnodes, this is just used to pick an ephemeral
+# port for the node to run on. If you're only testing one, it'll just use the first ephemeral port
+# BOOTNODE: /dns/polkadot-connect-0.parity.io/tcp/443/wss/p2p/12D3KooWEPmjoRpDSUuiTjvyNDd8fejZ9eNWH5bE965nyBMDrB4o
+# CHAINSPEC_FILE: /path/to/polkadot.json
+check_bootnode(){
+    BOOTNODE=$1
+    BASE_CHAINSPEC=$2
+    RUNTIME=$(basename "$BASE_CHAINSPEC" | cut -d '.' -f 1)
+    MIN_PEERS=1
+
+    # Generate a temporary chainspec file containing only the bootnode we care about
+    TMP_CHAINSPEC_FILE="$RUNTIME.$(echo "$BOOTNODE" | tr '/' '_').tmp.json"
+    jq ".bootNodes = [\"$BOOTNODE\"] " < "$CHAINSPEC_FILE" > "$TMP_CHAINSPEC_FILE"
+
+    # Grab an unused port by binding to port 0 and then immediately closing the socket
+    # This is a bit of a hack, but it's the only way to do it in the shell
+    RPC_PORT=$(python -c "import socket; s=socket.socket(); s.bind(('', 0)); print(s.getsockname()[1]); s.close()")
+
+    echo "[+] Checking bootnode $BOOTNODE"
+    polkadot --chain "$TMP_CHAINSPEC_FILE" --no-mdns --rpc-port="$RPC_PORT" --tmp > /dev/null 2>&1 &
+    # Wait a few seconds for the node to start up
+    sleep 5
+    POLKADOT_PID=$!
+
+    MAX_POLLS=10
+    TIME_BETWEEN_POLLS=3
+    for _ in $(seq 1 "$MAX_POLLS"); do
+    # Check the health endpoint of the RPC node
+      PEERS="$(curl -s -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"system_health","params":[],"id":1}' http://localhost:"$RPC_PORT" | jq -r '.result.peers')"
+      # Sometimes due to machine load or other reasons, we don't get a response from the RPC node
+      # If $PEERS is an empty variable, make it 0 so we can still do the comparison
+      if [ -z "$PEERS" ]; then
+        PEERS=0
+      fi
+      if [ "$PEERS" -ge $MIN_PEERS ]; then
+        echo "[+] $PEERS peers found for $BOOTNODE"
+        echo "    Bootnode appears contactable"
+        kill $POLKADOT_PID
+        # Delete the temporary chainspec file now we're done running the node
+        rm "$TMP_CHAINSPEC_FILE"
+        return 0
+      fi
+      sleep "$TIME_BETWEEN_POLLS"
+    done
+    kill $POLKADOT_PID
+    # Delete the temporary chainspec file now we're done running the node
+    rm "$TMP_CHAINSPEC_FILE"
+    echo "[!] No peers found for $BOOTNODE"
+    echo "    Bootnode appears unreachable"
+    return 1
+}
diff --git a/scripts/ci/github/check_bootnodes.sh b/scripts/ci/github/check_bootnodes.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+# In this script, we check each bootnode for a given chainspec file and ensure they are contactable.
+# We do this by removing every bootnode from the chainspec with the exception of the one
+# we want to check. Then we spin up a node using this new chainspec, wait a little while
+# and then check our local node's RPC endpoint for the number of peers. If the node hasn't
+# been able to contact any other nodes, we can reason that the bootnode we used is not well-connected
+# or is otherwise uncontactable.
+
+# shellcheck source=scripts/ci/common/lib.sh
+source "$(dirname "${0}")/../common/lib.sh"
+CHAINSPEC_FILE="$1"
+RUNTIME=$(basename "$CHAINSPEC_FILE" | cut -d '.' -f 1)
+
+trap cleanup EXIT INT TERM
+
+cleanup(){
+    echo "[+] Script interrupted or ended. Cleaning up..."
+    # Kill all the polkadot processes
+    killall polkadot > /dev/null 2>&1
+    exit $1
+}
+
+# count the number of bootnodes
+BOOTNODES=$( jq -r '.bootNodes | length' "$CHAINSPEC_FILE" )
+# Make a temporary dir for chainspec files
+# Store an array of the bad bootnodes
+BAD_BOOTNODES=()
+GOOD_BOOTNODES=()
+PIDS=()
+
+echo "[+] Checking $BOOTNODES bootnodes for $RUNTIME"
+for i in $(seq 0 $((BOOTNODES-1))); do
+    BOOTNODE=$( jq -r .bootNodes["$i"] < "$CHAINSPEC_FILE" )
+    # Check each bootnode in parallel
+    check_bootnode "$BOOTNODE" "$CHAINSPEC_FILE" &
+    PIDS+=($!)
+    # Hold off 5 seconds between attempting to spawn nodes to stop the machine from getting overloaded
+    sleep 5
+done
+RESPS=()
+# Wait for all the nodes to finish
+for pid in "${PIDS[@]}"; do
+    wait "$pid"
+    RESPS+=($?)
+done
+echo
+# For any bootnodes that failed, add them to the bad bootnodes array
+for i in "${!RESPS[@]}"; do
+    if [ "${RESPS[$i]}" -ne 0 ]; then
+        BAD_BOOTNODES+=("$( jq -r .bootNodes["$i"] < "$CHAINSPEC_FILE" )")
+    fi
+done
+# For any bootnodes that succeeded, add them to the good bootnodes array
+for i in "${!RESPS[@]}"; do
+    if [ "${RESPS[$i]}" -eq 0 ]; then
+        GOOD_BOOTNODES+=("$( jq -r .bootNodes["$i"] < "$CHAINSPEC_FILE" )")
+    fi
+done
+
+# If we've got any uncontactable bootnodes for this runtime, print them
+if [ ${#BAD_BOOTNODES[@]} -gt 0 ]; then
+    echo "[!] Bad bootnodes found for $RUNTIME:"
+    for i in "${BAD_BOOTNODES[@]}"; do
+        echo "    $i"
+    done
+    cleanup 1
+else
+    echo "[+] All bootnodes for $RUNTIME are contactable"
+    cleanup 0
+fi
diff --git a/scripts/ci/github/check_new_bootnodes.sh b/scripts/ci/github/check_new_bootnodes.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+set -e
+# shellcheck source=scripts/ci/common/lib.sh
+source "$(dirname "${0}")/../common/lib.sh"
+
+# This script checks any new bootnodes added since the last git commit
+
+RUNTIMES=( kusama westend polkadot )
+
+WAS_ERROR=0
+
+for RUNTIME in "${RUNTIMES[@]}"; do
+    CHAINSPEC_FILE="node/service/chain-specs/$RUNTIME.json"
+    # Get the bootnodes from master's chainspec
+    git show origin/master:"$CHAINSPEC_FILE" | jq '{"oldNodes": .bootNodes}' > "$RUNTIME-old-bootnodes.json"
+    # Get the bootnodes from the current branch's chainspec
+    git show HEAD:"$CHAINSPEC_FILE" | jq '{"newNodes": .bootNodes}' > "$RUNTIME-new-bootnodes.json"
+    # Make a chainspec containing only the new bootnodes
+    jq ".bootNodes = $(jq -rs '.[0] * .[1] | .newNodes-.oldNodes' \
+        "$RUNTIME-new-bootnodes.json" "$RUNTIME-old-bootnodes.json")" \
+        < "node/service/chain-specs/$RUNTIME.json" \
+        > "$RUNTIME-new-chainspec.json"
+    # exit early if the new chainspec has no bootnodes
+    if [ "$(jq -r '.bootNodes | length' "$RUNTIME-new-chainspec.json")" -eq 0 ]; then
+        echo "[+] No new bootnodes for $RUNTIME"
+        # Clean up the temporary files
+        rm "$RUNTIME-new-chainspec.json" "$RUNTIME-old-bootnodes.json" "$RUNTIME-new-bootnodes.json"
+        continue
+    fi
+    # Check the new bootnodes
+    if ! "scripts/ci/github/check_bootnodes.sh" "$RUNTIME-new-chainspec.json"; then
+        WAS_ERROR=1
+    fi
+    # Clean up the temporary files
+    rm "$RUNTIME-new-chainspec.json" "$RUNTIME-old-bootnodes.json" "$RUNTIME-new-bootnodes.json"
+done
+
+
+if [ $WAS_ERROR -eq 1 ]; then
+    echo "[!] One of the new bootnodes failed to connect. Please check logs above."
+    exit 1
+fi