Skip to content

Commit

Permalink
cmd/openshift-install/gather: Recognize "connection refused"
Browse files Browse the repository at this point in the history
Before this commit, bootstrap machines that failed to come up would
look like [1]:

  level=info msg="Waiting up to 30m0s for the Kubernetes API at https://api.ci-op-6266tp8r-77109.origin-ci-int-aws.dev.rhcloud.com:6443..."
  level=error msg="Attempted to gather ClusterOperator status after installation failure: listing ClusterOperator objects: Get https://api.ci-op-6266tp8r-77109.origin-ci-int-aws.dev.rhcloud.com:6443/apis/config.openshift.io/v1/clusteroperators: dial tcp 3.221.214.197:6443: connect: connection refused"
  level=info msg="Pulling debug logs from the bootstrap machine"
  level=error msg="Attempted to gather debug logs after installation failure: failed to create SSH client, ensure the proper ssh key is in your keyring or specify with --key: dial tcp 3.84.188.207:22: connect: connection refused"
  level=fatal msg="Bootstrap failed to complete: waiting for Kubernetes API: context deadline exceeded"

With this commit, that last error will look like:

  level=error msg="Attempted to gather debug logs after installation failure: failed to connect to the bootstrap machine: dial tcp 3.84.188.207:22: connect: connection refused"

without the unrelated (to this failure mode) distraction about SSH
keys.

[1]: https://prow.svc.ci.openshift.org/view/gcs/origin-ci-test/logs/release-openshift-origin-installer-e2e-aws-upgrade/12076

Updated the commit to match with the latest changes.
  • Loading branch information
wking authored and rna-afk committed May 18, 2020
1 parent 11bfe33 commit e51cdd7
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions cmd/openshift-install/gather.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"path/filepath"
"strconv"
"strings"
"syscall"
"time"

configv1 "github.com/openshift/api/config/v1"
Expand Down Expand Up @@ -142,11 +143,17 @@ func runGatherBootstrapCmd(directory string) error {
func logGatherBootstrap(bootstrap string, port int, masters []string, directory string) error {
logrus.Info("Pulling debug logs from the bootstrap machine")
client, err := ssh.NewClient("core", net.JoinHostPort(bootstrap, strconv.Itoa(port)), gatherBootstrapOpts.sshKeys)
if err != nil && strings.Contains(err.Error(), "ssh: handshake failed: ssh: unable to authenticate") {
return errors.Wrap(err, "failed to create SSH client, ensure the private key is added to your authentication agent (ssh-agent) or specified with the --key parameter")
} else if err != nil {
if err != nil {
if strings.Contains(err.Error(), "ssh: handshake failed: ssh: unable to authenticate") {
return errors.Wrap(err, "failed to create SSH client, ensure the private key is added to your authentication agent (ssh-agent) or specified with the --key parameter")
} else if errors.Is(err, syscall.ECONNREFUSED) {
return errors.Wrap(err, "failed to connect to the bootstrap machine")
} else if len(gatherBootstrapOpts.sshKeys) == 0 {
return errors.Wrap(err, "failed to create SSH client, ensure the proper ssh key is in your keyring or specify with --key")
}
return errors.Wrap(err, "failed to create SSH client")
}

gatherID := time.Now().Format("20060102150405")
if err := ssh.Run(client, fmt.Sprintf("/usr/local/bin/installer-gather.sh --id %s %s", gatherID, strings.Join(masters, " "))); err != nil {
return errors.Wrap(err, "failed to run remote command")
Expand Down

0 comments on commit e51cdd7

Please sign in to comment.