Skip to content

Commit

Permalink
🌱 Several small changes to make debugging bare-metal provisioning easier
Browse files Browse the repository at this point in the history
  • Loading branch information
guettli committed Mar 8, 2024
1 parent 503e30e commit 846813d
Show file tree
Hide file tree
Showing 20 changed files with 4,069 additions and 14 deletions.
4 changes: 4 additions & 0 deletions api/v1beta1/conditions_const.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ const (
CloudInitNotInstalledReason = "CloudInitNotInstalled"
// ServerNotFoundReason indicates that a bare metal server could not be found.
ServerNotFoundReason = "ServerNotFound"
// LinuxOnOtherDiskFoundReason indicates that the server can't be provisioned on the given WWN, since the reboot would fail.
LinuxOnOtherDiskFoundReason = "LinuxOnOtherDiskFound"
// SSHToRescueSystemFailedReason indicates that the rescue system can't be reached via ssh.
SSHToRescueSystemFailedReason = "SSHToRescueSystemFailed"
)

const (
Expand Down
18 changes: 14 additions & 4 deletions api/v1beta1/hetznerbaremetalhost_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,19 @@ type RootDeviceHints struct {

// IsValid checks whether rootDeviceHint is valid.
func (rdh *RootDeviceHints) IsValid() bool {
if rdh.WWN == "" && len(rdh.Raid.WWN) == 0 ||
rdh.WWN != "" && len(rdh.Raid.WWN) > 0 {
return false
return rdh.IsValidWithMessage() == ""
}

// IsValidWithMessage checks whether rootDeviceHint is valid.
// If valid, an empty string gets returned.
func (rdh *RootDeviceHints) IsValidWithMessage() string {
if rdh.WWN == "" && len(rdh.Raid.WWN) == 0 {
return "rootDeviceHint.wwn and rootDeviceHint.raid.wwn are empty. Please specify one or the other."
}
if rdh.WWN != "" && len(rdh.Raid.WWN) > 0 {
return "WWN specified twice (rootDeviceHint.wwn and rootDeviceHint.raid.wwn). Please specify only one or the other."
}
return true
return ""
}

// ListOfWWN gives the list of WWNs - no matter if it's in WWN or Raid.
Expand Down Expand Up @@ -111,6 +119,8 @@ const (
const (
// ErrorMessageMissingRootDeviceHints specifies the error message when no root device hints are specified.
ErrorMessageMissingRootDeviceHints string = "no root device hints specified"
// ErrorMessageInvalidRootDeviceHints specifies the error message when invalid root device hints are specified.
ErrorMessageInvalidRootDeviceHints string = "invalid root device hints specified"
// ErrorMessageMissingHetznerSecret specifies the error message when no Hetzner secret was found.
ErrorMessageMissingHetznerSecret string = "could not find HetznerSecret"
// ErrorMessageMissingRescueSSHSecret specifies the error message when no RescueSSH secret was found.
Expand Down
21 changes: 21 additions & 0 deletions api/v1beta1/hetznerbaremetalmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1beta1
import (
"errors"
"fmt"
"net/url"
"strings"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -194,6 +195,26 @@ func (image Image) GetDetails() (imagePath string, needsDownload bool, errorMess
return imagePath, needsDownload, errorMessage
}

// String returns a string representation. The password gets redacted from the URL.
func (image Image) String() string {
cleanURL := ""
if image.URL != "" {
u, err := url.Parse(image.URL)
if err != nil {
cleanURL = err.Error()
} else {
cleanURL = u.Redacted()
}
}
if cleanURL == "" {
cleanURL = image.Path
}
if image.Name == "" {
return cleanURL
}
return fmt.Sprintf("%s (%s)", image.Name, cleanURL)
}

// Partition defines the additional Partitions to be created.
type Partition struct {
// Mount defines the mount path for this filesystem.
Expand Down
50 changes: 50 additions & 0 deletions api/v1beta1/hetznerbaremetalmachine_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ package v1beta1

import (
"errors"
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/stretchr/testify/require"
capierrors "sigs.k8s.io/cluster-api/errors"
)

Expand Down Expand Up @@ -235,3 +237,51 @@ var _ = Describe("Test HasHostAnnotation", func() {
}),
)
})

func Test_Image_String(t *testing.T) {
for _, row := range []struct {
image Image
expected string
}{
{
Image{
URL: "",
Name: "",
Path: "",
},
"",
},
{
Image{
URL: "https://user:pwd@example.com/images/Ubuntu-2204-jammy-amd64-custom.tar.gz",
Name: "Ubuntu-2204",
Path: "",
},
"Ubuntu-2204 (https://user:xxxxx@example.com/images/Ubuntu-2204-jammy-amd64-custom.tar.gz)",
},
{
Image{
URL: "https://example.com/foo.tgz",
Name: "foo",
Path: "",
},
"foo (https://example.com/foo.tgz)",
},
{
Image{
URL: "https://example.com/nameless.tgz",
Path: "",
},
"https://example.com/nameless.tgz",
},
{
Image{
Name: "nfs",
Path: "/root/.oldroot/nfs/install/../images/Ubuntu-2004-focal-64-minimal-hwe.tar.gz",
},
"nfs (/root/.oldroot/nfs/install/../images/Ubuntu-2004-focal-64-minimal-hwe.tar.gz)",
},
} {
require.Equal(t, row.expected, row.image.String())
}
}
2 changes: 1 addition & 1 deletion controllers/hetznerbaremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (r *HetznerBareMetalHostReconciler) Reconcile(ctx context.Context, req ctrl
Name: bmHost.Spec.Status.HetznerClusterRef,
}
if err := r.Client.Get(ctx, hetznerClusterName, hetznerCluster); err != nil {
return reconcile.Result{}, errors.New("HetznerCluster not found")
return reconcile.Result{}, fmt.Errorf("failed to get HetznerCluster: %w", err)
}

log = log.WithValues("HetznerCluster", klog.KObj(hetznerCluster))
Expand Down
1 change: 1 addition & 0 deletions controllers/hetznerbaremetalhost_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -891,4 +891,5 @@ name="eth0" model="Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express
sshClient.On("ExecuteInstallImage", mock.Anything).Return(sshclient.Output{})
sshClient.On("Reboot").Return(sshclient.Output{})
sshClient.On("GetCloudInitOutput").Return(sshclient.Output{StdOut: "dummy content of /var/log/cloud-init-output.log"})
sshClient.On("DetectLinuxOnAnotherDisk", mock.Anything).Return(sshclient.Output{})
}
15 changes: 14 additions & 1 deletion hack/filter-caph-controller-manager-logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@

keys_to_skip = ['controller', 'controllerGroup', 'controllerKind', 'reconcileID',
'HetznerCluster', 'Cluster',
'namespace', 'name', 'Machine', 'stack', 'stacktrace']
'namespace', 'name', 'Machine', 'stack', 'stacktrace',
'logger',
]

rows_to_skip = [
'controller-runtime.webhook', 'certwatcher/certwatcher', 'Registering a validating webhook',
Expand All @@ -32,6 +34,9 @@
'"Starting reconciling cluster"',
'"Completed function"',
'"Adding request."',
'Update to resource only changes insignificant fields',
'"approved csr"',
'"Registering webhook"',
]

def main():
Expand Down Expand Up @@ -64,9 +69,17 @@ def handle_line(line):
t = data.pop('time', '')
t = re.sub(r'^.*T(.+)*\..+$', r'\1', t) # '2023-04-17T12:12:53.423Z

# skip too long entries
for key, value in list(data.items()):
if not isinstance(value, str):
continue
if len(value) > 1_000:
data[key] = value[:1_000] + "...cut..."

level = data.pop('level', '').ljust(5)
file = data.pop('file', '')
message = data.pop('message', '')

if not data:
data=''

Expand Down
42 changes: 42 additions & 0 deletions pkg/services/baremetal/client/mocks/ssh/Client.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

76 changes: 76 additions & 0 deletions pkg/services/baremetal/client/ssh/detect-linux-on-another-disk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash
# Copyright 2024 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# lsblk from util-linux 2.34 (Ubuntu 20.04) does not know column PARTTYPENAME

set -euo pipefail

trap 'echo "Warning: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0")"; exit 3' ERR

function usage() {
echo "$0 wwn1 [wwn2 ...]"
echo " Check if there is a Linux partition, but skip all WWNs given as arguments"
echo " Background: If we provision a disk, then there must not be a Linux OS on another partition"
echo " otherwise it is likely that the old OS gets booted, and not the new OS."
echo " Exit 0: If there is no Linux installation found."
echo " Exit 1: There is a Linux on a different disk.".
echo " Exit 3: Unexpected error."
echo "Existing WWNs:"
lsblk -oNAME,WWN | grep -vi loop || true
}

if [ $# -eq 0 ]; then
echo "Error: No WWN was provided."
echo
usage
exit 3
fi

# Iterate over all input arguments
for wwn in "$@"; do
if ! lsblk -l -oWWN | grep -qP '^'${wwn}'$'; then
echo "$wwn is not a WWN of this machine"
echo
usage
exit 3
fi
done
fail=0

lines=$(lsblk -r -oNAME,WWN,TYPE)

while read name wwn type; do
if [[ " $* " == *" $wwn "* ]]; then
#echo "ok: skipping $name $wwn, since it was an argument to the script."
continue
fi
root_directory_content=$(grub-fstest /dev/$name ls / 2>/dev/null || true | tr ' ' '\n' | sort | tr '\n' ' ')
if [[ $root_directory_content =~ .*boot/.*etc/.* ]]; then
echo "FAIL: $name $wwn looks like a Linux root partition on another disk."
fail=1
continue
fi
if [[ $root_directory_content =~ .*initrd.*vmlinuz.* ]]; then
echo "FAIL: $name $wwn looks like a Linux /boot partition on another disk."
fail=1
continue
fi
#echo "ok: $name $wwn $parttype, does not look like root Linux partition."
done < <(echo "$lines" | grep -v NAME | grep -i part)
if [ $fail -eq 1 ]; then
exit 1
fi
echo "Looks good. No Linux root partition on another devices"

12 changes: 12 additions & 0 deletions pkg/services/baremetal/client/ssh/ssh_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package sshclient
import (
"bufio"
"bytes"
_ "embed"
"encoding/base64"
"errors"
"fmt"
Expand All @@ -35,6 +36,9 @@ const (
sshTimeOut time.Duration = 5 * time.Second
)

//go:embed detect-linux-on-another-disk.sh
var detectLinuxOnAnotherDiskShellScript string

var downloadFromOciShellScript = `#!/bin/bash
# Copyright 2023 The Kubernetes Authors.
Expand Down Expand Up @@ -200,6 +204,7 @@ type Client interface {
CleanCloudInitInstances() Output
ResetKubeadm() Output
UntarTGZ() Output
DetectLinuxOnAnotherDisk(sliceOfWwns []string) Output
}

// Factory is the interface for creating new Client objects.
Expand Down Expand Up @@ -482,6 +487,13 @@ func (c *sshClient) ResetKubeadm() Output {
return output
}

func (c *sshClient) DetectLinuxOnAnotherDisk(sliceOfWwns []string) Output {
return c.runSSH(fmt.Sprintf(`cat <<'EOF' | bash -s -- %s
%s
EOF
`, strings.Join(sliceOfWwns, " "), detectLinuxOnAnotherDiskShellScript))
}

func (c *sshClient) UntarTGZ() Output {
fileName := "/installimage.tgz"
data, err := os.ReadFile(fileName)
Expand Down
6 changes: 3 additions & 3 deletions pkg/services/baremetal/client/ssh/ssh_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@ func Test_removeUselessLinesFromCloudInitOutput(t *testing.T) {
want string
}{
{
name: "ignore: 10000K .......... .......... .......... .......... .......... 6%!M(MISSING) 1s",
name: "ignore: 10000K ...",
s: "foo\n 10000K .......... .......... .......... .......... .......... 6%!M(MISSING) 1s\nbar",
want: "foo\nbar",
},
{
name: "ignore: ^10000K .......... .......... .......... .......... .......... 6%!M(MISSING) 1s",
name: "ignore: ^10000K ...2",
s: "foo\n10000K .......... .......... .......... .......... .......... 6%!M(MISSING) 1s\nbar",
want: "foo\nbar",
},
{
name: "ignore: Get:17 http://archive.ubuntu.com/ubuntu focal/universe Translation-en [5,124 kB[]",
name: "ignore: Get:17 http://...",
s: "foo\nGet:17 http://archive.ubuntu.com/ubuntu focal/universe Translation-en [5,124 kB[]\nbar",
want: "foo\nbar",
},
Expand Down

0 comments on commit 846813d

Please sign in to comment.