Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use instance ID as node name when AWS CCM supports it #12862

Merged
merged 2 commits into from
Dec 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/kops-controller/pkg/config/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ type ServerOptions struct {
SigningCAs []string `json:"signingCAs"`
// CertNames is the list of active certificate names.
CertNames []string `json:"certNames"`

// UseInstanceIDForNodeName uses the instance ID instead of the hostname for the node name.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently only AWS supports verified nodes, but should we still say that this is AWS specific?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GCE also has a verifier, but it doesn't have this issue of its CCM supporting two node naming conventions.

UseInstanceIDForNodeName bool `json:"useInstanceIDForNodeName,omitempty"`
}

type ServerProviderOptions struct {
Expand Down
2 changes: 1 addition & 1 deletion cmd/kops-controller/pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func (s *Server) bootstrap(w http.ResponseWriter, r *http.Request) {

ctx := r.Context()

id, err := s.verifier.VerifyToken(ctx, r.Header.Get("Authorization"), body)
id, err := s.verifier.VerifyToken(ctx, r.Header.Get("Authorization"), body, s.opt.Server.UseInstanceIDForNodeName)
if err != nil {
klog.Infof("bootstrap %s verify err: %v", r.RemoteAddr, err)
w.WriteHeader(http.StatusForbidden)
Expand Down
3 changes: 3 additions & 0 deletions docs/releases/1.23-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ This is a document to gather the release notes prior to the release.

## Other significant changes

* If the Kubernetes version is 1.23 or later and the external AWS Cloud Controller Manager is
being used, then Kubernetes Node resources will be named after their AWS instance ID instead of their domain name.

# Breaking changes

* Support for Kubernetes version 1.17 has been removed.
Expand Down
3 changes: 2 additions & 1 deletion nodeup/pkg/model/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,5 +623,6 @@ func (b *KubeletBuilder) kubeletNames() ([]string, error) {
return nil, fmt.Errorf("error describing instances: %v", err)
}

return awsup.GetInstanceCertificateNames(result)
useInstanceIDForNodeName := b.Cluster.Spec.ExternalCloudControllerManager != nil && b.IsKubernetesGTE("1.23")
return awsup.GetInstanceCertificateNames(result, useInstanceIDForNodeName)
}
6 changes: 6 additions & 0 deletions pkg/apis/nodeup/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ type Config struct {
APIServerConfig *APIServerConfig `json:",omitempty"`
// NvidiaGPU contains the configuration for nvidia
NvidiaGPU *kops.NvidiaGPUConfig `json:",omitempty"`
// UseInstanceIDForNodeName uses the instance ID instead of the hostname for the node name.
UseInstanceIDForNodeName bool `json:"useInstanceIDForNodeName,omitempty"`
}

// BootConfig is the configuration for the nodeup binary that might be too big to fit in userdata.
Expand Down Expand Up @@ -206,6 +208,10 @@ func NewConfig(cluster *kops.Cluster, instanceGroup *kops.InstanceGroup) (*Confi
config.DefaultMachineType = fi.String(strings.Split(instanceGroup.Spec.MachineType, ",")[0])
}

if cluster.Spec.ExternalCloudControllerManager != nil && cluster.IsKubernetesGTE("1.23") && cluster.Spec.CloudProvider == string(kops.CloudProviderAWS) {
config.UseInstanceIDForNodeName = true
}

return &config, &bootConfig
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/bootstrap/authenticate.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,5 @@ type VerifyResult struct {

// Verifier verifies authentication credentials for requests.
type Verifier interface {
VerifyToken(ctx context.Context, token string, body []byte) (*VerifyResult, error)
VerifyToken(ctx context.Context, token string, body []byte, useInstanceIDForNodeName bool) (*VerifyResult, error)
}
10 changes: 6 additions & 4 deletions upup/pkg/fi/cloudup/awsup/aws_cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -2071,8 +2071,8 @@ func GetRolesInInstanceProfile(c AWSCloud, profileName string) ([]string, error)
}

// GetInstanceCertificateNames returns the instance hostname and addresses that should go into certificates.
// The first value is the node name and any additional values are IP addresses.
func GetInstanceCertificateNames(instances *ec2.DescribeInstancesOutput) (addrs []string, err error) {
// The first value is the node name and any additional values are the DNS name and IP addresses.
func GetInstanceCertificateNames(instances *ec2.DescribeInstancesOutput, useInstanceIDForNodeName bool) (addrs []string, err error) {
if len(instances.Reservations) != 1 {
return nil, fmt.Errorf("too many reservations returned for the single instance-id")
}
Expand All @@ -2083,9 +2083,11 @@ func GetInstanceCertificateNames(instances *ec2.DescribeInstancesOutput) (addrs

instance := instances.Reservations[0].Instances[0]

name := *instance.PrivateDnsName
if useInstanceIDForNodeName {
addrs = append(addrs, *instance.InstanceId)
}

addrs = append(addrs, name)
addrs = append(addrs, *instance.PrivateDnsName)

// We only use data for the first interface, and only the first IP
for _, iface := range instance.NetworkInterfaces {
Expand Down
4 changes: 2 additions & 2 deletions upup/pkg/fi/cloudup/awsup/aws_verifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ type ResponseMetadata struct {
RequestId string `xml:"RequestId"`
}

func (a awsVerifier) VerifyToken(ctx context.Context, token string, body []byte) (*bootstrap.VerifyResult, error) {
func (a awsVerifier) VerifyToken(ctx context.Context, token string, body []byte, useInstanceIDForNodeName bool) (*bootstrap.VerifyResult, error) {
if !strings.HasPrefix(token, AWSAuthenticationTokenPrefix) {
return nil, fmt.Errorf("incorrect authorization type")
}
Expand Down Expand Up @@ -232,7 +232,7 @@ func (a awsVerifier) VerifyToken(ctx context.Context, token string, body []byte)

instance := instances.Reservations[0].Instances[0]

addrs, err := GetInstanceCertificateNames(instances)
addrs, err := GetInstanceCertificateNames(instances, useInstanceIDForNodeName)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func NewTPMVerifier(opt *gcetpm.TPMVerifierOptions) (bootstrap.Verifier, error)

var _ bootstrap.Verifier = &tpmVerifier{}

func (v *tpmVerifier) VerifyToken(ctx context.Context, authToken string, body []byte) (*bootstrap.VerifyResult, error) {
func (v *tpmVerifier) VerifyToken(ctx context.Context, authToken string, body []byte, useInstanceIDForNodeName bool) (*bootstrap.VerifyResult, error) {
// Reminder: we shouldn't trust any data we get from the client until we've checked the signature (and even then...)
// Thankfully the GCE SDK does seem to escape the parameters correctly, for example.

Expand Down
6 changes: 5 additions & 1 deletion upup/pkg/fi/cloudup/template_functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,10 @@ func (tf *TemplateFunctions) KopsControllerConfig() (string, error) {
Region: tf.Region,
}

if cluster.Spec.ExternalCloudControllerManager != nil && cluster.IsKubernetesGTE("1.23") {
config.Server.UseInstanceIDForNodeName = true
}

case kops.CloudProviderGCE:
c := tf.cloud.(gce.GCECloud)

Expand All @@ -599,7 +603,7 @@ func (tf *TemplateFunctions) KopsControllerConfig() (string, error) {
}
}

if tf.Cluster.Spec.IsKopsControllerIPAM() {
if cluster.Spec.IsKopsControllerIPAM() {
config.EnableCloudIPAM = true
}

Expand Down
15 changes: 10 additions & 5 deletions upup/pkg/fi/nodeup/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ func completeWarmingLifecycleAction(cloud awsup.AWSCloud, modelContext *model.No
}

func evaluateSpec(c *NodeUpCommand, nodeupConfig *nodeup.Config) error {
hostnameOverride, err := evaluateHostnameOverride(api.CloudProviderID(c.cluster.Spec.CloudProvider))
hostnameOverride, err := evaluateHostnameOverride(api.CloudProviderID(c.cluster.Spec.CloudProvider), nodeupConfig.UseInstanceIDForNodeName)
if err != nil {
return err
}
Expand Down Expand Up @@ -477,14 +477,19 @@ func evaluateSpec(c *NodeUpCommand, nodeupConfig *nodeup.Config) error {
return nil
}

func evaluateHostnameOverride(cloudProvider api.CloudProviderID) (string, error) {
func evaluateHostnameOverride(cloudProvider api.CloudProviderID, useInstanceIDForNodeName bool) (string, error) {
switch cloudProvider {
case api.CloudProviderAWS:
hostnameBytes, err := vfs.Context.ReadFile("metadata://aws/meta-data/local-hostname")
source := "local-hostname"
if useInstanceIDForNodeName {
source = "instance-id"
}
nodeNameBytes, err := vfs.Context.ReadFile("metadata://aws/meta-data/" + source)
if err != nil {
return "", fmt.Errorf("error reading local-hostname from AWS metadata: %v", err)
return "", fmt.Errorf("error reading %s from AWS metadata: %v", source, err)
}
return string(hostnameBytes), nil
johngmyers marked this conversation as resolved.
Show resolved Hide resolved
return string(nodeNameBytes), nil

case api.CloudProviderGCE:
// This lets us tolerate broken hostnames (i.e. systemd)
b, err := vfs.Context.ReadFile("metadata://gce/instance/hostname")
Expand Down