Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use etcd-manager for the cilium etcd cluster #8750

Merged
merged 5 commits into from
Apr 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cmd/kops/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,16 @@ func TestPrivateCalico(t *testing.T) {
newIntegrationTest("privatecalico.example.com", "privatecalico").withPrivate().runTestCloudformation(t)
}

// TestPrivateCilium runs the privatecilium fixture with the private option
// enabled, once against the Terraform (AWS) target and once against the
// Cloudformation target.
func TestPrivateCilium(t *testing.T) {
	const (
		clusterName = "privatecilium.example.com"
		srcDir      = "privatecilium"
	)

	// Each target gets its own freshly constructed integration test.
	newIntegrationTest(clusterName, srcDir).withPrivate().runTestTerraformAWS(t)
	newIntegrationTest(clusterName, srcDir).withPrivate().runTestCloudformation(t)
}

// TestPrivateCiliumAdvanced runs the privateciliumadvanced fixture with the
// private option enabled, once against the Terraform (AWS) target and once
// against the Cloudformation target.
func TestPrivateCiliumAdvanced(t *testing.T) {
	const (
		clusterName = "privateciliumadvanced.example.com"
		srcDir      = "privateciliumadvanced"
	)

	// Each target gets its own freshly constructed integration test.
	newIntegrationTest(clusterName, srcDir).withPrivate().runTestTerraformAWS(t)
	newIntegrationTest(clusterName, srcDir).withPrivate().runTestCloudformation(t)
}

// TestPrivateCanal runs the test on a configuration with private topology, canal networking
func TestPrivateCanal(t *testing.T) {
newIntegrationTest("privatecanal.example.com", "privatecanal").withPrivate().runTestTerraformAWS(t)
Expand Down
26 changes: 26 additions & 0 deletions docs/networking.md
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,32 @@ The following command will launch your cluster with desired Cilium configuration
$ kops update cluster myclustername.mydns.io --yes
```

##### Using etcd for agent state sync

By default, Cilium will use CRDs for synchronizing agent state. This can cause performance problems on larger clusters. As of kops 1.18, kops can manage an etcd cluster using etcd-manager dedicated for cilium agent state sync. The [Cilium docs](https://docs.cilium.io/en/stable/gettingstarted/k8s-install-external-etcd/) contain recommendations for when this must be enabled.

Add the following to `spec.etcdClusters`:
Make sure each `instanceGroup` matches the one used by the other etcd clusters.

```
- etcdMembers:
- instanceGroup: master-az-1a
name: a
- instanceGroup: master-az-1b
name: b
- instanceGroup: master-az-1c
name: c
name: cilium
```

Then enable etcd as kvstore:

```
networking:
cilium:
etcdManaged: true
```

##### Enabling BPF NodePort

As of Kops 1.18 you can safely enable Cilium NodePort.
Expand Down
4 changes: 3 additions & 1 deletion docs/releases/1.18-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

* Rolling updates now support surging and parallelism within an instance group. For details see [the documentation](../operations/rolling-update.md).

* Cilium CNI can now use AWS networking natively through the AWS ENI IPAM mode. Kops can also run a Kubernetes cluster entirely without kube-proxy using Cilium's BPF NodePort implementation
* Cilium CNI can now use AWS networking natively through the AWS ENI IPAM mode. Kops can also run a Kubernetes cluster entirely without kube-proxy using Cilium's BPF NodePort implementation.

* Cilium CNI can now use a dedicated etcd cluster managed by etcd-manager for synchronizing agent state instead of CRDs.

* The Terraform target now supports Terraform 0.12 syntax (HCL2) by default. See the Required Actions item below.

Expand Down
1 change: 1 addition & 0 deletions nodeup/pkg/model/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go_library(
name = "go_default_library",
srcs = [
"architecture.go",
"cilium.go",
"cloudconfig.go",
"containerd.go",
"context.go",
Expand Down
197 changes: 197 additions & 0 deletions nodeup/pkg/model/cilium.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package model

import (
"crypto/rsa"
"crypto/x509"
"fmt"
"io/ioutil"
"os"
"path/filepath"

"golang.org/x/sys/unix"
certutil "k8s.io/client-go/util/cert"
"k8s.io/klog"
"k8s.io/kops/pkg/pkiutil"
"k8s.io/kops/upup/pkg/fi"
"k8s.io/kops/upup/pkg/fi/nodeup/nodetasks"
)

// CiliumBuilder writes Cilium's assets.
//
// It embeds *NodeupModelContext, through which its methods read the cluster
// spec, the node role and the keystore.
type CiliumBuilder struct {
	*NodeupModelContext
}

// Compile-time check that *CiliumBuilder satisfies fi.ModelBuilder.
var _ fi.ModelBuilder = &CiliumBuilder{}

// Build adds the node-level pieces Cilium needs. It is a no-op when the
// cluster spec has no Cilium networking section. Otherwise it sets up the BPF
// filesystem mount and, when Cilium is configured with a managed etcd, the
// etcd TLS secrets.
func (b *CiliumBuilder) Build(c *fi.ModelBuilderContext) error {
	cilium := b.Cluster.Spec.Networking.Cilium
	if cilium == nil {
		return nil
	}

	err := b.buildBPFMount(c)
	if err != nil {
		return err
	}

	// The etcd secrets are only needed when Cilium uses the managed etcd.
	if !cilium.EtcdManaged {
		return nil
	}

	return b.buildCiliumEtcdSecrets(c)
}

// buildBPFMount adds a systemd mount unit for the BPF filesystem at
// /sys/fs/bpf, unless a statfs of that path shows it is already a bpffs.
// It returns an error only when the statfs call itself fails.
func (b *CiliumBuilder) buildBPFMount(c *fi.ModelBuilderContext) error {
	// equivalent to unix.BPF_FS_MAGIC in golang.org/x/sys/unix
	const bpfFSMagic = uint32(0xcafe4a11)

	var stat unix.Statfs_t
	if err := unix.Statfs("/sys/fs/bpf", &stat); err != nil {
		return fmt.Errorf("error checking for /sys/fs/bpf: %v", err)
	}

	// systemd v238 includes the bpffs mount by default; and gives an error "has a bad unit file setting" if we try to mount it again (see mount_point_is_api)
	if uint32(stat.Type) == bpfFSMagic {
		return nil
	}

	mount := &nodetasks.Service{
		Name: "sys-fs-bpf.mount",
		Definition: s(`
[Unit]
Description=Cilium BPF mounts
Documentation=http://docs.cilium.io/
DefaultDependencies=no
Before=local-fs.target umount.target kubelet.service

[Mount]
What=bpffs
Where=/sys/fs/bpf
Type=bpf

[Install]
WantedBy=multi-user.target
`),
	}
	mount.InitDefaults()
	c.AddTask(mount)

	return nil
}

// buildCiliumEtcdSecrets provisions the TLS material used with the dedicated
// Cilium etcd cluster.
//
// On masters it adds tasks that write the certificate and private key of the
// three Cilium etcd CAs (manager, peers, clients) under
// /etc/kubernetes/pki/etcd-manager-cilium.
//
// Regardless of node role it then writes the following directly to disk (not
// through tasks) under /etc/kubernetes/pki/cilium:
//   - etcd-ca.crt: the etcd-clients-ca-cilium certificate
//   - etcd-client.crt / etcd-client.key: a freshly generated client keypair
//     with CommonName "cilium", signed by that CA
//
// It returns an error when a keystore lookup fails, when the clients CA
// certificate or private key is missing or not an RSA key, or when key
// generation, signing or any file write fails.
func (b *CiliumBuilder) buildCiliumEtcdSecrets(c *fi.ModelBuilderContext) error {
	const clientsCAName = "etcd-clients-ca-cilium"

	if b.IsMaster {
		d := "/etc/kubernetes/pki/etcd-manager-cilium"

		// An ordered list (rather than a map) keeps task creation and the
		// first reported error deterministic between runs.
		keypairs := []struct {
			fileName     string
			keystoreName string
		}{
			{"etcd-manager-ca", "etcd-manager-ca-cilium"},
			{"etcd-peers-ca", "etcd-peers-ca-cilium"},
			{"etcd-clients-ca", clientsCAName},
		}

		for _, kp := range keypairs {
			// Surface keystore lookup errors before adding any task for this keypair.
			if _, err := b.KeyStore.FindCert(kp.keystoreName); err != nil {
				return err
			}

			if err := b.BuildCertificateTask(c, kp.keystoreName, filepath.Join(d, kp.fileName+".crt")); err != nil {
				return err
			}
			if err := b.BuildPrivateKeyTask(c, kp.keystoreName, filepath.Join(d, kp.fileName+".key")); err != nil {
				return err
			}
		}
	}

	etcdClientsCACertificate, err := b.KeyStore.FindCert(clientsCAName)
	if err != nil {
		return err
	}
	// NOTE(review): the keystore is assumed to report a missing keypair as
	// (nil, nil) - confirm against the KeyStore implementation. Without this
	// guard the dereference below would panic instead of returning an error.
	if etcdClientsCACertificate == nil {
		return fmt.Errorf("certificate %q not found in keystore", clientsCAName)
	}

	etcdClientsCAPrivateKey, err := b.KeyStore.FindPrivateKey(clientsCAName)
	if err != nil {
		return err
	}
	if etcdClientsCAPrivateKey == nil {
		return fmt.Errorf("private key %q not found in keystore", clientsCAName)
	}

	// Validate the signing key before touching the filesystem.
	signingKey, ok := etcdClientsCAPrivateKey.Key.(*rsa.PrivateKey)
	if !ok {
		return fmt.Errorf("etcd-clients-ca private key had unexpected type %T", etcdClientsCAPrivateKey.Key)
	}

	dir := "/etc/kubernetes/pki/cilium"

	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("error creating directories %q: %v", dir, err)
	}

	// CA certificate against which the etcd server certificate is verified.
	{
		p := filepath.Join(dir, "etcd-ca.crt")
		certBytes := pkiutil.EncodeCertPEM(etcdClientsCACertificate.Certificate)
		if err := ioutil.WriteFile(p, certBytes, 0644); err != nil {
			return fmt.Errorf("error writing certificate key file %q: %v", p, err)
		}
	}

	// Base path (without extension) of the client certificate and key.
	p := filepath.Join(dir, "etcd-client")

	privateKey, err := pkiutil.NewPrivateKey()
	if err != nil {
		return fmt.Errorf("unable to create private key %q: %v", p, err)
	}
	privateKeyBytes := pkiutil.EncodePrivateKeyPEM(privateKey)

	certConfig := &certutil.Config{
		CommonName: "cilium",
		Usages:     []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
	}

	klog.Infof("signing certificate for %q", p)
	cert, err := pkiutil.NewSignedCert(certConfig, privateKey, etcdClientsCACertificate.Certificate, signingKey)
	if err != nil {
		return fmt.Errorf("error signing certificate for %q: %v", p, err)
	}
	certBytes := pkiutil.EncodeCertPEM(cert)

	if err := ioutil.WriteFile(p+".crt", certBytes, 0644); err != nil {
		return fmt.Errorf("error writing certificate key file %q: %v", p+".crt", err)
	}

	// The private key is written with owner-only permissions.
	if err := ioutil.WriteFile(p+".key", privateKeyBytes, 0600); err != nil {
		return fmt.Errorf("error writing private key file %q: %v", p+".key", err)
	}

	return nil
}
41 changes: 0 additions & 41 deletions nodeup/pkg/model/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"fmt"
"path/filepath"

"golang.org/x/sys/unix"
"k8s.io/kops/upup/pkg/fi"
"k8s.io/kops/upup/pkg/fi/nodeup/nodetasks"
)
Expand Down Expand Up @@ -67,46 +66,6 @@ func (b *NetworkBuilder) Build(c *fi.ModelBuilderContext) error {
}
}

if networking.Cilium != nil {
var fsdata unix.Statfs_t
err := unix.Statfs("/sys/fs/bpf", &fsdata)

if err != nil {
return fmt.Errorf("error checking for /sys/fs/bpf: %v", err)
}

// equivalent to unix.BPF_FS_MAGIC in golang.org/x/sys/unix
BPF_FS_MAGIC := uint32(0xcafe4a11)

// systemd v238 includes the bpffs mount by default; and gives an error "has a bad unit file setting" if we try to mount it again (see mount_point_is_api)
alreadyMounted := uint32(fsdata.Type) == BPF_FS_MAGIC

if !alreadyMounted {
unit := s(`
[Unit]
Description=Cilium BPF mounts
Documentation=http://docs.cilium.io/
DefaultDependencies=no
Before=local-fs.target umount.target kubelet.service

[Mount]
What=bpffs
Where=/sys/fs/bpf
Type=bpf

[Install]
WantedBy=multi-user.target
`)

service := &nodetasks.Service{
Name: "sys-fs-bpf.mount",
Definition: unit,
}
service.InitDefaults()
c.AddTask(service)
}
}

return nil
}

Expand Down
16 changes: 16 additions & 0 deletions pkg/apis/kops/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,22 @@ func validateNetworkingCilium(c *kops.ClusterSpec, v *kops.CiliumNetworkingSpec,
}
}

if v.EtcdManaged {
hasCiliumCluster := false
for _, cluster := range c.EtcdClusters {
if cluster.Name == "cilium" {
if cluster.Provider == kops.EtcdProviderTypeLegacy {
allErrs = append(allErrs, field.Invalid(fldPath.Root().Child("etcdClusters"), kops.EtcdProviderTypeLegacy, "Legacy etcd provider is not supported for the cilium cluster"))
}
hasCiliumCluster = true
break
}
}
if !hasCiliumCluster {
allErrs = append(allErrs, field.Required(fldPath.Root().Child("etcdClusters"), "Cilium with managed etcd requires a dedicated etcd cluster"))
}
}

return allErrs
}

Expand Down
19 changes: 17 additions & 2 deletions pkg/model/components/etcdmanager/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,15 @@ func (b *EtcdManagerBuilder) Build(c *fi.ModelBuilderContext) error {
}); err != nil {
return err
}

if etcdCluster.Name == "cilium" {
c.AddTask(&fitasks.Keypair{
Name: fi.String("etcd-clients-ca-cilium"),
Subject: "cn=etcd-clients-ca-cilium",
Type: "ca",
Format: format,
})
}
}

return nil
Expand Down Expand Up @@ -278,6 +287,7 @@ func (b *EtcdManagerBuilder) buildPod(etcdCluster *kops.EtcdClusterSpec) (*v1.Po

etcdInsecure := !b.UseEtcdTLS()

clientHost := "__name__"
clientPort := 4001

clusterName := "etcd-" + etcdCluster.Name
Expand Down Expand Up @@ -320,7 +330,12 @@ func (b *EtcdManagerBuilder) buildPod(etcdCluster *kops.EtcdClusterSpec) (*v1.Po
peerPort = 2381
grpcPort = wellknownports.EtcdEventsGRPC
quarantinedClientPort = wellknownports.EtcdEventsQuarantinedClientPort

case "cilium":
clientPort = 4003
peerPort = 2382
grpcPort = wellknownports.EtcdCiliumGRPC
quarantinedClientPort = wellknownports.EtcdCiliumQuarantinedClientPort
clientHost = b.Cluster.Spec.MasterInternalName
default:
return nil, fmt.Errorf("unknown etcd cluster key %q", etcdCluster.Name)
}
Expand Down Expand Up @@ -351,7 +366,7 @@ func (b *EtcdManagerBuilder) buildPod(etcdCluster *kops.EtcdClusterSpec) (*v1.Po
scheme := "https"

config.PeerUrls = fmt.Sprintf("%s://__name__:%d", scheme, peerPort)
config.ClientUrls = fmt.Sprintf("%s://__name__:%d", scheme, clientPort)
config.ClientUrls = fmt.Sprintf("%s://%s:%d", scheme, clientHost, clientPort)
config.QuarantineClientUrls = fmt.Sprintf("%s://__name__:%d", scheme, quarantinedClientPort)

// TODO: We need to wire these into the etcd-manager spec
Expand Down
Loading