Skip to content

Commit

Permalink
Controller manager: add high availability support
Browse files Browse the repository at this point in the history
  • Loading branch information
giorio94 authored and adamjensenbot committed Jun 6, 2022
1 parent 13cef1d commit 8557ff1
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 14 deletions.
21 changes: 14 additions & 7 deletions cmd/liqo-controller-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"k8s.io/client-go/tools/leaderelection/resourcelock"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
Expand Down Expand Up @@ -97,6 +98,9 @@ func main() {
metricsAddr := flag.String("metrics-address", ":8080", "The address the metric endpoint binds to")
probeAddr := flag.String("health-probe-address", ":8081", "The address the health probe endpoint binds to")

// Leader election
leaderElection := flag.Bool("enable-leader-election", false, "Enable leader election for controller manager")

// Global parameters
resyncPeriod := flag.Duration("resync-period", 10*time.Hour, "The resync period for the informers")
clusterIdentityFlags := argsutils.NewClusterIdentityFlags(true, nil)
Expand Down Expand Up @@ -166,13 +170,16 @@ func main() {
utilruntime.Must(err)

mgr, err := ctrl.NewManager(config, ctrl.Options{
MapperProvider: mapper.LiqoMapperProvider(scheme),
Scheme: scheme,
MetricsBindAddress: *metricsAddr,
HealthProbeBindAddress: *probeAddr,
LeaderElection: false,
LeaderElectionID: "66cf253f.liqo.io",
Port: int(*webhookPort),
MapperProvider: mapper.LiqoMapperProvider(scheme),
Scheme: scheme,
MetricsBindAddress: *metricsAddr,
HealthProbeBindAddress: *probeAddr,
LeaderElection: *leaderElection,
LeaderElectionID: "66cf253f.ctrlmgr.liqo.io",
LeaderElectionNamespace: *liqoNamespace,
LeaderElectionReleaseOnCancel: true,
LeaderElectionResourceLock: resourcelock.LeasesResourceLock,
Port: int(*webhookPort),
NewCache: cache.BuilderWithOptions(cache.Options{
SelectorsByObject: cache.SelectorsByObject{
&corev1.Pod{}: {
Expand Down
2 changes: 1 addition & 1 deletion cmd/liqoctl/cmd/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func newInstallCommand(ctx context.Context, f *factory.Factory) *cobra.Command {
cmd.PersistentFlags().Var(&sharingPercentage, "sharing-percentage",
"The maximum percentage of available cluster resources that could be shared with remote clusters (0-100)")
cmd.PersistentFlags().BoolVar(&options.EnableHA, "enable-ha", false,
"Enable the support for high-availability of Liqo components, currently supported by the gateway.")
"Enable the support for high-availability of Liqo components, currently supported by the gateway and the controller manager.")
cmd.PersistentFlags().Var(&reservedSubnets, "reserved-subnets",
"The private CIDRs to be excluded, as already in use (e.g., the subnet of the cluster nodes); PodCIDR and ServiceCIDR shall not be included.")

Expand Down
1 change: 1 addition & 0 deletions deployments/liqo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
| controllerManager.pod.annotations | object | `{}` | controller-manager pod annotations |
| controllerManager.pod.extraArgs | list | `[]` | controller-manager pod extra arguments |
| controllerManager.pod.labels | object | `{}` | controller-manager pod labels |
| controllerManager.replicas | int | `1` | The number of controller-manager instances to run, which can be increased for active/passive high availability. |
| crdReplicator.imageName | string | `"liqo/crd-replicator"` | crdReplicator image repository |
| crdReplicator.pod.annotations | object | `{}` | crdReplicator pod annotations |
| crdReplicator.pod.extraArgs | list | `[]` | crdReplicator pod extra arguments |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ metadata:
{{- include "liqo.labels" $ctrlManagerConfig | nindent 4 }}
name: {{ include "liqo.prefixedName" $ctrlManagerConfig }}
spec:
replicas: 1
replicas: {{ .Values.controllerManager.replicas }}
selector:
matchLabels:
{{- include "liqo.selectorLabels" $ctrlManagerConfig | nindent 6 }}
Expand All @@ -25,6 +25,17 @@ spec:
{{- toYaml .Values.controllerManager.pod.labels | nindent 8 }}
{{- end }}
spec:
{{- if gt .Values.controllerManager.replicas 1.0 }}
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchLabels:
{{- include "liqo.selectorLabels" $ctrlManagerConfig | nindent 18 }}
topologyKey: kubernetes.io/hostname
{{- end }}
securityContext:
{{- include "liqo.podSecurityContext" . | nindent 8 }}
serviceAccountName: {{ include "liqo.prefixedName" $ctrlManagerConfig }}
Expand Down Expand Up @@ -86,6 +97,9 @@ spec:
{{- $d := dict "commandName" "--cluster-labels" "dictionary" .Values.discovery.config.clusterLabels }}
{{- include "liqo.concatenateMap" $d | nindent 10 }}
{{- end }}
{{- if gt .Values.controllerManager.replicas 1.0 }}
- --enable-leader-election=true
{{- end}}
env:
- name: CLUSTER_ID
valueFrom:
Expand Down
2 changes: 2 additions & 0 deletions deployments/liqo/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ apiServer:
trustedCA: false

controllerManager:
# -- The number of controller-manager instances to run, which can be increased for active/passive high availability.
replicas: 1
pod:
# -- controller-manager pod annotations
annotations: {}
Expand Down
7 changes: 6 additions & 1 deletion docs/installation/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,12 @@ Alternatively, you can manually specify a desired name with the `--cluster-name`
The following lists the main **customization parameters** exposed by the *liqoctl install* commands, along with a brief description.
Additionally, **arbitrary parameters** available in the Helm *values* file (the full list is provided in the dedicated [repository page](https://github.com/liqotech/liqo/tree/master/deployments/liqo)) can be modified through the `--set` flag, which supports the standard Helm syntax.

### Global

The main global flags, besides those concerning the installation of [development versions](InstallationDevelopmentVersions), include:

* `--enable-ha`: whether to enable the support for **high-availability of the Liqo components**, starting two replicas (in an active/standby configuration) of the **gateway** to ensure no cross-cluster connectivity downtime in case one of the replicas is restarted, as well as of the **controller manager**, which embeds the Liqo control plane logic.

### Control plane

The main control plane flags include:
Expand All @@ -311,7 +317,6 @@ The main networking flags include:

* `--reserved-subnets`: the list of **private CIDRs to be excluded** from the ones used by Liqo to remap remote clusters in case of address conflicts, as they are already in use (e.g., the subnet of the cluster nodes).
The Pod CIDR and the Service CIDR shall not be manually specified, as they are automatically included in the reserved list.
* `--enable-ha`: whether to enable the support for **high-availability of the Liqo gateway**, starting two replicas (in an active/standby configuration) to ensure no cross-cluster connectivity downtime in case one of the replicas is restarted.

## Install with Helm

Expand Down
5 changes: 4 additions & 1 deletion pkg/liqo-controller-manager/storageprovisioner/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ type StorageControllerRunnable struct {

// Start starts the runnable and keeps it running until the context is closed.
func (c StorageControllerRunnable) Start(ctx context.Context) error {
c.Ctrl.Run(ctx)
// The Run method blocks forever, regardless of the context status.
// Hence, this is executed in a goroutine, to ensure the method terminates when the context is closed.
go c.Ctrl.Run(ctx)
<-ctx.Done()
return nil
}
7 changes: 4 additions & 3 deletions pkg/liqoctl/install/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,9 @@ func (o *Options) isRelease() bool {
}

func (o *Options) values() map[string]interface{} {
gatewayReplicas := 1
replicas := 1
if o.EnableHA {
gatewayReplicas = 2
replicas = 2
}

return map[string]interface{}{
Expand All @@ -312,6 +312,7 @@ func (o *Options) values() map[string]interface{} {
},

"controllerManager": map[string]interface{}{
"replicas": float64(replicas),
"config": map[string]interface{}{
// The value is converted to float64 to match the type returned by the helm client.
"resourceSharingPercentage": float64(o.SharingPercentage),
Expand All @@ -327,7 +328,7 @@ func (o *Options) values() map[string]interface{} {
},

"gateway": map[string]interface{}{
"replicas": float64(gatewayReplicas),
"replicas": float64(replicas),
},
}
}

0 comments on commit 8557ff1

Please sign in to comment.