Skip to content

Commit

Permalink
[MM-49378] Added Nginx SLO for Enterprise customer (#823)
Browse files Browse the repository at this point in the history
  • Loading branch information
mirshahriar authored Jan 10, 2023
1 parent bfe42a5 commit 03ab5fe
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 5 deletions.
1 change: 1 addition & 0 deletions cmd/cloud/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ func executeServerCmd(flags serverFlags) error {
NdotsValue: flags.ndotsDefaultValue,
PGBouncerConfig: pgbouncerConfig,
SLOInstallationGroups: flags.sloInstallationGroups,
SLOEnterpriseGroups: flags.sloEnterpriseGroups,
EtcdManagerEnv: etcdManagerEnv,
SLOTargetAvailability: flags.sloTargetAvailability,
}
Expand Down
2 changes: 2 additions & 0 deletions cmd/cloud/server_flag.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ type provisioningParams struct {
s3StateStore string
allowListCIDRRange []string
sloInstallationGroups []string
sloEnterpriseGroups []string
vpnListCIDR []string
useExistingResources bool
deployMySQLOperator bool
Expand All @@ -90,6 +91,7 @@ func (flags *provisioningParams) addFlags(command *cobra.Command) {
command.Flags().StringVar(&flags.s3StateStore, "state-store", "dev.cloud.mattermost.com", "The S3 bucket used to store cluster state.")
command.Flags().StringSliceVar(&flags.allowListCIDRRange, "allow-list-cidr-range", []string{"0.0.0.0/0"}, "The list of CIDRs to allow communication with the private ingress.")
command.Flags().StringSliceVar(&flags.sloInstallationGroups, "slo-installation-groups", []string{}, "The list of installation group ids to create dedicated SLOs for.")
command.Flags().StringSliceVar(&flags.sloEnterpriseGroups, "slo-enterprise-groups", []string{}, "The list of enterprise group ids to create dedicated Nginx SLOs for.")
command.Flags().StringSliceVar(&flags.vpnListCIDR, "vpn-list-cidr", []string{"0.0.0.0/0"}, "The list of VPN CIDRs to allow communication with the clusters.")
command.Flags().BoolVar(&flags.useExistingResources, "use-existing-aws-resources", true, "Whether to use existing AWS resources (VPCs, subnets, etc.) or not.")
command.Flags().BoolVar(&flags.deployMySQLOperator, "deploy-mysql-operator", true, "Whether to deploy the mysql operator.")
Expand Down
41 changes: 36 additions & 5 deletions internal/provisioner/cluster_installation_provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,23 @@ func (provisioner *CommonProvisioner) createClusterInstallation(clusterInstallat
return errors.Wrap(err, "failed to ensure database and filestore")
}

if *installation.GroupID != "" && containsInstallationGroup(*installation.GroupID, provisioner.params.SLOInstallationGroups) {
logger.Debug("Installation belongs in the approved SLO installation group list. Adding SLI")
err = createInstallationSLI(clusterInstallation, k8sClient, logger)
if err != nil {
return errors.Wrap(err, "failed to create installation SLI")
if installation.GroupID != nil && *installation.GroupID != "" {
if containsInstallationGroup(*installation.GroupID, provisioner.params.SLOInstallationGroups) {
logger.Debug("Installation belongs in the approved SLO installation group list. Adding SLI")
err = createInstallationSLI(clusterInstallation, k8sClient, logger)
if err != nil {
return errors.Wrap(err, "failed to create installation SLI")
}
}
if containsInstallationGroup(*installation.GroupID, provisioner.params.SLOEnterpriseGroups) {
logger.Debug("Installation belongs in the approved enterprise installation group list. Adding Nginx SLI")
err = createOrUpdateNginxSLI(clusterInstallation, k8sClient, logger)
if err != nil {
return errors.Wrap(err, "failed to create enterprise nginx SLI")
}
}
}

ctx := context.TODO()
_, err = k8sClient.MattermostClientsetV1Beta.MattermostV1beta1().Mattermosts(clusterInstallation.Namespace).Create(ctx, mattermost, metav1.CreateOptions{})
if err != nil {
Expand Down Expand Up @@ -173,6 +183,11 @@ func hibernateInstallation(configLocation string, logger *log.Entry, clusterInst
if err != nil {
return errors.Wrap(err, "failed to delete installation SLI")
}

if err = ensureNginxSLIDeleted(clusterInstallation, k8sClient, logger); err != nil {
return errors.Wrap(err, "failed to delete enterprise nginx SLI")
}

logger.Info("Updated cluster installation")

return nil
Expand Down Expand Up @@ -328,6 +343,18 @@ func (provisioner *CommonProvisioner) updateClusterInstallation(
}
}

if installation.GroupID != nil && *installation.GroupID != "" && containsInstallationGroup(*installation.GroupID, provisioner.params.SLOEnterpriseGroups) {
logger.Debug("Creating or updating Mattermost Enterprise Nginx SLI")
if err = createOrUpdateNginxSLI(clusterInstallation, k8sClient, logger); err != nil {
return errors.Wrapf(err, "failed to create enterprise nginx SLI %s", getNginxSlothObjectName(clusterInstallation))
}
} else {
logger.Debug("Removing Mattermost Enterprise Nginx SLI")
if err := ensureNginxSLIDeleted(clusterInstallation, k8sClient, logger); err != nil {
return errors.Wrapf(err, "failed to delete enterprise nginx SLI %s", getNginxSlothObjectName(clusterInstallation))
}
}

logger.Info("Updated cluster installation")

return nil
Expand Down Expand Up @@ -399,6 +426,10 @@ func deleteClusterInstallation(
return errors.Wrap(err, "failed to delete installation SLI")
}

if err = ensureNginxSLIDeleted(clusterInstallation, k8sClient, logger); err != nil {
return errors.Wrap(err, "failed to delete enterprise nginx SLI")
}

logger.Info("Successfully deleted cluster installation")

return nil
Expand Down
1 change: 1 addition & 0 deletions internal/provisioner/kops_provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ type ProvisioningParams struct {
NdotsValue string
PGBouncerConfig *PGBouncerConfig
SLOInstallationGroups []string
SLOEnterpriseGroups []string
EtcdManagerEnv map[string]string
SLOTargetAvailability float64
}
Expand Down
82 changes: 82 additions & 0 deletions internal/provisioner/nginx_sli.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.
//

package provisioner

import (
"context"
"fmt"
"time"

"github.com/mattermost/mattermost-cloud/k8s"
"github.com/mattermost/mattermost-cloud/model"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
slothv1 "github.com/slok/sloth/pkg/kubernetes/api/sloth/v1"
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// getNginxSlothObjectName returns the name of the enterprise Nginx
// PrometheusServiceLevel object for the given cluster installation. The name
// is a fixed prefix followed by the installation ID.
//
// NOTE(review): "enterpise" looks like a misspelling of "enterprise". It is
// kept as-is because this name is what ensureNginxSLIDeleted looks up, so
// changing it would presumably orphan objects created under the current name.
func getNginxSlothObjectName(clusterInstallation *model.ClusterInstallation) string {
	const nameFormat = "slo-nginx-my-enterpise-%s"

	installationID := clusterInstallation.InstallationID
	return fmt.Sprintf(nameFormat, installationID)
}

// makeNginxSLI builds the Sloth PrometheusServiceLevel describing the Nginx
// availability SLO for a single cluster installation.
//
// The object is named via getNginxSlothObjectName, placed in
// prometheusNamespace, and carries one SLO ("requests-availability") with a
// 99.5 objective. Both queries read
// nginx_ingress_controller_request_duration_seconds_count filtered on the
// exported_service label equal to makeClusterInstallationName's result; the
// error query additionally restricts status to 5xx, 429 and 499. Page and
// ticket alerts are both disabled.
func makeNginxSLI(clusterInstallation *model.ClusterInstallation) slothv1.PrometheusServiceLevel {
	objectName := getNginxSlothObjectName(clusterInstallation)
	exportedService := makeClusterInstallationName(clusterInstallation)

	// {{.window}} is left verbatim in the query text; it is not expanded here.
	errorQuery := "sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_service='" + exportedService + "',status=~'(5..|429|499)'}[{{.window}}]))"
	totalQuery := "sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_service='" + exportedService + "'}[{{.window}}]))"

	meta := metav1.ObjectMeta{
		Name:      objectName,
		Namespace: prometheusNamespace,
		Labels: map[string]string{
			"app":     "kube-prometheus-stack",
			"release": "prometheus-operator",
		},
	}

	availability := slothv1.SLO{
		Name:        "requests-availability",
		Objective:   99.5,
		Description: "Common SLO based on availability for HTTP request responses measured on ingress layer.",
		SLI: slothv1.SLI{
			Events: &slothv1.SLIEvents{
				ErrorQuery: errorQuery,
				TotalQuery: totalQuery,
			},
		},
		Alerting: slothv1.Alerting{
			PageAlert:   slothv1.Alert{Disable: true},
			TicketAlert: slothv1.Alert{Disable: true},
		},
	}

	return slothv1.PrometheusServiceLevel{
		ObjectMeta: meta,
		Spec: slothv1.PrometheusServiceLevelSpec{
			Service: fmt.Sprintf("nginx-%s-service", clusterInstallation.InstallationID),
			Labels: map[string]string{
				"owner": "sre-team",
			},
			SLOs: []slothv1.SLO{availability},
		},
	}
}

// createOrUpdateNginxSLI builds the enterprise Nginx service level for the
// given cluster installation with makeNginxSLI and hands it to
// createOrUpdateClusterPrometheusServiceLevel, returning that call's error.
func createOrUpdateNginxSLI(clusterInstallation *model.ClusterInstallation, k8sClient *k8s.KubeClient, logger log.FieldLogger) error {
	return createOrUpdateClusterPrometheusServiceLevel(
		makeNginxSLI(clusterInstallation),
		k8sClient,
		logger,
	)
}

// ensureNginxSLIDeleted removes the enterprise Nginx PrometheusServiceLevel
// belonging to the given cluster installation from prometheusNamespace.
//
// A missing object is treated as success, both when the lookup reports
// NotFound and when the delete does (for example because the object was
// removed between the two calls), so the function is safe to call repeatedly.
// Any other lookup or delete failure is wrapped and returned.
func ensureNginxSLIDeleted(clusterInstallation *model.ClusterInstallation, k8sClient *k8s.KubeClient, logger log.FieldLogger) error {
	pslName := getNginxSlothObjectName(clusterInstallation)

	// A single 60-second budget is shared by the lookup and the delete.
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
	defer cancel()

	serviceLevels := k8sClient.SlothClientsetV1.SlothV1().PrometheusServiceLevels(prometheusNamespace)

	_, err := serviceLevels.Get(ctx, pslName, metav1.GetOptions{})
	if err != nil {
		if k8sErrors.IsNotFound(err) {
			logger.Debugf("sloth CRD doesn't exist on cluster: %s", err)
			return nil
		}
		// Previously this error was dropped and the delete was attempted
		// anyway; surface it so the real cause is not masked.
		return errors.Wrap(err, "failed to get enterprise nginx sli")
	}

	err = serviceLevels.Delete(ctx, pslName, metav1.DeleteOptions{})
	if err != nil && !k8sErrors.IsNotFound(err) {
		return errors.Wrap(err, "failed to delete enterprise nginx sli")
	}

	return nil
}

0 comments on commit 03ab5fe

Please sign in to comment.