CLD-3770 Merge master
Stylianos Rigas committed Aug 1, 2022
2 parents 4f8fa99 + 975fca4 commit 704b01f
Showing 29 changed files with 719 additions and 185 deletions.
97 changes: 55 additions & 42 deletions cmd/cloud/server.go
@@ -80,10 +80,15 @@ func init() {
serverCmd.PersistentFlags().Bool("installation-db-restoration-supervisor", false, "Whether this server will run an installation db restoration supervisor or not.")
serverCmd.PersistentFlags().Bool("installation-db-migration-supervisor", false, "Whether this server will run an installation db migration supervisor or not.")

// Scheduling and installation options
// Scheduling options
serverCmd.PersistentFlags().Bool("balanced-installation-scheduling", false, "Whether to schedule installations on the cluster with the greatest percentage of available resources or not. (slows down scheduling speed as cluster count increases)")
serverCmd.PersistentFlags().Int("cluster-resource-threshold", 80, "The percent threshold where new installations won't be scheduled on a multi-tenant cluster.")
serverCmd.PersistentFlags().Int("cluster-resource-threshold-scale-value", 0, "The number of worker nodes to scale up by when the threshold is passed. Set to 0 for no scaling. Scaling will never exceed the cluster max worker configuration value.")
serverCmd.PersistentFlags().Int("cluster-resource-threshold", 80, "The percent threshold where new installations won't be scheduled on a multi-tenant cluster.")
serverCmd.PersistentFlags().Int("cluster-resource-threshold-cpu-override", 0, "The cluster-resource-threshold override value for CPU resources only")
serverCmd.PersistentFlags().Int("cluster-resource-threshold-memory-override", 0, "The cluster-resource-threshold override value for memory resources only")
serverCmd.PersistentFlags().Int("cluster-resource-threshold-pod-count-override", 0, "The cluster-resource-threshold override value for pod count only")

// Installation options
serverCmd.PersistentFlags().Bool("use-existing-aws-resources", true, "Whether to use existing AWS resources (VPCs, subnets, etc.) or not.")
serverCmd.PersistentFlags().Bool("keep-database-data", true, "Whether to preserve database data after installation deletion or not.")
serverCmd.PersistentFlags().Bool("keep-filestore-data", true, "Whether to preserve filestore data after installation deletion or not.")
@@ -99,6 +104,7 @@ func init() {
serverCmd.PersistentFlags().Int("max-client-connections", 20000, "The db proxy max client connections.")
serverCmd.PersistentFlags().String("kubecost-token", "", "Set a kubecost token")
serverCmd.PersistentFlags().String("ndots-value", "5", "The default ndots value for installations.")
serverCmd.PersistentFlags().Bool("disable-db-init-check", false, "Whether to disable init container with database check.")

// DB clusters utilization configuration
serverCmd.PersistentFlags().Int("max-installations-rds-postgres-pgbouncer", toolsAWS.DefaultRDSMultitenantPGBouncerDatabasePostgresCountLimit, "Max installations per DB cluster of type RDS Postgres PGbouncer")
@@ -217,13 +223,16 @@ var serverCmd = &cobra.Command{
}

// TODO: move these cluster threshold values to cluster configuration.
balancedInstallationScheduling, _ := command.Flags().GetBool("balanced-installation-scheduling")
clusterResourceThreshold, _ := command.Flags().GetInt("cluster-resource-threshold")
if clusterResourceThreshold < 10 || clusterResourceThreshold > 100 {
return errors.Errorf("cluster-resource-threshold (%d) must be set between 10 and 100", clusterResourceThreshold)
}
thresholdCPUOverride, _ := command.Flags().GetInt("cluster-resource-threshold-cpu-override")
thresholdMemoryOverride, _ := command.Flags().GetInt("cluster-resource-threshold-memory-override")
thresholdPodCountOverride, _ := command.Flags().GetInt("cluster-resource-threshold-pod-count-override")
clusterResourceThresholdScaleValue, _ := command.Flags().GetInt("cluster-resource-threshold-scale-value")
if clusterResourceThresholdScaleValue < 0 || clusterResourceThresholdScaleValue > 10 {
return errors.Errorf("cluster-resource-threshold-scale-value (%d) must be set between 0 and 10", clusterResourceThresholdScaleValue)
installationScheduling := supervisor.NewInstallationSupervisorSchedulingOptions(balancedInstallationScheduling, clusterResourceThreshold, thresholdCPUOverride, thresholdMemoryOverride, thresholdPodCountOverride, clusterResourceThresholdScaleValue)
err = installationScheduling.Validate()
if err != nil {
return errors.Wrap(err, "invalid installation scheduling options")
}

clusterSupervisor, _ := command.Flags().GetBool("cluster-supervisor")
@@ -252,15 +261,16 @@ var serverCmd = &cobra.Command{
keepDatabaseData, _ := command.Flags().GetBool("keep-database-data")
keepFilestoreData, _ := command.Flags().GetBool("keep-filestore-data")
useExistingResources, _ := command.Flags().GetBool("use-existing-aws-resources")
balancedInstallationScheduling, _ := command.Flags().GetBool("balanced-installation-scheduling")
backupRestoreToolImage, _ := command.Flags().GetString("backup-restore-tool-image")
backupJobTTL, _ := command.Flags().GetInt32("backup-job-ttl-seconds")

deployMySQLOperator, _ := command.Flags().GetBool("deploy-mysql-operator")
deployMinioOperator, _ := command.Flags().GetBool("deploy-minio-operator")
ndotsDefaultValue, _ := command.Flags().GetString("ndots-value")
model.SetDeployOperators(deployMySQLOperator, deployMinioOperator)

ndotsDefaultValue, _ := command.Flags().GetString("ndots-value")
disableDBInitCheck, _ := command.Flags().GetBool("disable-db-init-check")

wd, err := os.Getwd()
if err != nil {
wd = "error getting working directory"
@@ -277,36 +287,40 @@ var serverCmd = &cobra.Command{
}

logger.WithFields(logrus.Fields{
"build-hash": model.BuildHash,
"cluster-supervisor": clusterSupervisor,
"group-supervisor": groupSupervisor,
"installation-supervisor": installationSupervisor,
"cluster-installation-supervisor": clusterInstallationSupervisor,
"backup-supervisor": backupSupervisor,
"import-supervisor": importSupervisor,
"installation-db-restoration-supervisor": installationDBRestorationSupervisor,
"installation-db-migration-supervisor": installationDBMigrationSupervisor,
"store-version": currentVersion,
"state-store": s3StateStore,
"working-directory": wd,
"balanced-installation-scheduling": balancedInstallationScheduling,
"cluster-resource-threshold": clusterResourceThreshold,
"cluster-resource-threshold-scale-value": clusterResourceThresholdScaleValue,
"use-existing-aws-resources": useExistingResources,
"keep-database-data": keepDatabaseData,
"keep-filestore-data": keepFilestoreData,
"force-cr-upgrade": forceCRUpgrade,
"backup-restore-tool-image": backupRestoreToolImage,
"backup-job-ttl-seconds": backupJobTTL,
"debug": debugMode,
"dev-mode": devMode,
"deploy-mysql-operator": deployMySQLOperator,
"deploy-minio-operator": deployMinioOperator,
"ndots-value": ndotsDefaultValue,
"maxDatabaseConnectionsPerPool": maxDatabaseConnectionsPerPool,
"defaultPoolSize": defaultPoolSize,
"minPoolSize": minPoolSize,
"maxClientConnections": maxClientConnections,
"build-hash": model.BuildHash,
"cluster-supervisor": clusterSupervisor,
"group-supervisor": groupSupervisor,
"installation-supervisor": installationSupervisor,
"cluster-installation-supervisor": clusterInstallationSupervisor,
"backup-supervisor": backupSupervisor,
"import-supervisor": importSupervisor,
"installation-db-restoration-supervisor": installationDBRestorationSupervisor,
"installation-db-migration-supervisor": installationDBMigrationSupervisor,
"store-version": currentVersion,
"state-store": s3StateStore,
"working-directory": wd,
"balanced-installation-scheduling": balancedInstallationScheduling,
"cluster-resource-threshold": clusterResourceThreshold,
"cluster-resource-threshold-cpu-override": thresholdCPUOverride,
"cluster-resource-threshold-memory-override": thresholdMemoryOverride,
"cluster-resource-threshold-pod-count-override": thresholdPodCountOverride,
"cluster-resource-threshold-scale-value": clusterResourceThresholdScaleValue,
"use-existing-aws-resources": useExistingResources,
"keep-database-data": keepDatabaseData,
"keep-filestore-data": keepFilestoreData,
"force-cr-upgrade": forceCRUpgrade,
"backup-restore-tool-image": backupRestoreToolImage,
"backup-job-ttl-seconds": backupJobTTL,
"debug": debugMode,
"dev-mode": devMode,
"deploy-mysql-operator": deployMySQLOperator,
"deploy-minio-operator": deployMinioOperator,
"ndots-value": ndotsDefaultValue,
"maxDatabaseConnectionsPerPool": maxDatabaseConnectionsPerPool,
"defaultPoolSize": defaultPoolSize,
"minPoolSize": minPoolSize,
"maxClientConnections": maxClientConnections,
"disable-db-init-check": disableDBInitCheck,
}).Info("Starting Mattermost Provisioning Server")

deprecationWarnings(logger, command)
@@ -339,7 +353,7 @@ var serverCmd = &cobra.Command{
return errors.Wrap(err, "failed health check")
}

resourceUtil := utils.NewResourceUtil(instanceID, awsClient, dbClusterUtilizationSettingsFromFlags(command))
resourceUtil := utils.NewResourceUtil(instanceID, awsClient, dbClusterUtilizationSettingsFromFlags(command), disableDBInitCheck)

provisioningParams := provisioner.ProvisioningParams{
S3StateStore: s3StateStore,
@@ -395,8 +409,7 @@ var serverCmd = &cobra.Command{
multiDoer = append(multiDoer, supervisor.NewGroupSupervisor(sqlStore, eventsProducer, instanceID, logger))
}
if installationSupervisor {
scheduling := supervisor.NewInstallationSupervisorSchedulingOptions(balancedInstallationScheduling, clusterResourceThreshold, clusterResourceThresholdScaleValue)
multiDoer = append(multiDoer, supervisor.NewInstallationSupervisor(sqlStore, kopsProvisioner, awsClient, instanceID, keepDatabaseData, keepFilestoreData, scheduling, resourceUtil, logger, cloudMetrics, eventsProducer, forceCRUpgrade, cloudflareClient))
multiDoer = append(multiDoer, supervisor.NewInstallationSupervisor(sqlStore, kopsProvisioner, awsClient, instanceID, keepDatabaseData, keepFilestoreData, installationScheduling, resourceUtil, logger, cloudMetrics, eventsProducer, forceCRUpgrade, cloudflareClient))
}
if clusterInstallationSupervisor {
multiDoer = append(multiDoer, supervisor.NewClusterInstallationSupervisor(sqlStore, kopsProvisioner, awsClient, eventsProducer, instanceID, logger, cloudMetrics))
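
The inline range checks that used to live in server.go (cluster-resource-threshold between 10 and 100, scale value between 0 and 10) are now delegated to installationScheduling.Validate(). As a minimal sketch only — the field names and the treatment of the new CPU/memory/pod-count overrides are assumptions for illustration, not the actual supervisor package — the relocated validation could look roughly like this:

package supervisor

import "github.com/pkg/errors"

// InstallationSupervisorSchedulingOptions sketches the option bundle that
// server.go now builds from flags; the field names here are assumptions.
type InstallationSupervisorSchedulingOptions struct {
    BalanceInstallations               bool
    ClusterResourceThreshold           int
    ClusterResourceThresholdCPU        int // 0 assumed to mean "fall back to ClusterResourceThreshold"
    ClusterResourceThresholdMemory     int
    ClusterResourceThresholdPodCount   int
    ClusterResourceThresholdScaleValue int
}

// Validate mirrors the range checks previously inlined in server.go.
func (o InstallationSupervisorSchedulingOptions) Validate() error {
    if o.ClusterResourceThreshold < 10 || o.ClusterResourceThreshold > 100 {
        return errors.Errorf("cluster-resource-threshold (%d) must be set between 10 and 100", o.ClusterResourceThreshold)
    }
    if o.ClusterResourceThresholdScaleValue < 0 || o.ClusterResourceThresholdScaleValue > 10 {
        return errors.Errorf("cluster-resource-threshold-scale-value (%d) must be set between 0 and 10", o.ClusterResourceThresholdScaleValue)
    }
    return nil
}

Centralizing the checks in the supervisor package keeps server.go limited to flag parsing and gives the scheduling options a single owner for their invariants.
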
2 changes: 1 addition & 1 deletion go.mod
@@ -15,7 +15,7 @@ require (
github.com/jmoiron/sqlx v1.2.0
github.com/lib/pq v1.10.4
github.com/mattermost/awat v0.0.0-20210616202500-f0bdd4f43f90
github.com/mattermost/mattermost-operator v1.18.0
github.com/mattermost/mattermost-operator v1.19.0-rc.1
github.com/mattermost/rotator v0.2.0
github.com/mattn/go-sqlite3 v2.0.3+incompatible
github.com/olekukonko/tablewriter v0.0.5
4 changes: 2 additions & 2 deletions go.sum
@@ -1107,8 +1107,8 @@ github.com/mattermost/ldap v0.0.0-20191128190019-9f62ba4b8d4d/go.mod h1:HLbgMEI5
github.com/mattermost/ldap v3.0.4+incompatible/go.mod h1:b4reDCcGpBxJ4WX0f224KFY+OR0npin7or7EFpeIko4=
github.com/mattermost/mattermost-cloud v0.39.0/go.mod h1:GbWfZajyp+DvdEpoNfFeJ3ZplwTtDV9pl6PhNnp+wq4=
github.com/mattermost/mattermost-operator v1.12.0/go.mod h1:a6pSJI6bDaIfO65M/Hvuqg8kUNYQQYDHLayoT5XZx5o=
github.com/mattermost/mattermost-operator v1.18.0 h1:knJvxKHy8HzmC7r5gHwqCZ9xSfgVahRyNkyUEElXbaQ=
github.com/mattermost/mattermost-operator v1.18.0/go.mod h1:427nFmeCyiwJ9N0J7hpYJNYE6a3DjPdwl0Vpel3JHVg=
github.com/mattermost/mattermost-operator v1.19.0-rc.1 h1:1iu2rXTDFO+6g2QTPOVxno5ebz1mpi7GyuedEsT2Hqw=
github.com/mattermost/mattermost-operator v1.19.0-rc.1/go.mod h1:427nFmeCyiwJ9N0J7hpYJNYE6a3DjPdwl0Vpel3JHVg=
github.com/mattermost/mattermost-server/v5 v5.23.0/go.mod h1:nMrt08IvThjybZpXPe/nqe/oJuvJxhqKkGI+m7M0R00=
github.com/mattermost/mmetl v0.0.2-0.20210316151859-38824e5f5efd/go.mod h1:w6GNqrudkzs/GddfgqkgUqsXVQ/4ImK5JJfNj5KJeUQ=
github.com/mattermost/rotator v0.2.0 h1:R3dlMHZjGR7t5T2bk76heDwpypl9rd2sZj4GoAZyuWU=
7 changes: 6 additions & 1 deletion internal/api/request_test.go
@@ -34,8 +34,13 @@ func TestNewCreateClusterRequestFromReader(t *testing.T) {
"thanos": {Chart: "10.3.1", ValuesPath: ""},
"teleport-kube-agent": {Chart: "6.2.8", ValuesPath: ""},
"pgbouncer": {Chart: "1.2.0", ValuesPath: ""},
"promtail": {Chart: "6.2.2", ValuesPath: ""},
"rtcd": {Chart: "1.1.0", ValuesPath: ""},
"kubecost": {Chart: "1.88.1", ValuesPath: ""},
"node-problem-detector": {Chart: "2.0.5", ValuesPath: ""},
"metrics-server": {Chart: "3.8.2", ValuesPath: ""},
@@ -105,7 +110,7 @@ func TestNewCreateClusterRequestFromReader(t *testing.T) {
"teleport-kube-agent": {Chart: "6.2.8", ValuesPath: ""},
"pgbouncer": {Chart: "1.2.0", ValuesPath: ""},
"promtail": {Chart: "6.2.2", ValuesPath: ""},
"rtcd": {Chart: "1.0.0", ValuesPath: ""},
"rtcd": {Chart: "1.1.0", ValuesPath: ""},
"kubecost": {Chart: "1.88.1", ValuesPath: ""},
"node-problem-detector": {Chart: "2.0.5", ValuesPath: ""},
"metrics-server": {Chart: "3.8.2", ValuesPath: ""},
8 changes: 4 additions & 4 deletions internal/mocks/aws-tools/client.go

Some generated files are not rendered by default.

40 changes: 37 additions & 3 deletions internal/provisioner/kops_provisioner_cluster_installation.go
@@ -11,6 +11,9 @@ import (
"strings"
"time"

"github.com/mattermost/mattermost-operator/apis/mattermost/v1alpha1"
"github.com/mattermost/mattermost-operator/pkg/utils"

mmv1beta1 "github.com/mattermost/mattermost-operator/apis/mattermost/v1beta1"

"github.com/mattermost/mattermost-operator/pkg/resources"
@@ -95,7 +98,6 @@ func (provisioner *crProvisionerWrapper) CreateClusterInstallation(cluster *mode
Labels: generateClusterInstallationResourceLabels(installation, clusterInstallation),
},
Spec: mmv1beta1.MattermostSpec{
Size: installation.Size,
Version: translateMattermostVersion(installation.Version),
Image: installation.Image,
MattermostEnv: mattermostEnv.ToEnvList(),
@@ -109,6 +111,11 @@ func (provisioner *crProvisionerWrapper) CreateClusterInstallation(cluster *mode
},
}

err = setMMInstanceSize(installation, mattermost)
if err != nil {
return errors.Wrap(err, "failed to set Mattermost instance size")
}

if installation.State == model.InstallationStateHibernating {
logger.Info("creating hibernated cluster installation")
configureInstallationForHibernation(mattermost, installation, clusterInstallation)
@@ -266,7 +273,10 @@ func (provisioner *crProvisionerWrapper) UpdateClusterInstallation(cluster *mode
// when the size request change comes in on the API, but would require
// new scheduling logic. For now, take care when resizing.
// TODO: address these issue.
mattermost.Spec.Size = installation.Size // Appropriate replicas and resources will be set by Operator.
err = setMMInstanceSize(installation, mattermost)
if err != nil {
return errors.Wrap(err, "failed to set Mattermost instance size")
}

mattermost.Spec.LicenseSecret = ""
var secretName string
@@ -543,7 +553,7 @@ func (provisioner *crProvisionerWrapper) IsResourceReady(cluster *model.Cluster,
return false, errors.Wrap(err, "failed to get ClusterInstallation Custom Resource")
}

if cr.Status.State != mmv1beta1.Stable {
if cr.Status.State != mmv1beta1.Stable && cr.Status.State != mmv1beta1.Ready {
return false, nil
}
if cr.Status.ObservedGeneration != 0 {
@@ -626,6 +636,30 @@ func generateAffinityConfig(installation *model.Installation, clusterInstallatio
}
}

func setMMInstanceSize(installation *model.Installation, mattermost *mmv1beta1.Mattermost) error {
if strings.HasPrefix(installation.Size, model.ProvisionerSizePrefix) {
resSize, err := model.ParseProvisionerSize(installation.Size)
if err != nil {
return errors.Wrap(err, "failed to parse custom installation size")
}
overrideReplicasAndResourcesFromSize(resSize, mattermost)
return nil
}
mattermost.Spec.Size = installation.Size
return nil
}

// This function is adapted from the Mattermost Operator; it could be made
// public there to avoid copying.
func overrideReplicasAndResourcesFromSize(size v1alpha1.ClusterInstallationSize, mm *mmv1beta1.Mattermost) {
mm.Spec.Size = ""

mm.Spec.Replicas = utils.NewInt32(size.App.Replicas)
mm.Spec.Scheduling.Resources = size.App.Resources
mm.Spec.FileStore.OverrideReplicasAndResourcesFromSize(size)
mm.Spec.Database.OverrideReplicasAndResourcesFromSize(size)
}

// getMattermostCustomResource gets the cluster installation resource from
// the kubernetes API.
func (provisioner *crProvisionerWrapper) getMattermostCustomResource(cluster *model.Cluster, clusterInstallation *model.ClusterInstallation, logger log.FieldLogger) (*mmv1beta1.Mattermost, error) {
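
The new setMMInstanceSize/overrideReplicasAndResourcesFromSize pair routes installation sizes two ways: a size carrying model.ProvisionerSizePrefix is parsed into an explicit v1alpha1.ClusterInstallationSize and written directly onto the CR (with Spec.Size cleared, presumably so the operator's own size defaulting does not overwrite the explicit values), while any other value is passed through as Spec.Size for the operator to expand. A small self-contained sketch of that routing — the prefix constant and the size names below are placeholders, not values taken from the provisioner's model package:

package main

import (
    "fmt"
    "strings"
)

// provisionerSizePrefix stands in for model.ProvisionerSizePrefix; the real
// constant is defined in the provisioner's model package.
const provisionerSizePrefix = "provisioner"

// routeSize reports which branch setMMInstanceSize would take for a size string.
func routeSize(size string) string {
    if strings.HasPrefix(size, provisionerSizePrefix) {
        // Custom size: parsed, then replicas/resources are set explicitly on the
        // CR and Spec.Size is cleared (see overrideReplicasAndResourcesFromSize).
        return "explicit replicas/resources on the CR; Spec.Size cleared"
    }
    // Operator-known size: handed to the operator unchanged via Spec.Size.
    return "passed through as Spec.Size"
}

func main() {
    fmt.Println("100users:", routeSize("100users"))                      // placeholder operator size
    fmt.Println("provisionerXL:", routeSize(provisionerSizePrefix+"XL")) // placeholder custom size
}
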
