From 7190a67fd7f253d11334106f4736c30c2ee2ba2e Mon Sep 17 00:00:00 2001 From: Ashleigh Brennan Date: Wed, 19 Jan 2022 10:27:17 -0600 Subject: [PATCH] SRVKS-573: Autoscaling docs improvements --- _topic_maps/_topic_map.yml | 8 +- .../serverless-autoscaling-minscale-kn.adoc | 2 +- modules/serverless-enable-scale-to-zero.adoc | 31 ++++++ ...serverless-scale-to-zero-grace-period.adoc | 31 ++++++ .../serverless-admin-autoscaling.adoc | 12 +++ .../serverless-autoscaling-concurrency.adoc | 14 --- .../serverless-autoscaling-developer.adoc | 98 +++++++++++++++++++ .../serverless-autoscaling-scale-bounds.adoc | 71 -------------- .../develop/serverless-autoscaling.adoc | 16 --- 9 files changed, 176 insertions(+), 107 deletions(-) create mode 100644 modules/serverless-enable-scale-to-zero.adoc create mode 100644 modules/serverless-scale-to-zero-grace-period.adoc create mode 100644 serverless/admin_guide/serverless-admin-autoscaling.adoc delete mode 100644 serverless/develop/serverless-autoscaling-concurrency.adoc create mode 100644 serverless/develop/serverless-autoscaling-developer.adoc delete mode 100644 serverless/develop/serverless-autoscaling-scale-bounds.adoc delete mode 100644 serverless/develop/serverless-autoscaling.adoc diff --git a/_topic_maps/_topic_map.yml b/_topic_maps/_topic_map.yml index a73cd6a3373d..631b492a4426 100644 --- a/_topic_maps/_topic_map.yml +++ b/_topic_maps/_topic_map.yml @@ -2931,11 +2931,7 @@ Topics: - Name: Serverless applications File: serverless-applications - Name: Autoscaling - File: serverless-autoscaling - - Name: Scale bounds - File: serverless-autoscaling-scale-bounds - - Name: Concurrency - File: serverless-autoscaling-concurrency + File: serverless-autoscaling-developer - Name: Traffic management File: serverless-traffic-management - Name: Routing @@ -2975,6 +2971,8 @@ Topics: File: serverless-cluster-admin-serving - Name: Configuring the Knative Serving custom resource File: knative-serving-CR-config + - Name: Autoscaling + File: 
serverless-admin-autoscaling # Ingress options - Name: Integrating Service Mesh with OpenShift Serverless File: serverless-ossm-setup diff --git a/modules/serverless-autoscaling-minscale-kn.adoc b/modules/serverless-autoscaling-minscale-kn.adoc index 93b3e9f8cdf2..5fc0bdf72313 100644 --- a/modules/serverless-autoscaling-minscale-kn.adoc +++ b/modules/serverless-autoscaling-minscale-kn.adoc @@ -11,12 +11,12 @@ You can use the `kn service` command with the `--min-scale` flag to create or mo * Set the minimum number of replicas for the service by using the `--min-scale` flag: + -.Examples [source,terminal] ---- $ kn service create <service_name> --image <image_uri> --min-scale <integer> ---- + +.Example command [source,terminal] ---- $ kn service create example-service --image quay.io/openshift-knative/knative-eventing-sources-event-display:latest --min-scale 2 diff --git a/modules/serverless-enable-scale-to-zero.adoc b/modules/serverless-enable-scale-to-zero.adoc new file mode 100644 index 000000000000..29c4c89fa814 --- /dev/null +++ b/modules/serverless-enable-scale-to-zero.adoc @@ -0,0 +1,31 @@ +// Module included in the following assemblies: +// +// * serverless/admin_guide/serverless-admin-autoscaling.adoc + +[id="serverless-enable-scale-to-zero_{context}"] += Enabling scale-to-zero + +Cluster administrators can enable or disable scale-to-zero globally for the cluster. + +.Prerequisites + +* You have installed {ServerlessOperatorName} and Knative Serving on your cluster. +* You have cluster administrator permissions. +* You are using the default Knative Pod Autoscaler. The scale-to-zero feature is not available if you are using the Kubernetes Horizontal Pod Autoscaler. 
+ +.Procedure + +* Modify the `enable-scale-to-zero` spec in the `KnativeServing` CR: ++ +[source,yaml] +---- +apiVersion: operator.knative.dev/v1alpha1 +kind: KnativeServing +metadata: + name: knative-serving +spec: + config: + autoscaler: + enable-scale-to-zero: "false" <1> +---- +<1> The `enable-scale-to-zero` spec can be either `"true"` or `"false"`. If set to `"true"`, scale-to-zero is enabled. If set to `"false"`, applications are scaled down to the configured _minimum scale bound_. The default value is `"true"`. diff --git a/modules/serverless-scale-to-zero-grace-period.adoc b/modules/serverless-scale-to-zero-grace-period.adoc new file mode 100644 index 000000000000..ad05049d3f5c --- /dev/null +++ b/modules/serverless-scale-to-zero-grace-period.adoc @@ -0,0 +1,31 @@ +// Module included in the following assemblies: +// +// * serverless/admin_guide/serverless-admin-autoscaling.adoc + +[id="serverless-scale-to-zero-grace-period_{context}"] += Configuring the scale-to-zero grace period + +This setting specifies the maximum amount of time that Knative waits for the scale-from-zero machinery to be in place before the last replica of an application is removed. + +.Prerequisites + +* You have installed {ServerlessOperatorName} and Knative Serving on your cluster. +* You have cluster administrator permissions. +* You are using the default Knative Pod Autoscaler. The scale-to-zero feature is not available if you are using the Kubernetes Horizontal Pod Autoscaler. + +.Procedure + +* Modify the `scale-to-zero-grace-period` spec in the `KnativeServing` CR: ++ +[source,yaml] +---- +apiVersion: operator.knative.dev/v1alpha1 +kind: KnativeServing +metadata: + name: knative-serving +spec: + config: + autoscaler: + scale-to-zero-grace-period: "30s" <1> +---- +<1> The grace period time in seconds, for example `"30s"` for 30 seconds. The default value is 30 seconds. 
diff --git a/serverless/admin_guide/serverless-admin-autoscaling.adoc b/serverless/admin_guide/serverless-admin-autoscaling.adoc new file mode 100644 index 000000000000..e758f5526c80 --- /dev/null +++ b/serverless/admin_guide/serverless-admin-autoscaling.adoc @@ -0,0 +1,12 @@ +[id="serverless-admin-autoscaling"] += Autoscaling +include::modules/common-attributes.adoc[] +include::modules/serverless-document-attributes.adoc[] +:context: serverless-admin-autoscaling + +toc::[] + +As a cluster administrator, you can set global and per-namespace default configurations for autoscaling features by modifying the `KnativeServing` custom resource (CR). This propagates changes to the relevant config maps. + +include::modules/serverless-enable-scale-to-zero.adoc[leveloffset=+1] +include::modules/serverless-scale-to-zero-grace-period.adoc[leveloffset=+1] diff --git a/serverless/develop/serverless-autoscaling-concurrency.adoc b/serverless/develop/serverless-autoscaling-concurrency.adoc deleted file mode 100644 index 517a831ded94..000000000000 --- a/serverless/develop/serverless-autoscaling-concurrency.adoc +++ /dev/null @@ -1,14 +0,0 @@ -[id="serverless-autoscaling-concurrency"] -= Concurrency -include::modules/common-attributes.adoc[] -include::modules/serverless-document-attributes.adoc[] -:context: serverless-autoscaling-concurrency - -toc::[] - -Concurrency determines the number of simultaneous requests that can be processed by each replica of an application at any given time. 
- -include::modules/serverless-concurrency-limits.adoc[leveloffset=+1] -include::modules/serverless-concurrency-limits-configure-soft.adoc[leveloffset=+1] -include::modules/serverless-concurrency-limits-configure-hard.adoc[leveloffset=+1] -include::modules/serverless-target-utilization.adoc[leveloffset=+1] diff --git a/serverless/develop/serverless-autoscaling-developer.adoc b/serverless/develop/serverless-autoscaling-developer.adoc new file mode 100644 index 000000000000..932e9f89e87d --- /dev/null +++ b/serverless/develop/serverless-autoscaling-developer.adoc @@ -0,0 +1,98 @@ +[id="serverless-autoscaling-developer"] += Autoscaling +include::modules/common-attributes.adoc[] +include::modules/serverless-document-attributes.adoc[] +:context: serverless-autoscaling-developer + +toc::[] + +Knative Serving provides automatic scaling, or _autoscaling_, for applications to match incoming demand. For example, if an application is receiving no traffic, and scale-to-zero is enabled, Knative Serving scales the application down to zero replicas. If scale-to-zero is disabled, the application is scaled down to the xref:../../serverless/develop/serverless-autoscaling-developer.adoc#serverless-autoscaling-developer-minscale[minimum number of replicas specified for applications on the cluster]. Replicas can also be scaled up to meet demand if traffic to the application increases. + +If Knative autoscaling is enabled for your cluster, you can configure concurrency and scale bounds for your application. + +[NOTE] +==== +Any limits or targets set in the revision template are measured against a single instance of your application. For example, setting the `target` annotation to `50` configures the autoscaler to scale the application so that each replica handles 50 requests at a time. +==== + +[id="serverless-autoscaling-developer-scale-bounds"] +== Scale bounds + +Scale bounds determine the minimum and maximum numbers of replicas that can serve an application at any given time. 
+ +You can set scale bounds for an application to help prevent cold starts or control computing costs. + +[id="serverless-autoscaling-developer-minscale"] +=== Minimum scale bounds + +The minimum number of replicas that can serve an application is determined by the `minScale` annotation. + +The `minScale` value defaults to `0` replicas if the following conditions are met: + +* The `minScale` annotation is not set +* Scaling to zero is enabled +* The class `KPA` is used + +If scale to zero is not enabled, the `minScale` value defaults to `1`. + +// TODO: Document KPA if supported, link to docs about setting class + +// TO DO: +// Add info / links about enabling and disabling autoscaling (admin docs) +// if `enable-scale-to-zero` is set to `false` in the `config-autoscaler` config map. + +.Example service spec with `minScale` spec +[source,yaml] +---- +apiVersion: serving.knative.dev/v1 +kind: Service +metadata: + name: example-service + namespace: default +spec: + template: + metadata: + annotations: + autoscaling.knative.dev/minScale: "0" +... +---- + +include::modules/serverless-autoscaling-minscale-kn.adoc[leveloffset=+3] + +[id="serverless-autoscaling-developer-maxscale"] +=== Maximum scale bounds + +The maximum number of replicas that can serve an application is determined by the `maxScale` annotation. If the `maxScale` annotation is not set, there is no upper limit for the number of replicas created. + +.Example service spec with `maxScale` spec +[source,yaml] +---- +apiVersion: serving.knative.dev/v1 +kind: Service +metadata: + name: example-service + namespace: default +spec: + template: + metadata: + annotations: + autoscaling.knative.dev/maxScale: "10" +... +---- + +include::modules/serverless-autoscaling-maxscale-kn.adoc[leveloffset=+3] + +[id="serverless-autoscaling-developer-concurrency"] +== Concurrency + +Concurrency determines the number of simultaneous requests that can be processed by each replica of an application at any given time. 
+ +include::modules/serverless-concurrency-limits.adoc[leveloffset=+2] +include::modules/serverless-concurrency-limits-configure-soft.adoc[leveloffset=+2] +include::modules/serverless-concurrency-limits-configure-hard.adoc[leveloffset=+2] +include::modules/serverless-target-utilization.adoc[leveloffset=+2] + +[id="additional-resources_serverless-autoscaling-developer"] +== Additional resources + +* Scale-to-zero can be enabled or disabled for the cluster by cluster administrators. For more information, see xref:../../serverless/admin_guide/serverless-admin-autoscaling.adoc#serverless-enable-scale-to-zero_serverless-admin-autoscaling[Enabling scale-to-zero]. diff --git a/serverless/develop/serverless-autoscaling-scale-bounds.adoc b/serverless/develop/serverless-autoscaling-scale-bounds.adoc deleted file mode 100644 index 766cc0846862..000000000000 --- a/serverless/develop/serverless-autoscaling-scale-bounds.adoc +++ /dev/null @@ -1,71 +0,0 @@ -[id="serverless-autoscaling-scale-bounds"] -= Scale bounds -include::modules/common-attributes.adoc[] -include::modules/serverless-document-attributes.adoc[] -:context: serverless-autoscaling-scale-bounds - -toc::[] - -Scale bounds determine the minimum and maximum numbers of replicas that can serve an application at any given time. - -You can set scale bounds for an application to help prevent cold starts or control computing costs. - -[id="serverless-autoscaling-minscale"] -== Minimum scale bounds - -The minimum number of replicas that can serve an application is determined by the `minScale` annotation. - -The `minScale` value defaults to `0` replicas if the following conditions are met: - -* The `minScale` annotation is not set -* Scaling to zero is enabled -* The class `KPA` is used - -If scale to zero is not enabled, the `minScale` value defaults to `1`. 
- -// TODO: Document KPA if supported, link to docs about setting class - -// TO DO: -// Add info / links about enabling and disabling autoscaling (admin docs) -// if `enable-scale-to-zero` is set to `false` in the `config-autoscaler` config map. - -.Example service spec with `minScale` spec -[source,yaml] ----- -apiVersion: serving.knative.dev/v1 -kind: Service -metadata: - name: example-service - namespace: default -spec: - template: - metadata: - annotations: - autoscaling.knative.dev/minScale: "0" -... ----- - -include::modules/serverless-autoscaling-minscale-kn.adoc[leveloffset=+2] - -[id="serverless-autoscaling-maxscale"] -== Maximum scale bounds - -The maximum number of replicas that can serve an application is determined by the `maxScale` annotation. If the `maxScale` annotation is not set, there is no upper limit for the number of replicas created. - -.Example service spec with `maxScale` spec -[source,yaml] ----- -apiVersion: serving.knative.dev/v1 -kind: Service -metadata: - name: example-service - namespace: default -spec: - template: - metadata: - annotations: - autoscaling.knative.dev/maxScale: "10" -... ----- - -include::modules/serverless-autoscaling-maxscale-kn.adoc[leveloffset=+2] diff --git a/serverless/develop/serverless-autoscaling.adoc b/serverless/develop/serverless-autoscaling.adoc deleted file mode 100644 index 0fd2f435004f..000000000000 --- a/serverless/develop/serverless-autoscaling.adoc +++ /dev/null @@ -1,16 +0,0 @@ -[id="serverless-autoscaling"] -= About autoscaling -include::modules/common-attributes.adoc[] -include::modules/serverless-document-attributes.adoc[] -:context: serverless-autoscaling - -toc::[] - -Knative Serving provides automatic scaling, or _autoscaling_, for applications to match incoming demand. For example, if an application is receiving no traffic, and scale to zero is enabled, Knative Serving scales the application down to zero replicas. 
If scaling to zero is disabled, the application is scaled down to the minimum number of replicas specified for applications on the cluster. Replicas can also be scaled up to meet demand if traffic to the application increases. - -To enable autoscaling for Knative Serving, you must configure xref:../develop/serverless-autoscaling-concurrency.adoc#serverless-autoscaling-concurrency[concurrency] and xref:../develop/serverless-autoscaling-scale-bounds.adoc#serverless-autoscaling-scale-bounds[scale bounds] for your application. - -[NOTE] -==== -Any limits or targets set in the revision template are measured against a single instance of your application. For example, setting the `target` annotation to `50` configures the autoscaler to scale the application so that each revision handles 50 requests at a time. -====