From 34c78617c852dc6306f8f076ed813a28ff516cd0 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 21 Oct 2019 10:58:51 -0700 Subject: [PATCH] Add example for lustre max cached size tuning --- .../kubernetes/max_cache_tuning/README.md | 35 +++++++++++++++++++ .../max_cache_tuning/specs/claim.yaml | 11 ++++++ .../max_cache_tuning/specs/pod.yaml | 24 +++++++++++++ .../max_cache_tuning/specs/storageclass.yaml | 10 ++++++ 4 files changed, 80 insertions(+) create mode 100644 examples/kubernetes/max_cache_tuning/README.md create mode 100644 examples/kubernetes/max_cache_tuning/specs/claim.yaml create mode 100644 examples/kubernetes/max_cache_tuning/specs/pod.yaml create mode 100644 examples/kubernetes/max_cache_tuning/specs/storageclass.yaml diff --git a/examples/kubernetes/max_cache_tuning/README.md b/examples/kubernetes/max_cache_tuning/README.md new file mode 100644 index 00000000..eb7743ee --- /dev/null +++ b/examples/kubernetes/max_cache_tuning/README.md @@ -0,0 +1,35 @@ +## Tuning Lustre Max Memory Cache +This example shows how to set lustre `llite.*.max_cached_mb` using an init container. Lustre client interacts with lustre kernel module for data caching at host level. Since the cache resides in kernel space, it won't be counted toward the application container's memory limit. Sometimes it is desirable to reduce the lustre cache size to limit memory consumption at host level. In this example, the max cache size is set to 32MB, but other values may be selected depending on what makes sense for the workload.
+ +### Edit [Pod](./specs/pod.yaml) +``` +apiVersion: v1 +kind: Pod +metadata: + name: fsx-app +spec: + initContainers: + - name: set-lustre-cache + image: amazon/aws-fsx-csi-driver:latest + securityContext: + privileged: true + command: ["/sbin/lctl"] + args: ["set_param", "llite.*.max_cached_mb=32"] + containers: + - name: app + image: amazonlinux:2 + command: ["/bin/sh"] + args: ["-c", "sleep 999999"] + volumeMounts: + - name: persistent-storage + mountPath: /data + volumes: + - name: persistent-storage + persistentVolumeClaim: + claimName: fsx-claim +``` +The `fsx-app` pod has an init container that sets `llite.*.max_cached_mb` using `lctl`. + +## Notes +* The aws-fsx-csi-driver image is reused in the init container for the `lctl` command. You could choose your own container image for this purpose as long as the lustre client user space tool `lctl` is available inside the image. +* The init container needs to be privileged as required by `lctl`. diff --git a/examples/kubernetes/max_cache_tuning/specs/claim.yaml b/examples/kubernetes/max_cache_tuning/specs/claim.yaml new file mode 100644 index 00000000..dff1481c --- /dev/null +++ b/examples/kubernetes/max_cache_tuning/specs/claim.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: fsx-claim +spec: + accessModes: + - ReadWriteMany + storageClassName: fsx-sc + resources: + requests: + storage: 1200Gi diff --git a/examples/kubernetes/max_cache_tuning/specs/pod.yaml b/examples/kubernetes/max_cache_tuning/specs/pod.yaml new file mode 100644 index 00000000..bf029064 --- /dev/null +++ b/examples/kubernetes/max_cache_tuning/specs/pod.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Pod +metadata: + name: fsx-app +spec: + initContainers: + - name: set-lustre-cache + image: amazon/aws-fsx-csi-driver:latest + securityContext: + privileged: true + command: ["/sbin/lctl"] + args: ["set_param", "llite.*.max_cached_mb=32"] + containers: + - name: app + image: amazonlinux:2 + command: ["/bin/sh"] + 
args: ["-c", "sleep 999999"] + volumeMounts: + - name: persistent-storage + mountPath: /data + volumes: + - name: persistent-storage + persistentVolumeClaim: + claimName: fsx-claim diff --git a/examples/kubernetes/max_cache_tuning/specs/storageclass.yaml b/examples/kubernetes/max_cache_tuning/specs/storageclass.yaml new file mode 100644 index 00000000..b6dfff28 --- /dev/null +++ b/examples/kubernetes/max_cache_tuning/specs/storageclass.yaml @@ -0,0 +1,10 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: fsx-sc +provisioner: fsx.csi.aws.com +parameters: + subnetId: subnet-0d7b5e117ad7b4961 + securityGroupIds: sg-05a37bfe01467059a +mountOptions: + - flock