From 8fd2c3ac201bad5e9ea4306e2b3b9a90b83b4904 Mon Sep 17 00:00:00 2001 From: Alexander Maslennikov Date: Tue, 17 Jun 2025 13:29:41 +0200 Subject: [PATCH] feat: add a quick-start doc article * Add a new quick start section with (almost) ready-to-use deployment examples * Add keywords to use case definitions for ease of future automated parsing * Store examples as separate files and build them to allow automated deployments * Rename getting started with k8s to deployment guide and put it on the same level with quick start guide Signed-off-by: Alexander Maslennikov --- Makefile | 38 +++- ...es.rst => deployment-guide-kubernetes.rst} | 9 +- docs/getting-started-with-kubernetes.rst | 31 +++ docs/index.rst | 4 +- docs/overview.rst | 39 ++++ docs/quick-start/host-device-rdma.rst | 71 +++++++ docs/quick-start/ipoib-rdma-shared.rst | 78 ++++++++ docs/quick-start/macvlan-rdma-shared.rst | 74 +++++++ docs/quick-start/quick-start-k8s.rst | 143 ++++++++++++++ docs/quick-start/sriov-ib-rdma.rst | 81 ++++++++ docs/quick-start/sriov-network-rdma.rst | 81 ++++++++ .../host-device-rdma/10-nicclusterpolicy.yaml | 37 ++++ .../processed/host-device-rdma/20-ippool.yaml | 9 + .../30-hostdevicenetwork.yaml | 12 ++ .../processed/host-device-rdma/40-pod.yaml | 19 ++ .../processed/host-device-rdma/complete.yaml | 80 ++++++++ .../10-nicclusterpolicy.yaml | 56 ++++++ .../ipoib-rdma-shared/20-ippool.yaml | 29 +++ .../ipoib-rdma-shared/30-ipoibnetwork.yaml | 38 ++++ .../processed/ipoib-rdma-shared/40-pod.yaml | 59 ++++++ .../processed/ipoib-rdma-shared/complete.yaml | 185 ++++++++++++++++++ .../10-nicclusterpolicy.yaml | 36 ++++ .../macvlan-rdma-shared/20-ippool.yaml | 9 + .../30-macvlannetwork.yaml | 14 ++ .../processed/macvlan-rdma-shared/40-pod.yaml | 19 ++ .../macvlan-rdma-shared/complete.yaml | 81 ++++++++ .../sriov-ib-rdma/10-nicclusterpolicy.yaml | 24 +++ .../processed/sriov-ib-rdma/20-ippool.yaml | 9 + .../30-sriovnetworknodepolicy.yaml | 17 ++ .../sriov-ib-rdma/40-sriovibnetwork.yaml | 
14 ++ examples/processed/sriov-ib-rdma/50-pod.yaml | 19 ++ .../processed/sriov-ib-rdma/complete.yaml | 87 ++++++++ .../10-nicclusterpolicy.yaml | 23 +++ .../sriov-network-rdma/20-ippool.yaml | 9 + .../30-sriovnetworknodepolicy.yaml | 16 ++ .../sriov-network-rdma/40-sriovnetwork.yaml | 13 ++ .../processed/sriov-network-rdma/50-pod.yaml | 19 ++ .../sriov-network-rdma/complete.yaml | 84 ++++++++ .../host-device-rdma/10-nicclusterpolicy.yaml | 37 ++++ .../templates/host-device-rdma/20-ippool.yaml | 9 + .../30-hostdevicenetwork.yaml | 12 ++ .../templates/host-device-rdma/40-pod.yaml | 19 ++ .../10-nicclusterpolicy.yaml | 56 ++++++ .../ipoib-rdma-shared/20-ippool.yaml | 29 +++ .../ipoib-rdma-shared/30-ipoibnetwork.yaml | 38 ++++ .../templates/ipoib-rdma-shared/40-pod.yaml | 59 ++++++ .../10-nicclusterpolicy.yaml | 36 ++++ .../macvlan-rdma-shared/20-ippool.yaml | 9 + .../30-macvlannetwork.yaml | 14 ++ .../templates/macvlan-rdma-shared/40-pod.yaml | 19 ++ .../sriov-ib-rdma/10-nicclusterpolicy.yaml | 24 +++ .../templates/sriov-ib-rdma/20-ippool.yaml | 9 + .../30-sriovnetworknodepolicy.yaml | 17 ++ .../sriov-ib-rdma/40-sriovibnetwork.yaml | 14 ++ examples/templates/sriov-ib-rdma/50-pod.yaml | 19 ++ .../10-nicclusterpolicy.yaml | 23 +++ .../sriov-network-rdma/20-ippool.yaml | 9 + .../30-sriovnetworknodepolicy.yaml | 16 ++ .../sriov-network-rdma/40-sriovnetwork.yaml | 13 ++ .../templates/sriov-network-rdma/50-pod.yaml | 19 ++ 60 files changed, 2155 insertions(+), 11 deletions(-) rename docs/{getting-started-kubernetes.rst => deployment-guide-kubernetes.rst} (99%) create mode 100644 docs/getting-started-with-kubernetes.rst create mode 100644 docs/quick-start/host-device-rdma.rst create mode 100644 docs/quick-start/ipoib-rdma-shared.rst create mode 100644 docs/quick-start/macvlan-rdma-shared.rst create mode 100644 docs/quick-start/quick-start-k8s.rst create mode 100644 docs/quick-start/sriov-ib-rdma.rst create mode 100644 docs/quick-start/sriov-network-rdma.rst create mode 
100644 examples/processed/host-device-rdma/10-nicclusterpolicy.yaml create mode 100644 examples/processed/host-device-rdma/20-ippool.yaml create mode 100644 examples/processed/host-device-rdma/30-hostdevicenetwork.yaml create mode 100644 examples/processed/host-device-rdma/40-pod.yaml create mode 100644 examples/processed/host-device-rdma/complete.yaml create mode 100644 examples/processed/ipoib-rdma-shared/10-nicclusterpolicy.yaml create mode 100644 examples/processed/ipoib-rdma-shared/20-ippool.yaml create mode 100644 examples/processed/ipoib-rdma-shared/30-ipoibnetwork.yaml create mode 100644 examples/processed/ipoib-rdma-shared/40-pod.yaml create mode 100644 examples/processed/ipoib-rdma-shared/complete.yaml create mode 100644 examples/processed/macvlan-rdma-shared/10-nicclusterpolicy.yaml create mode 100644 examples/processed/macvlan-rdma-shared/20-ippool.yaml create mode 100644 examples/processed/macvlan-rdma-shared/30-macvlannetwork.yaml create mode 100644 examples/processed/macvlan-rdma-shared/40-pod.yaml create mode 100644 examples/processed/macvlan-rdma-shared/complete.yaml create mode 100644 examples/processed/sriov-ib-rdma/10-nicclusterpolicy.yaml create mode 100644 examples/processed/sriov-ib-rdma/20-ippool.yaml create mode 100644 examples/processed/sriov-ib-rdma/30-sriovnetworknodepolicy.yaml create mode 100644 examples/processed/sriov-ib-rdma/40-sriovibnetwork.yaml create mode 100644 examples/processed/sriov-ib-rdma/50-pod.yaml create mode 100644 examples/processed/sriov-ib-rdma/complete.yaml create mode 100644 examples/processed/sriov-network-rdma/10-nicclusterpolicy.yaml create mode 100644 examples/processed/sriov-network-rdma/20-ippool.yaml create mode 100644 examples/processed/sriov-network-rdma/30-sriovnetworknodepolicy.yaml create mode 100644 examples/processed/sriov-network-rdma/40-sriovnetwork.yaml create mode 100644 examples/processed/sriov-network-rdma/50-pod.yaml create mode 100644 examples/processed/sriov-network-rdma/complete.yaml create 
mode 100644 examples/templates/host-device-rdma/10-nicclusterpolicy.yaml create mode 100644 examples/templates/host-device-rdma/20-ippool.yaml create mode 100644 examples/templates/host-device-rdma/30-hostdevicenetwork.yaml create mode 100644 examples/templates/host-device-rdma/40-pod.yaml create mode 100644 examples/templates/ipoib-rdma-shared/10-nicclusterpolicy.yaml create mode 100644 examples/templates/ipoib-rdma-shared/20-ippool.yaml create mode 100644 examples/templates/ipoib-rdma-shared/30-ipoibnetwork.yaml create mode 100644 examples/templates/ipoib-rdma-shared/40-pod.yaml create mode 100644 examples/templates/macvlan-rdma-shared/10-nicclusterpolicy.yaml create mode 100644 examples/templates/macvlan-rdma-shared/20-ippool.yaml create mode 100644 examples/templates/macvlan-rdma-shared/30-macvlannetwork.yaml create mode 100644 examples/templates/macvlan-rdma-shared/40-pod.yaml create mode 100644 examples/templates/sriov-ib-rdma/10-nicclusterpolicy.yaml create mode 100644 examples/templates/sriov-ib-rdma/20-ippool.yaml create mode 100644 examples/templates/sriov-ib-rdma/30-sriovnetworknodepolicy.yaml create mode 100644 examples/templates/sriov-ib-rdma/40-sriovibnetwork.yaml create mode 100644 examples/templates/sriov-ib-rdma/50-pod.yaml create mode 100644 examples/templates/sriov-network-rdma/10-nicclusterpolicy.yaml create mode 100644 examples/templates/sriov-network-rdma/20-ippool.yaml create mode 100644 examples/templates/sriov-network-rdma/30-sriovnetworknodepolicy.yaml create mode 100644 examples/templates/sriov-network-rdma/40-sriovnetwork.yaml create mode 100644 examples/templates/sriov-network-rdma/50-pod.yaml diff --git a/Makefile b/Makefile index bd526a8c..2fa56559 100644 --- a/Makefile +++ b/Makefile @@ -42,9 +42,11 @@ HELM_CHART_VERSION ?= 24.4.1 NGC_HELM_CHART_URL ?= https://helm.ngc.nvidia.com/nvidia/charts/network-operator-${HELM_CHART_VERSION}.tgz HELM_CHART_PATH ?= -$(BUILDDIR) $(TOOLSDIR) $(HELM_CHART_DEP_ROOT) $(CRD_API_DEP_ROOT): ; $(info 
Creating directory $@...) - mkdir -p $@ +# Examples build directory for processed YAML files +EXAMPLES_BUILD_DIR = examples/processed +$(BUILDDIR) $(TOOLSDIR) $(HELM_CHART_DEP_ROOT) $(CRD_API_DEP_ROOT) $(EXAMPLES_BUILD_DIR): ; $(info Creating directory $@...) + mkdir -p $@ # helm-docs is used to generate helm chart documentation HELM_DOCS_PKG := github.com/norwoodj/helm-docs/cmd/helm-docs @@ -136,8 +138,38 @@ build-cache: ${CURDIR}/tools/packman/python.sh -m pip install --no-cache-dir --no-deps -U -t ${CACHE_DIR}/chk/sphinx/4.5.0.2-py3.7-linux-x86_64/ Sphinx-Substitution-Extensions; \ fi +.PHONY: process-examples +process-examples: | $(EXAMPLES_BUILD_DIR) + @echo "Processing YAML examples with version substitutions..." + @# Generate sed script from all variables in vars.rst + @grep -E '^\.\. \|.*\| replace::' docs/common/vars.rst | \ + sed 's/\.\. |\(.*\)| replace:: \(.*\)/s@|\1|@\2@g/' > /tmp/vars.sed + @find examples/templates -name "*.yaml" -type f | while read -r file; do \ + echo "Processing $$file"; \ + relative_path=$$(echo "$$file" | sed 's|^examples/templates/||'); \ + output_dir="$(EXAMPLES_BUILD_DIR)/$$(dirname "$$relative_path")"; \ + mkdir -p "$$output_dir"; \ + sed -f /tmp/vars.sed "$$file" > "$(EXAMPLES_BUILD_DIR)/$$relative_path"; \ + done + @rm -f /tmp/vars.sed + @echo "Generating complete configuration files..." 
+ @# Generate complete.yaml files by concatenating numbered files in order + @find $(EXAMPLES_BUILD_DIR) -mindepth 1 -maxdepth 1 -type d | while read -r dir; do \ + echo "Creating complete.yaml for $$(basename "$$dir")"; \ + complete_file="$$dir/complete.yaml"; \ + > "$$complete_file"; \ + find "$$dir" -name "[0-9]*-*.yaml" | sort | while read -r numbered_file; do \ + cat "$$numbered_file" >> "$$complete_file"; \ + echo "" >> "$$complete_file"; \ + echo "---" >> "$$complete_file"; \ + done; \ + if [ -s "$$complete_file" ]; then \ + sed -i '$$d' "$$complete_file"; \ + fi; \ + done + .PHONY: gen-docs -gen-docs: build-cache +gen-docs: build-cache process-examples @echo "Generating documentation..."; \ export PM_PACKAGES_ROOT=${CACHE_DIR}; \ ${CURDIR}/repo.sh docs diff --git a/docs/getting-started-kubernetes.rst b/docs/deployment-guide-kubernetes.rst similarity index 99% rename from docs/getting-started-kubernetes.rst rename to docs/deployment-guide-kubernetes.rst index 90ef63cc..69c95b55 100644 --- a/docs/getting-started-kubernetes.rst +++ b/docs/deployment-guide-kubernetes.rst @@ -17,18 +17,15 @@ .. headings # #, * *, =, -, ^, " .. include:: ./common/vars.rst -******************************* -Getting Started with Kubernetes -******************************* +******************************************************** +NVIDIA Network Operator Deployment Guide with Kubernetes +******************************************************** .. contents:: On this page :depth: 3 :local: :backlinks: none -================================= -Network Operator Deployment Guide -================================= .. _here: ./release-notes.html .. _Kubernetes CRDs: https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/ .. 
_Operator SDK: https://github.com/operator-framework/operator-sdk diff --git a/docs/getting-started-with-kubernetes.rst b/docs/getting-started-with-kubernetes.rst new file mode 100644 index 00000000..f1721bf2 --- /dev/null +++ b/docs/getting-started-with-kubernetes.rst @@ -0,0 +1,31 @@ +.. license-header + SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +.. headings # #, * *, =, -, ^, " + +=============================== +Getting Started with Kubernetes +=============================== + +This section provides comprehensive guides to help you get started with the NVIDIA Network Operator on Kubernetes. + + +.. toctree:: + :maxdepth: 1 + :titlesonly: + + Quick Start Guide + Deployment Guide with Kubernetes diff --git a/docs/index.rst b/docs/index.rst index b3c62c87..061da2f5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -18,12 +18,12 @@ .. 
toctree:: :caption: NVIDIA Network Operator - :titlesonly: + :maxdepth: 2 :hidden: Release Notes Platform Support - Getting Started with Kubernetes + Getting Started with Kubernetes Getting Started with Red Hat OpenShift Customization Options and CRDs Life Cycle Management diff --git a/docs/overview.rst b/docs/overview.rst index f78d1613..12ac575b 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -29,6 +29,40 @@ The NVIDIA Network Operator works in conjunction with the NVIDIA GPU Operator to A Helm chart is provided for easily deploying the Network operator in a cluster to provision the host software on NVIDIA-enabled nodes. +=================== +Networking Features +=================== + +**RDMA Support** + Remote Direct Memory Access (RDMA) for memory-to-memory data transfers that bypass the CPU and kernel networking stack. Supports InfiniBand and RDMA over Converged Ethernet (RoCE) protocols. + +**SR-IOV Virtualization** + Single Root I/O Virtualization technology that partitions network interface cards into multiple Virtual Functions (VFs) for hardware-level isolation and performance. + +**Secondary Networks** + Multiple network interface types including host device networks, MacVLAN networks, IP over InfiniBand networks, and SR-IOV networks for specialized networking requirements. + +**Driver Management** + Automated deployment and management of NVIDIA DOCA-OFED networking drivers across cluster nodes with version control and updates. + +================== +Supported Hardware +================== + +See the :doc:`platform-support` page for supported hardware and software. 
+ +========= +Use Cases +========= + +**High-Performance Computing (HPC)**: Scientific simulations, modeling applications, and distributed computing workloads + +**Machine Learning**: Distributed training and inference workloads across multiple GPU nodes + +**Data Processing**: Database systems, analytics platforms, and storage applications requiring high network throughput + +**Legacy Applications**: Existing applications that require direct access to networking hardware + ================= License Agreement ================= @@ -42,3 +76,8 @@ Learn More The Network Operator is open-source. For more information on contributions and release artifacts, see the `GitHub repo `_. + +For detailed deployment instructions and examples: + +- :doc:`quick-start/quick-start-k8s`: Quick deployment guide with common configurations +- :doc:`deployment-guide-kubernetes`: Detailed deployment scenarios diff --git a/docs/quick-start/host-device-rdma.rst b/docs/quick-start/host-device-rdma.rst new file mode 100644 index 00000000..288af79c --- /dev/null +++ b/docs/quick-start/host-device-rdma.rst @@ -0,0 +1,71 @@ +.. license-header + SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +.. headings # #, * *, =, -, ^, " +.. 
include:: ../common/vars.rst + +********************************* +Host Device Network with RDMA +********************************* + +**Step 1**: Create NicClusterPolicy with host device support + +.. literalinclude:: ../../examples/processed/host-device-rdma/10-nicclusterpolicy.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f nicclusterpolicy.yaml + +**Step 2**: Create IPPool for nv-ipam + +.. literalinclude:: ../../examples/processed/host-device-rdma/20-ippool.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f ippool.yaml + +**Step 3**: Create HostDeviceNetwork + +.. literalinclude:: ../../examples/processed/host-device-rdma/30-hostdevicenetwork.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f hostdevicenetwork.yaml + +**Step 4**: Deploy test workload + +.. literalinclude:: ../../examples/processed/host-device-rdma/40-pod.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f pod.yaml + +Verify the deployment: + +.. code-block:: bash + + kubectl exec -it hostdev-test-pod -- lspci | grep Mellanox + +.. dropdown:: Complete Configuration + :color: primary + :icon: code + + .. literalinclude:: ../../examples/processed/host-device-rdma/complete.yaml + :language: yaml diff --git a/docs/quick-start/ipoib-rdma-shared.rst b/docs/quick-start/ipoib-rdma-shared.rst new file mode 100644 index 00000000..3be7a978 --- /dev/null +++ b/docs/quick-start/ipoib-rdma-shared.rst @@ -0,0 +1,78 @@ +.. license-header + SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +.. headings # #, * *, =, -, ^, " +.. include:: ../common/vars.rst + +************************************************* +Deploy IP over InfiniBand with RDMA Shared Device +************************************************* + +**Step 1**: Create NicClusterPolicy with IPoIB support and 3 RDMA shared device pools + +.. literalinclude:: ../../examples/processed/ipoib-rdma-shared/10-nicclusterpolicy.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f nicclusterpolicy.yaml + +**Step 2**: Create IPPool CRs for nv-ipam with 3 IP pools + +.. literalinclude:: ../../examples/processed/ipoib-rdma-shared/20-ippool.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f ippool.yaml + +**Step 3**: Create IPoIBNetwork CRs for 3 IPoIB networks + +.. literalinclude:: ../../examples/processed/ipoib-rdma-shared/30-ipoibnetwork.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f ipoibnetwork.yaml + +**Step 4**: Deploy test workloads for 3 IPoIB networks + +.. literalinclude:: ../../examples/processed/ipoib-rdma-shared/40-pod.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f pod.yaml + +Verify the deployment: + +.. code-block:: bash + + kubectl exec -it ipoib-test-pod-a -- ibstat + kubectl exec -it ipoib-test-pod-a -- ip addr show + + kubectl exec -it ipoib-test-pod-b -- ibstat + kubectl exec -it ipoib-test-pod-b -- ip addr show + + kubectl exec -it ipoib-test-pod-c -- ibstat + kubectl exec -it ipoib-test-pod-c -- ip addr show + +.. dropdown:: Complete Configuration + :color: primary + :icon: code + + .. 
literalinclude:: ../../examples/processed/ipoib-rdma-shared/complete.yaml + :language: yaml \ No newline at end of file diff --git a/docs/quick-start/macvlan-rdma-shared.rst b/docs/quick-start/macvlan-rdma-shared.rst new file mode 100644 index 00000000..fe4b2ba9 --- /dev/null +++ b/docs/quick-start/macvlan-rdma-shared.rst @@ -0,0 +1,74 @@ +.. license-header + SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +.. headings # #, * *, =, -, ^, " +.. include:: ../common/vars.rst + +************************************************* +Deploy MacVLAN Network with RDMA Shared Device +************************************************* + +.. _macvlan-rdma-shared-deployment: + +**Step 1**: Create NicClusterPolicy with RDMA shared device + +.. literalinclude:: ../../examples/processed/macvlan-rdma-shared/10-nicclusterpolicy.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f nicclusterpolicy.yaml + +**Step 2**: Create IPPool for nv-ipam + +.. literalinclude:: ../../examples/processed/macvlan-rdma-shared/20-ippool.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f ippool.yaml + +**Step 3**: Create MacvlanNetwork + +.. literalinclude:: ../../examples/processed/macvlan-rdma-shared/30-macvlannetwork.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f macvlannetwork.yaml + +**Step 4**: Deploy test workload + +.. 
literalinclude:: ../../examples/processed/macvlan-rdma-shared/40-pod.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f pod.yaml + +Verify the deployment: + +.. code-block:: bash + + kubectl exec -it macvlan-test-pod -- ip addr show + kubectl exec -it macvlan-test-pod -- ibv_devinfo + +.. dropdown:: Complete Configuration + :color: primary + :icon: code + + .. literalinclude:: ../../examples/processed/macvlan-rdma-shared/complete.yaml + :language: yaml \ No newline at end of file diff --git a/docs/quick-start/quick-start-k8s.rst b/docs/quick-start/quick-start-k8s.rst new file mode 100644 index 00000000..37805d59 --- /dev/null +++ b/docs/quick-start/quick-start-k8s.rst @@ -0,0 +1,143 @@ +.. license-header + SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +.. headings # #, * *, =, -, ^, " +.. include:: ../common/vars.rst + +********************************* +Quick Start Guide for Kubernetes +********************************* + +.. contents:: On this page + :depth: 3 + :local: + :backlinks: none + +================ +Before You Begin +================ + +Before deploying the NVIDIA Network Operator, ensure you have the following: + +Prerequisites +============= + +#. **Kubernetes Cluster**: A running Kubernetes cluster (v1.19+) with nodes that have NVIDIA NICs. + +#. 
**CLI Tools**: Install ``kubectl`` and ``helm`` on your client machine: + + .. code-block:: bash + + $ curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 \ + && chmod 700 get_helm.sh \ + && ./get_helm.sh + +#. **Container Runtime**: Nodes must be configured with a container engine such as CRI-O or containerd. + +=================================== +Install Network Operator Helm Chart +=================================== + +Add the NVIDIA NGC Helm repository: + +.. code-block:: bash + + helm repo add nvidia https://helm.ngc.nvidia.com/nvidia + helm repo update + +Install the Network Operator: + +.. code-block:: bash + :substitutions: + + helm install network-operator nvidia/network-operator \ + -n nvidia-network-operator \ + --create-namespace \ + --version |network-operator-version| \ + --set sriovNetworkOperator.enabled=true \ + --wait + +Verify the installation: + +.. code-block:: bash + + kubectl -n nvidia-network-operator get pods + +================================ +Overview of Quickstart Use Cases +================================ + +This quick start guide covers five essential networking configurations for different computational requirements: + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Quick Start Guide + + SR-IOV Network with RDMA + Host Device Network with RDMA + IP over InfiniBand with RDMA Shared Device + MacVLAN Network with RDMA Shared Device + SR-IOV InfiniBand Network with RDMA + +.. 
list-table:: + :widths: 20 25 20 30 + :header-rows: 1 + + * - **Use Case** + - **Purpose** + - **Performance Requirements** + - **Applications** + * - :doc:`SR-IOV Network with RDMA ` + - High-performance networking with hardware acceleration + - • >10 Gbps throughput + • <1μs latency + • Dedicated VF resources + - HPC simulations, distributed ML training, financial trading + + *Keywords: SR-IOV, RDMA, HPC, low-latency, VF isolation* + * - :doc:`Host Device Network with RDMA ` + - Direct hardware access for legacy applications + - • Raw device control + • Exclusive hardware access + • Minimal CPU overhead + - Legacy HPC codes, specialized protocols, DPDK applications + + *Keywords: host-device, PCI-passthrough, direct-access, exclusive-access* + * - :doc:`IP over InfiniBand with RDMA Shared Device ` + - InfiniBand networking with shared RDMA resources + - • >50 Gbps bandwidth + • Parallel I/O workloads + • Shared device efficiency + - Distributed storage, data analytics, scientific computing + + *Keywords: InfiniBand, IPoIB, shared-device, high-bandwidth* + * - :doc:`MacVLAN Network with RDMA Shared Device ` + - Network isolation with shared RDMA capabilities + - • Multi-tenant segmentation + • 10+ pods per node + • Moderate throughput + - Cloud-native HPC, microservices, multi-tenant ML + + *Keywords: MacVLAN, multi-tenant, network-segmentation, resource-sharing* + * - :doc:`SR-IOV InfiniBand Network with RDMA ` + - Virtualized InfiniBand with hardware acceleration + - • >100 Gbps bandwidth + • Hardware acceleration + • Isolated IB partitions + - Large-scale HPC clusters, AI/ML training, research computing + + *Keywords: SR-IOV, InfiniBand, hardware-acceleration, ultra-high-bandwidth* diff --git a/docs/quick-start/sriov-ib-rdma.rst b/docs/quick-start/sriov-ib-rdma.rst new file mode 100644 index 00000000..26d600ef --- /dev/null +++ b/docs/quick-start/sriov-ib-rdma.rst @@ -0,0 +1,81 @@ +.. 
license-header + SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +.. headings # #, * *, =, -, ^, " +.. include:: ../common/vars.rst + +************************************************* +Deploy SR-IOV InfiniBand Network with RDMA +************************************************* + +**Step 1**: Create NicClusterPolicy for InfiniBand + +.. literalinclude:: ../../examples/processed/sriov-ib-rdma/10-nicclusterpolicy.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f nicclusterpolicy.yaml + +**Step 2**: Create IPPool for nv-ipam + +.. literalinclude:: ../../examples/processed/sriov-ib-rdma/20-ippool.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f ippool.yaml + +**Step 3**: Configure SR-IOV for InfiniBand + +.. literalinclude:: ../../examples/processed/sriov-ib-rdma/30-sriovnetworknodepolicy.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f sriovnetworknodepolicy.yaml + +**Step 4**: Create SriovIBNetwork + +.. literalinclude:: ../../examples/processed/sriov-ib-rdma/40-sriovibnetwork.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f sriovibnetwork.yaml + +**Step 5**: Deploy test workload + +.. literalinclude:: ../../examples/processed/sriov-ib-rdma/50-pod.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f pod.yaml + +Verify the deployment: + +.. 
code-block:: bash + + kubectl exec -it sriov-ib-test-pod -- ibv_devices + kubectl exec -it sriov-ib-test-pod -- ibstat + +.. dropdown:: Complete Configuration + :color: primary + :icon: code + + .. literalinclude:: ../../examples/processed/sriov-ib-rdma/complete.yaml + :language: yaml diff --git a/docs/quick-start/sriov-network-rdma.rst b/docs/quick-start/sriov-network-rdma.rst new file mode 100644 index 00000000..be62aaa8 --- /dev/null +++ b/docs/quick-start/sriov-network-rdma.rst @@ -0,0 +1,81 @@ +.. license-header + SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +.. headings # #, * *, =, -, ^, " +.. include:: ../common/vars.rst + +********************************* +Deploy SR-IOV Network with RDMA +********************************* + +**Step 1**: Create NicClusterPolicy + +.. literalinclude:: ../../examples/processed/sriov-network-rdma/10-nicclusterpolicy.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f nicclusterpolicy.yaml + +**Step 2**: Create IPPool for nv-ipam + +.. literalinclude:: ../../examples/processed/sriov-network-rdma/20-ippool.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f ippool.yaml + +**Step 3**: Configure SR-IOV + +.. literalinclude:: ../../examples/processed/sriov-network-rdma/30-sriovnetworknodepolicy.yaml + :language: yaml + +.. 
code-block:: bash + + kubectl apply -f sriovnetworknodepolicy.yaml + +**Step 4**: Create SR-IOV Network + +.. literalinclude:: ../../examples/processed/sriov-network-rdma/40-sriovnetwork.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f sriovnetwork.yaml + +**Step 5**: Deploy test workload + +.. literalinclude:: ../../examples/processed/sriov-network-rdma/50-pod.yaml + :language: yaml + +.. code-block:: bash + + kubectl apply -f pod.yaml + +Verify the deployment: + +.. code-block:: bash + + kubectl exec -it sriov-test-pod -- ip addr show + kubectl exec -it sriov-test-pod -- ibv_devices + +.. dropdown:: Complete Configuration + :color: primary + :icon: code + + .. literalinclude:: ../../examples/processed/sriov-network-rdma/complete.yaml + :language: yaml diff --git a/examples/processed/host-device-rdma/10-nicclusterpolicy.yaml b/examples/processed/host-device-rdma/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..fbe444a5 --- /dev/null +++ b/examples/processed/host-device-rdma/10-nicclusterpolicy.yaml @@ -0,0 +1,37 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + sriovDevicePlugin: + image: sriov-network-device-plugin + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + config: | + { + "resourceList": [ + { + "resourcePrefix": "nvidia.com", + "resourceName": "hostdev", + "selectors": { + "vendors": ["15b3"], + "isRdma": true + } + } + ] + } + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 \ No newline at end of file diff --git a/examples/processed/host-device-rdma/20-ippool.yaml 
b/examples/processed/host-device-rdma/20-ippool.yaml new file mode 100644 index 00000000..efa36719 --- /dev/null +++ b/examples/processed/host-device-rdma/20-ippool.yaml @@ -0,0 +1,9 @@ +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: hostdev-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.3.0/24 + perNodeBlockSize: 50 + gateway: 192.168.3.1 \ No newline at end of file diff --git a/examples/processed/host-device-rdma/30-hostdevicenetwork.yaml b/examples/processed/host-device-rdma/30-hostdevicenetwork.yaml new file mode 100644 index 00000000..70a11653 --- /dev/null +++ b/examples/processed/host-device-rdma/30-hostdevicenetwork.yaml @@ -0,0 +1,12 @@ +apiVersion: mellanox.com/v1alpha1 +kind: HostDeviceNetwork +metadata: + name: hostdev-net +spec: + networkNamespace: "default" + resourceName: "hostdev" + ipam: | + { + "type": "nv-ipam", + "poolName": "hostdev-pool" + } \ No newline at end of file diff --git a/examples/processed/host-device-rdma/40-pod.yaml b/examples/processed/host-device-rdma/40-pod.yaml new file mode 100644 index 00000000..b4156e0e --- /dev/null +++ b/examples/processed/host-device-rdma/40-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: hostdev-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: hostdev-net +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + nvidia.com/hostdev: '1' + limits: + nvidia.com/hostdev: '1' \ No newline at end of file diff --git a/examples/processed/host-device-rdma/complete.yaml b/examples/processed/host-device-rdma/complete.yaml new file mode 100644 index 00000000..cc8956da --- /dev/null +++ b/examples/processed/host-device-rdma/complete.yaml @@ -0,0 +1,80 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + sriovDevicePlugin: + image: 
sriov-network-device-plugin + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + config: | + { + "resourceList": [ + { + "resourcePrefix": "nvidia.com", + "resourceName": "hostdev", + "selectors": { + "vendors": ["15b3"], + "isRdma": true + } + } + ] + } + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 +--- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: hostdev-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.3.0/24 + perNodeBlockSize: 50 + gateway: 192.168.3.1 +--- +apiVersion: mellanox.com/v1alpha1 +kind: HostDeviceNetwork +metadata: + name: hostdev-net +spec: + networkNamespace: "default" + resourceName: "hostdev" + ipam: | + { + "type": "nv-ipam", + "poolName": "hostdev-pool" + } +--- +apiVersion: v1 +kind: Pod +metadata: + name: hostdev-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: hostdev-net +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + nvidia.com/hostdev: '1' + limits: + nvidia.com/hostdev: '1' diff --git a/examples/processed/ipoib-rdma-shared/10-nicclusterpolicy.yaml b/examples/processed/ipoib-rdma-shared/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..4699c40e --- /dev/null +++ b/examples/processed/ipoib-rdma-shared/10-nicclusterpolicy.yaml @@ -0,0 +1,56 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + ofedDriver: + image: doca-driver + repository: nvcr.io/nvstaging/mellanox + 
version: 25.04-0.6.1.0-2 + rdmaSharedDevicePlugin: + image: k8s-rdma-shared-dev-plugin + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + config: | + { + "configList": [ + { + "resourceName": "rdma_shared_device_a", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ibs1f0"] + } + }, + { + "resourceName": "rdma_shared_device_b", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ibs1f1"] + } + }, + { + "resourceName": "rdma_shared_device_c", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ibs2f0"] + } } + ] } + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + ipoib: + image: ipoib-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 \ No newline at end of file diff --git a/examples/processed/ipoib-rdma-shared/20-ippool.yaml b/examples/processed/ipoib-rdma-shared/20-ippool.yaml new file mode 100644 index 00000000..0f9b4ed3 --- /dev/null +++ b/examples/processed/ipoib-rdma-shared/20-ippool.yaml @@ -0,0 +1,29 @@ +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: ipoib-pool-a + namespace: nvidia-network-operator +spec: + subnet: 192.168.5.0/24 + perNodeBlockSize: 50 + gateway: 192.168.5.1 +--- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: ipoib-pool-b + namespace: nvidia-network-operator +spec: + subnet: 192.168.6.0/24 + perNodeBlockSize: 50 + gateway: 192.168.6.1 +--- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: ipoib-pool-c + namespace: nvidia-network-operator +spec: + subnet: 192.168.7.0/24 + perNodeBlockSize: 50 + gateway: 192.168.7.1 \ No newline at end 
of file diff --git a/examples/processed/ipoib-rdma-shared/30-ipoibnetwork.yaml b/examples/processed/ipoib-rdma-shared/30-ipoibnetwork.yaml new file mode 100644 index 00000000..83d37e3e --- /dev/null +++ b/examples/processed/ipoib-rdma-shared/30-ipoibnetwork.yaml @@ -0,0 +1,38 @@ +apiVersion: mellanox.com/v1alpha1 +kind: IPoIBNetwork +metadata: + name: ipoib-network-a +spec: + networkNamespace: "default" + master: "ibs1f0" + ipam: | + { + "type": "nv-ipam", + "poolName": "ipoib-pool-a" + } +--- +apiVersion: mellanox.com/v1alpha1 +kind: IPoIBNetwork +metadata: + name: ipoib-network-b +spec: + networkNamespace: "default" + master: "ibs1f1" + ipam: | + { + "type": "nv-ipam", + "poolName": "ipoib-pool-b" + } +--- +apiVersion: mellanox.com/v1alpha1 +kind: IPoIBNetwork +metadata: + name: ipoib-network-c +spec: + networkNamespace: "default" + master: "ibs2f0" + ipam: | + { + "type": "nv-ipam", + "poolName": "ipoib-pool-c" + } \ No newline at end of file diff --git a/examples/processed/ipoib-rdma-shared/40-pod.yaml b/examples/processed/ipoib-rdma-shared/40-pod.yaml new file mode 100644 index 00000000..784c47bb --- /dev/null +++ b/examples/processed/ipoib-rdma-shared/40-pod.yaml @@ -0,0 +1,59 @@ +apiVersion: v1 +kind: Pod +metadata: + name: ipoib-test-pod-a + annotations: + k8s.v1.cni.cncf.io/networks: ipoib-network-a +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_a: 1 + limits: + rdma/rdma_shared_device_a: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + name: ipoib-test-pod-b + annotations: + k8s.v1.cni.cncf.io/networks: ipoib-network-b +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_b: 1 + limits: + 
rdma/rdma_shared_device_b: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + name: ipoib-test-pod-c + annotations: + k8s.v1.cni.cncf.io/networks: ipoib-network-c +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_c: 1 + limits: + rdma/rdma_shared_device_c: 1 \ No newline at end of file diff --git a/examples/processed/ipoib-rdma-shared/complete.yaml b/examples/processed/ipoib-rdma-shared/complete.yaml new file mode 100644 index 00000000..603b932e --- /dev/null +++ b/examples/processed/ipoib-rdma-shared/complete.yaml @@ -0,0 +1,187 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + ofedDriver: + image: doca-driver + repository: nvcr.io/nvstaging/mellanox + version: 25.04-0.6.1.0-2 + rdmaSharedDevicePlugin: + image: k8s-rdma-shared-dev-plugin + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + config: | + { + "configList": [ + { + "resourceName": "rdma_shared_device_a", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ibs1f0"] + } + }, + { + "resourceName": "rdma_shared_device_b", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ibs1f1"] + } + }, + { + "resourceName": "rdma_shared_device_c", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ibs2f0"] + } + } + ] + } + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + ipoib: + image: ipoib-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 +--- +apiVersion: 
nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: ipoib-pool-a + namespace: nvidia-network-operator +spec: + subnet: 192.168.5.0/24 + perNodeBlockSize: 50 + gateway: 192.168.5.1 +--- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: ipoib-pool-b + namespace: nvidia-network-operator +spec: + subnet: 192.168.6.0/24 + perNodeBlockSize: 50 + gateway: 192.168.6.1 +--- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: ipoib-pool-c + namespace: nvidia-network-operator +spec: + subnet: 192.168.7.0/24 + perNodeBlockSize: 50 + gateway: 192.168.7.1 +--- +apiVersion: mellanox.com/v1alpha1 +kind: IPoIBNetwork +metadata: + name: ipoib-network-a +spec: + networkNamespace: "default" + master: "ibs1f0" + ipam: | + { + "type": "nv-ipam", + "poolName": "ipoib-pool-a" + } +--- +apiVersion: mellanox.com/v1alpha1 +kind: IPoIBNetwork +metadata: + name: ipoib-network-b +spec: + networkNamespace: "default" + master: "ibs1f1" + ipam: | + { + "type": "nv-ipam", + "poolName": "ipoib-pool-b" + } +--- +apiVersion: mellanox.com/v1alpha1 +kind: IPoIBNetwork +metadata: + name: ipoib-network-c +spec: + networkNamespace: "default" + master: "ibs2f0" + ipam: | + { + "type": "nv-ipam", + "poolName": "ipoib-pool-c" + } +--- +apiVersion: v1 +kind: Pod +metadata: + name: ipoib-test-pod-a + annotations: + k8s.v1.cni.cncf.io/networks: ipoib-network-a +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_a: 1 + limits: + rdma/rdma_shared_device_a: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + name: ipoib-test-pod-b + annotations: + k8s.v1.cni.cncf.io/networks: ipoib-network-b +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + 
requests: + rdma/rdma_shared_device_b: 1 + limits: + rdma/rdma_shared_device_b: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + name: ipoib-test-pod-c + annotations: + k8s.v1.cni.cncf.io/networks: ipoib-network-c +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_c: 1 + limits: + rdma/rdma_shared_device_c: 1 diff --git a/examples/processed/macvlan-rdma-shared/10-nicclusterpolicy.yaml b/examples/processed/macvlan-rdma-shared/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..e622abcf --- /dev/null +++ b/examples/processed/macvlan-rdma-shared/10-nicclusterpolicy.yaml @@ -0,0 +1,36 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + rdmaSharedDevicePlugin: + image: k8s-rdma-shared-dev-plugin + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + config: | + { + "configList": [ + { + "resourceName": "rdma_shared_device_a", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ens1f0"] + } + } + ] + } + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 \ No newline at end of file diff --git a/examples/processed/macvlan-rdma-shared/20-ippool.yaml b/examples/processed/macvlan-rdma-shared/20-ippool.yaml new file mode 100644 index 00000000..7685ee9f --- /dev/null +++ b/examples/processed/macvlan-rdma-shared/20-ippool.yaml @@ -0,0 +1,9 @@ +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: macvlan-pool + namespace: 
nvidia-network-operator +spec: + subnet: 192.168.4.0/24 + perNodeBlockSize: 50 + gateway: 192.168.4.1 \ No newline at end of file diff --git a/examples/processed/macvlan-rdma-shared/30-macvlannetwork.yaml b/examples/processed/macvlan-rdma-shared/30-macvlannetwork.yaml new file mode 100644 index 00000000..5778e8db --- /dev/null +++ b/examples/processed/macvlan-rdma-shared/30-macvlannetwork.yaml @@ -0,0 +1,14 @@ +apiVersion: mellanox.com/v1alpha1 +kind: MacvlanNetwork +metadata: + name: macvlan-network +spec: + networkNamespace: "default" + master: "ens1f0" + mode: "bridge" + mtu: 1500 + ipam: | + { + "type": "nv-ipam", + "poolName": "macvlan-pool" + } \ No newline at end of file diff --git a/examples/processed/macvlan-rdma-shared/40-pod.yaml b/examples/processed/macvlan-rdma-shared/40-pod.yaml new file mode 100644 index 00000000..c4b1c98e --- /dev/null +++ b/examples/processed/macvlan-rdma-shared/40-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: macvlan-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: macvlan-network +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_a: 1 + limits: + rdma/rdma_shared_device_a: 1 \ No newline at end of file diff --git a/examples/processed/macvlan-rdma-shared/complete.yaml b/examples/processed/macvlan-rdma-shared/complete.yaml new file mode 100644 index 00000000..3ecd62a6 --- /dev/null +++ b/examples/processed/macvlan-rdma-shared/complete.yaml @@ -0,0 +1,81 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + rdmaSharedDevicePlugin: + image: k8s-rdma-shared-dev-plugin + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + config: | + { + "configList": [ + { + "resourceName": "rdma_shared_device_a", + "rdmaHcaMax": 63, + "selectors": { + 
"ifNames": ["ens1f0"] + } + } + ] + } + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 +--- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: macvlan-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.4.0/24 + perNodeBlockSize: 50 + gateway: 192.168.4.1 +--- +apiVersion: mellanox.com/v1alpha1 +kind: MacvlanNetwork +metadata: + name: macvlan-network +spec: + networkNamespace: "default" + master: "ens1f0" + mode: "bridge" + mtu: 1500 + ipam: | + { + "type": "nv-ipam", + "poolName": "macvlan-pool" + } +--- +apiVersion: v1 +kind: Pod +metadata: + name: macvlan-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: macvlan-network +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_a: 1 + limits: + rdma/rdma_shared_device_a: 1 diff --git a/examples/processed/sriov-ib-rdma/10-nicclusterpolicy.yaml b/examples/processed/sriov-ib-rdma/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..805be11d --- /dev/null +++ b/examples/processed/sriov-ib-rdma/10-nicclusterpolicy.yaml @@ -0,0 +1,24 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + ofedDriver: + image: doca-driver + repository: nvcr.io/nvstaging/mellanox + version: 25.04-0.6.1.0-2 + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + 
image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 \ No newline at end of file diff --git a/examples/processed/sriov-ib-rdma/20-ippool.yaml b/examples/processed/sriov-ib-rdma/20-ippool.yaml new file mode 100644 index 00000000..369964c6 --- /dev/null +++ b/examples/processed/sriov-ib-rdma/20-ippool.yaml @@ -0,0 +1,9 @@ +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: sriov-ib-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.6.0/24 + perNodeBlockSize: 50 + gateway: 192.168.6.1 \ No newline at end of file diff --git a/examples/processed/sriov-ib-rdma/30-sriovnetworknodepolicy.yaml b/examples/processed/sriov-ib-rdma/30-sriovnetworknodepolicy.yaml new file mode 100644 index 00000000..6b7f4b18 --- /dev/null +++ b/examples/processed/sriov-ib-rdma/30-sriovnetworknodepolicy.yaml @@ -0,0 +1,17 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetworkNodePolicy +metadata: + name: infiniband-sriov + namespace: nvidia-network-operator +spec: + deviceType: netdevice + mtu: 1500 + nodeSelector: + feature.node.kubernetes.io/pci-15b3.present: "true" + nicSelector: + vendor: "15b3" + linkType: IB + isRdma: true + numVfs: 8 + priority: 90 + resourceName: mlnxnics \ No newline at end of file diff --git a/examples/processed/sriov-ib-rdma/40-sriovibnetwork.yaml b/examples/processed/sriov-ib-rdma/40-sriovibnetwork.yaml new file mode 100644 index 00000000..ef6c7bb3 --- /dev/null +++ b/examples/processed/sriov-ib-rdma/40-sriovibnetwork.yaml @@ -0,0 +1,14 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovIBNetwork +metadata: + name: sriov-ib-network + namespace: nvidia-network-operator +spec: + ipam: | + { + "type": "nv-ipam", + "poolName": "sriov-ib-pool" + } + resourceName: mlnxnics + linkState: enable + networkNamespace: default \ No newline at end of file diff --git 
a/examples/processed/sriov-ib-rdma/50-pod.yaml b/examples/processed/sriov-ib-rdma/50-pod.yaml new file mode 100644 index 00000000..bda92965 --- /dev/null +++ b/examples/processed/sriov-ib-rdma/50-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: sriov-ib-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: sriov-ib-network +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + nvidia.com/mlnxnics: '1' + limits: + nvidia.com/mlnxnics: '1' \ No newline at end of file diff --git a/examples/processed/sriov-ib-rdma/complete.yaml b/examples/processed/sriov-ib-rdma/complete.yaml new file mode 100644 index 00000000..d0db1b01 --- /dev/null +++ b/examples/processed/sriov-ib-rdma/complete.yaml @@ -0,0 +1,87 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + ofedDriver: + image: doca-driver + repository: nvcr.io/nvstaging/mellanox + version: 25.04-0.6.1.0-2 + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 +--- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: sriov-ib-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.6.0/24 + perNodeBlockSize: 50 + gateway: 192.168.6.1 +--- +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetworkNodePolicy +metadata: + name: infiniband-sriov + namespace: nvidia-network-operator +spec: + deviceType: netdevice + mtu: 1500 + nodeSelector: + feature.node.kubernetes.io/pci-15b3.present: "true" + nicSelector: + 
vendor: "15b3" + linkType: IB + isRdma: true + numVfs: 8 + priority: 90 + resourceName: mlnxnics +--- +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovIBNetwork +metadata: + name: sriov-ib-network + namespace: nvidia-network-operator +spec: + ipam: | + { + "type": "nv-ipam", + "poolName": "sriov-ib-pool" + } + resourceName: mlnxnics + linkState: enable + networkNamespace: default +--- +apiVersion: v1 +kind: Pod +metadata: + name: sriov-ib-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: sriov-ib-network +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + nvidia.com/mlnxnics: '1' + limits: + nvidia.com/mlnxnics: '1' diff --git a/examples/processed/sriov-network-rdma/10-nicclusterpolicy.yaml b/examples/processed/sriov-network-rdma/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..e8d4dcbd --- /dev/null +++ b/examples/processed/sriov-network-rdma/10-nicclusterpolicy.yaml @@ -0,0 +1,23 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + sriovDevicePlugin: + image: sriov-network-device-plugin + repository: nvcr.io/nvidia/mellanox + version: network-operator-v25.7.0-beta.1 + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 \ No newline at end of file diff --git a/examples/processed/sriov-network-rdma/20-ippool.yaml b/examples/processed/sriov-network-rdma/20-ippool.yaml new file mode 100644 index 00000000..6ff8a91b --- /dev/null +++ b/examples/processed/sriov-network-rdma/20-ippool.yaml @@ -0,0 +1,9 @@ 
+apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: sriov-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.2.0/24 + perNodeBlockSize: 50 + gateway: 192.168.2.1 \ No newline at end of file diff --git a/examples/processed/sriov-network-rdma/30-sriovnetworknodepolicy.yaml b/examples/processed/sriov-network-rdma/30-sriovnetworknodepolicy.yaml new file mode 100644 index 00000000..b7863092 --- /dev/null +++ b/examples/processed/sriov-network-rdma/30-sriovnetworknodepolicy.yaml @@ -0,0 +1,16 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetworkNodePolicy +metadata: + name: ethernet-sriov + namespace: nvidia-network-operator +spec: + deviceType: netdevice + mtu: 1500 + nodeSelector: + feature.node.kubernetes.io/pci-15b3.present: "true" + nicSelector: + vendor: "15b3" + isRdma: true + numVfs: 8 + priority: 90 + resourceName: sriov_resource \ No newline at end of file diff --git a/examples/processed/sriov-network-rdma/40-sriovnetwork.yaml b/examples/processed/sriov-network-rdma/40-sriovnetwork.yaml new file mode 100644 index 00000000..ed7ba428 --- /dev/null +++ b/examples/processed/sriov-network-rdma/40-sriovnetwork.yaml @@ -0,0 +1,13 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetwork +metadata: + name: sriov-rdma-network + namespace: nvidia-network-operator +spec: + ipam: | + { + "type": "nv-ipam", + "poolName": "sriov-pool" + } + networkNamespace: default + resourceName: sriov_resource \ No newline at end of file diff --git a/examples/processed/sriov-network-rdma/50-pod.yaml b/examples/processed/sriov-network-rdma/50-pod.yaml new file mode 100644 index 00000000..04045500 --- /dev/null +++ b/examples/processed/sriov-network-rdma/50-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: sriov-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: sriov-rdma-network +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + 
securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + nvidia.com/sriov_resource: '1' + limits: + nvidia.com/sriov_resource: '1' \ No newline at end of file diff --git a/examples/processed/sriov-network-rdma/complete.yaml b/examples/processed/sriov-network-rdma/complete.yaml new file mode 100644 index 00000000..d14ec6cc --- /dev/null +++ b/examples/processed/sriov-network-rdma/complete.yaml @@ -0,0 +1,84 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + sriovDevicePlugin: + image: sriov-network-device-plugin + repository: nvcr.io/nvidia/mellanox + version: network-operator-v25.7.0-beta.1 + nvIpam: + image: nvidia-k8s-ipam + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 + multus: + image: multus-cni + repository: nvcr.io/nvstaging/mellanox + version: network-operator-v25.7.0-beta.1 +--- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: sriov-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.2.0/24 + perNodeBlockSize: 50 + gateway: 192.168.2.1 +--- +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetworkNodePolicy +metadata: + name: ethernet-sriov + namespace: nvidia-network-operator +spec: + deviceType: netdevice + mtu: 1500 + nodeSelector: + feature.node.kubernetes.io/pci-15b3.present: "true" + nicSelector: + vendor: "15b3" + isRdma: true + numVfs: 8 + priority: 90 + resourceName: sriov_resource +--- +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetwork +metadata: + name: sriov-rdma-network + namespace: nvidia-network-operator +spec: + ipam: | + { + "type": "nv-ipam", + "poolName": "sriov-pool" + } + networkNamespace: default + resourceName: sriov_resource +--- +apiVersion: v1 +kind: Pod +metadata: + name: sriov-test-pod + annotations: + 
k8s.v1.cni.cncf.io/networks: sriov-rdma-network +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + nvidia.com/sriov_resource: '1' + limits: + nvidia.com/sriov_resource: '1' diff --git a/examples/templates/host-device-rdma/10-nicclusterpolicy.yaml b/examples/templates/host-device-rdma/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..578bbb0c --- /dev/null +++ b/examples/templates/host-device-rdma/10-nicclusterpolicy.yaml @@ -0,0 +1,37 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + sriovDevicePlugin: + image: sriov-network-device-plugin + repository: |sriovnetop-sriov-device-plugin-repository| + version: |sriovnetop-sriov-device-plugin-version| + config: | + { + "resourceList": [ + { + "resourcePrefix": "nvidia.com", + "resourceName": "hostdev", + "selectors": { + "vendors": ["15b3"], + "isRdma": true + } + } + ] + } + nvIpam: + image: nvidia-k8s-ipam + repository: |nvidia-ipam-repository| + version: |nvidia-ipam-version| + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: |cni-plugins-repository| + version: |cni-plugins-version| + multus: + image: multus-cni + repository: |multus-repository| + version: |multus-version| \ No newline at end of file diff --git a/examples/templates/host-device-rdma/20-ippool.yaml b/examples/templates/host-device-rdma/20-ippool.yaml new file mode 100644 index 00000000..efa36719 --- /dev/null +++ b/examples/templates/host-device-rdma/20-ippool.yaml @@ -0,0 +1,9 @@ +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: hostdev-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.3.0/24 + perNodeBlockSize: 50 + gateway: 192.168.3.1 \ No newline at end of file diff --git 
a/examples/templates/host-device-rdma/30-hostdevicenetwork.yaml b/examples/templates/host-device-rdma/30-hostdevicenetwork.yaml new file mode 100644 index 00000000..70a11653 --- /dev/null +++ b/examples/templates/host-device-rdma/30-hostdevicenetwork.yaml @@ -0,0 +1,12 @@ +apiVersion: mellanox.com/v1alpha1 +kind: HostDeviceNetwork +metadata: + name: hostdev-net +spec: + networkNamespace: "default" + resourceName: "hostdev" + ipam: | + { + "type": "nv-ipam", + "poolName": "hostdev-pool" + } \ No newline at end of file diff --git a/examples/templates/host-device-rdma/40-pod.yaml b/examples/templates/host-device-rdma/40-pod.yaml new file mode 100644 index 00000000..b4156e0e --- /dev/null +++ b/examples/templates/host-device-rdma/40-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: hostdev-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: hostdev-net +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + nvidia.com/hostdev: '1' + limits: + nvidia.com/hostdev: '1' \ No newline at end of file diff --git a/examples/templates/ipoib-rdma-shared/10-nicclusterpolicy.yaml b/examples/templates/ipoib-rdma-shared/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..75f6edf0 --- /dev/null +++ b/examples/templates/ipoib-rdma-shared/10-nicclusterpolicy.yaml @@ -0,0 +1,56 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + ofedDriver: + image: doca-driver + repository: |doca-driver-repository| + version: |doca-driver-version| + rdmaSharedDevicePlugin: + image: k8s-rdma-shared-dev-plugin + repository: |k8s-rdma-shared-dev-plugin-repository| + version: |k8s-rdma-shared-dev-plugin-version| + config: | + { + "configList": [ + { + "resourceName": "rdma_shared_device_a", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ibs1f0"] + } + }, + { + 
"resourceName": "rdma_shared_device_b", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ibs1f1"] + } + }, + { + "resourceName": "rdma_shared_device_c", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ibs2f0"] + ] + } + nvIpam: + image: nvidia-k8s-ipam + repository: |nvidia-ipam-repository| + version: |nvidia-ipam-version| + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: |cni-plugins-repository| + version: |cni-plugins-version| + multus: + image: multus-cni + repository: |multus-repository| + version: |multus-version| + ipoib: + image: ipoib-cni + repository: |ipoib-cni-repository| + version: |ipoib-cni-version| \ No newline at end of file diff --git a/examples/templates/ipoib-rdma-shared/20-ippool.yaml b/examples/templates/ipoib-rdma-shared/20-ippool.yaml new file mode 100644 index 00000000..0f9b4ed3 --- /dev/null +++ b/examples/templates/ipoib-rdma-shared/20-ippool.yaml @@ -0,0 +1,29 @@ +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: ipoib-pool-a + namespace: nvidia-network-operator +spec: + subnet: 192.168.5.0/24 + perNodeBlockSize: 50 + gateway: 192.168.5.1 +----- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: ipoib-pool-b + namespace: nvidia-network-operator +spec: + subnet: 192.168.6.0/24 + perNodeBlockSize: 50 + gateway: 192.168.6.1 +----- +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: ipoib-pool-c + namespace: nvidia-network-operator +spec: + subnet: 192.168.7.0/24 + perNodeBlockSize: 50 + gateway: 192.168.7.1 \ No newline at end of file diff --git a/examples/templates/ipoib-rdma-shared/30-ipoibnetwork.yaml b/examples/templates/ipoib-rdma-shared/30-ipoibnetwork.yaml new file mode 100644 index 00000000..83d37e3e --- /dev/null +++ b/examples/templates/ipoib-rdma-shared/30-ipoibnetwork.yaml @@ -0,0 +1,38 @@ +apiVersion: mellanox.com/v1alpha1 +kind: IPoIBNetwork +metadata: + name: ipoib-network-a +spec: + 
networkNamespace: "default" + master: "ibs1f0" + ipam: | + { + "type": "nv-ipam", + "poolName": "ipoib-pool-a" + } +----- +apiVersion: mellanox.com/v1alpha1 +kind: IPoIBNetwork +metadata: + name: ipoib-network-b +spec: + networkNamespace: "default" + master: "ibs1f1" + ipam: | + { + "type": "nv-ipam", + "poolName": "ipoib-pool-b" + } +----- +apiVersion: mellanox.com/v1alpha1 +kind: IPoIBNetwork +metadata: + name: ipoib-network-c +spec: + networkNamespace: "default" + master: "ibs2f0" + ipam: | + { + "type": "nv-ipam", + "poolName": "ipoib-pool-c" + } \ No newline at end of file diff --git a/examples/templates/ipoib-rdma-shared/40-pod.yaml b/examples/templates/ipoib-rdma-shared/40-pod.yaml new file mode 100644 index 00000000..784c47bb --- /dev/null +++ b/examples/templates/ipoib-rdma-shared/40-pod.yaml @@ -0,0 +1,59 @@ +apiVersion: v1 +kind: Pod +metadata: + name: ipoib-test-pod-a + annotations: + k8s.v1.cni.cncf.io/networks: ipoib-network-a +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_a: 1 + limits: + rdma/rdma_shared_device_a: 1 +----- +apiVersion: v1 +kind: Pod +metadata: + name: ipoib-test-pod-b + annotations: + k8s.v1.cni.cncf.io/networks: ipoib-network-b +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_b: 1 + limits: + rdma/rdma_shared_device_b: 1 +----- +apiVersion: v1 +kind: Pod +metadata: + name: ipoib-test-pod-c + annotations: + k8s.v1.cni.cncf.io/networks: ipoib-network-c +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + 
rdma/rdma_shared_device_c: 1 + limits: + rdma/rdma_shared_device_c: 1 \ No newline at end of file diff --git a/examples/templates/macvlan-rdma-shared/10-nicclusterpolicy.yaml b/examples/templates/macvlan-rdma-shared/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..f0c62bbf --- /dev/null +++ b/examples/templates/macvlan-rdma-shared/10-nicclusterpolicy.yaml @@ -0,0 +1,36 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + rdmaSharedDevicePlugin: + image: k8s-rdma-shared-dev-plugin + repository: |k8s-rdma-shared-dev-plugin-repository| + version: |k8s-rdma-shared-dev-plugin-version| + config: | + { + "configList": [ + { + "resourceName": "rdma_shared_device_a", + "rdmaHcaMax": 63, + "selectors": { + "ifNames": ["ens1f0"] + } + } + ] + } + nvIpam: + image: nvidia-k8s-ipam + repository: |nvidia-ipam-repository| + version: |nvidia-ipam-version| + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: |cni-plugins-repository| + version: |cni-plugins-version| + multus: + image: multus-cni + repository: |multus-repository| + version: |multus-version| \ No newline at end of file diff --git a/examples/templates/macvlan-rdma-shared/20-ippool.yaml b/examples/templates/macvlan-rdma-shared/20-ippool.yaml new file mode 100644 index 00000000..7685ee9f --- /dev/null +++ b/examples/templates/macvlan-rdma-shared/20-ippool.yaml @@ -0,0 +1,9 @@ +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: macvlan-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.4.0/24 + perNodeBlockSize: 50 + gateway: 192.168.4.1 \ No newline at end of file diff --git a/examples/templates/macvlan-rdma-shared/30-macvlannetwork.yaml b/examples/templates/macvlan-rdma-shared/30-macvlannetwork.yaml new file mode 100644 index 00000000..5778e8db --- /dev/null +++ b/examples/templates/macvlan-rdma-shared/30-macvlannetwork.yaml @@ -0,0 +1,14 @@ +apiVersion: 
mellanox.com/v1alpha1 +kind: MacvlanNetwork +metadata: + name: macvlan-network +spec: + networkNamespace: "default" + master: "ens1f0" + mode: "bridge" + mtu: 1500 + ipam: | + { + "type": "nv-ipam", + "poolName": "macvlan-pool" + } \ No newline at end of file diff --git a/examples/templates/macvlan-rdma-shared/40-pod.yaml b/examples/templates/macvlan-rdma-shared/40-pod.yaml new file mode 100644 index 00000000..c4b1c98e --- /dev/null +++ b/examples/templates/macvlan-rdma-shared/40-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: macvlan-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: macvlan-network +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + rdma/rdma_shared_device_a: 1 + limits: + rdma/rdma_shared_device_a: 1 \ No newline at end of file diff --git a/examples/templates/sriov-ib-rdma/10-nicclusterpolicy.yaml b/examples/templates/sriov-ib-rdma/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..e17d3804 --- /dev/null +++ b/examples/templates/sriov-ib-rdma/10-nicclusterpolicy.yaml @@ -0,0 +1,24 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + ofedDriver: + image: doca-driver + repository: |doca-driver-repository| + version: |doca-driver-version| + nvIpam: + image: nvidia-k8s-ipam + repository: |nvidia-ipam-repository| + version: |nvidia-ipam-version| + imagePullSecrets: [] + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: |cni-plugins-repository| + version: |cni-plugins-version| + multus: + image: multus-cni + repository: |multus-repository| + version: |multus-version| \ No newline at end of file diff --git a/examples/templates/sriov-ib-rdma/20-ippool.yaml b/examples/templates/sriov-ib-rdma/20-ippool.yaml new file mode 100644 index 00000000..369964c6 --- /dev/null +++ 
b/examples/templates/sriov-ib-rdma/20-ippool.yaml @@ -0,0 +1,9 @@ +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: sriov-ib-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.6.0/24 + perNodeBlockSize: 50 + gateway: 192.168.6.1 \ No newline at end of file diff --git a/examples/templates/sriov-ib-rdma/30-sriovnetworknodepolicy.yaml b/examples/templates/sriov-ib-rdma/30-sriovnetworknodepolicy.yaml new file mode 100644 index 00000000..6b7f4b18 --- /dev/null +++ b/examples/templates/sriov-ib-rdma/30-sriovnetworknodepolicy.yaml @@ -0,0 +1,17 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetworkNodePolicy +metadata: + name: infiniband-sriov + namespace: nvidia-network-operator +spec: + deviceType: netdevice + mtu: 1500 + nodeSelector: + feature.node.kubernetes.io/pci-15b3.present: "true" + nicSelector: + vendor: "15b3" + linkType: IB + isRdma: true + numVfs: 8 + priority: 90 + resourceName: mlnxnics \ No newline at end of file diff --git a/examples/templates/sriov-ib-rdma/40-sriovibnetwork.yaml b/examples/templates/sriov-ib-rdma/40-sriovibnetwork.yaml new file mode 100644 index 00000000..ef6c7bb3 --- /dev/null +++ b/examples/templates/sriov-ib-rdma/40-sriovibnetwork.yaml @@ -0,0 +1,14 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovIBNetwork +metadata: + name: sriov-ib-network + namespace: nvidia-network-operator +spec: + ipam: | + { + "type": "nv-ipam", + "poolName": "sriov-ib-pool" + } + resourceName: mlnxnics + linkState: enable + networkNamespace: default \ No newline at end of file diff --git a/examples/templates/sriov-ib-rdma/50-pod.yaml b/examples/templates/sriov-ib-rdma/50-pod.yaml new file mode 100644 index 00000000..bda92965 --- /dev/null +++ b/examples/templates/sriov-ib-rdma/50-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: sriov-ib-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: sriov-ib-network +spec: + containers: + - name: test-container + image: 
mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + nvidia.com/mlnxnics: '1' + limits: + nvidia.com/mlnxnics: '1' \ No newline at end of file diff --git a/examples/templates/sriov-network-rdma/10-nicclusterpolicy.yaml b/examples/templates/sriov-network-rdma/10-nicclusterpolicy.yaml new file mode 100644 index 00000000..37d1820e --- /dev/null +++ b/examples/templates/sriov-network-rdma/10-nicclusterpolicy.yaml @@ -0,0 +1,23 @@ +apiVersion: mellanox.com/v1alpha1 +kind: NicClusterPolicy +metadata: + name: nic-cluster-policy +spec: + sriovDevicePlugin: + image: sriov-network-device-plugin + repository: |sriovnetop-repository| + version: |sriovnetop-sriov-device-plugin-version| + nvIpam: + image: nvidia-k8s-ipam + repository: |nvidia-ipam-repository| + version: |nvidia-ipam-version| + enableWebhook: false + secondaryNetwork: + cniPlugins: + image: plugins + repository: |cni-plugins-repository| + version: |cni-plugins-version| + multus: + image: multus-cni + repository: |multus-repository| + version: |multus-version| \ No newline at end of file diff --git a/examples/templates/sriov-network-rdma/20-ippool.yaml b/examples/templates/sriov-network-rdma/20-ippool.yaml new file mode 100644 index 00000000..6ff8a91b --- /dev/null +++ b/examples/templates/sriov-network-rdma/20-ippool.yaml @@ -0,0 +1,9 @@ +apiVersion: nv-ipam.nvidia.com/v1alpha1 +kind: IPPool +metadata: + name: sriov-pool + namespace: nvidia-network-operator +spec: + subnet: 192.168.2.0/24 + perNodeBlockSize: 50 + gateway: 192.168.2.1 \ No newline at end of file diff --git a/examples/templates/sriov-network-rdma/30-sriovnetworknodepolicy.yaml b/examples/templates/sriov-network-rdma/30-sriovnetworknodepolicy.yaml new file mode 100644 index 00000000..b7863092 --- /dev/null +++ b/examples/templates/sriov-network-rdma/30-sriovnetworknodepolicy.yaml @@ -0,0 +1,16 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: 
SriovNetworkNodePolicy +metadata: + name: ethernet-sriov + namespace: nvidia-network-operator +spec: + deviceType: netdevice + mtu: 1500 + nodeSelector: + feature.node.kubernetes.io/pci-15b3.present: "true" + nicSelector: + vendor: "15b3" + isRdma: true + numVfs: 8 + priority: 90 + resourceName: sriov_resource \ No newline at end of file diff --git a/examples/templates/sriov-network-rdma/40-sriovnetwork.yaml b/examples/templates/sriov-network-rdma/40-sriovnetwork.yaml new file mode 100644 index 00000000..ed7ba428 --- /dev/null +++ b/examples/templates/sriov-network-rdma/40-sriovnetwork.yaml @@ -0,0 +1,13 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetwork +metadata: + name: sriov-rdma-network + namespace: nvidia-network-operator +spec: + ipam: | + { + "type": "nv-ipam", + "poolName": "sriov-pool" + } + networkNamespace: default + resourceName: sriov_resource \ No newline at end of file diff --git a/examples/templates/sriov-network-rdma/50-pod.yaml b/examples/templates/sriov-network-rdma/50-pod.yaml new file mode 100644 index 00000000..04045500 --- /dev/null +++ b/examples/templates/sriov-network-rdma/50-pod.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: sriov-test-pod + annotations: + k8s.v1.cni.cncf.io/networks: sriov-rdma-network +spec: + containers: + - name: test-container + image: mellanox/rping-test + command: ["/bin/bash", "-c", "sleep infinity"] + securityContext: + capabilities: + add: ["IPC_LOCK"] + resources: + requests: + nvidia.com/sriov_resource: '1' + limits: + nvidia.com/sriov_resource: '1' \ No newline at end of file