diff --git a/_topic_map.yml b/_topic_map.yml index 192249407bdc..f3887d5e5bfa 100644 --- a/_topic_map.yml +++ b/_topic_map.yml @@ -2015,12 +2015,6 @@ Topics: - Name: Scaling the Cluster Monitoring Operator File: scaling-cluster-monitoring-operator Distros: openshift-origin,openshift-enterprise -- Name: The Node Feature Discovery Operator - File: psap-node-feature-discovery-operator - Distros: openshift-origin,openshift-enterprise -- Name: The Driver Toolkit - File: psap-driver-toolkit - Distros: openshift-origin,openshift-enterprise - Name: Planning your environment according to object maximums File: planning-your-environment-according-to-object-maximums Distros: openshift-origin,openshift-enterprise @@ -2051,6 +2045,22 @@ Topics: File: ztp-deploying-disconnected Distros: openshift-webscale --- +Name: Hardware enablement +Dir: hardware_enablement +Distros: openshift-origin,openshift-enterprise +Topics: +- Name: About hardware enablement on OpenShift + File: about-hardware-enablement +- Name: The Driver Toolkit + File: psap-driver-toolkit + Distros: openshift-origin,openshift-enterprise +- Name: The Special Resource Operator + File: psap-special-resource-operator + Distros: openshift-origin,openshift-enterprise +- Name: The Node Feature Discovery Operator + File: psap-node-feature-discovery-operator + Distros: openshift-origin,openshift-enterprise +--- Name: Backup and restore Dir: backup_and_restore Distros: openshift-origin,openshift-enterprise diff --git a/hardware_enablement/about-hardware-enablement.adoc b/hardware_enablement/about-hardware-enablement.adoc new file mode 100644 index 000000000000..ca3530741f83 --- /dev/null +++ b/hardware_enablement/about-hardware-enablement.adoc @@ -0,0 +1,12 @@ +[id="about-hardware-enablement"] += About hardware enablement on OpenShift +include::modules/common-attributes.adoc[] +:context: about-hardware-enablement + +toc::[] + +Many applications require specialized hardware or software which depend on kernel modules or 
drivers. The recommended approach to load out-of-tree kernel modules on {op-system-first} nodes is to use driver containers. To deploy out-of-tree drivers at the same time as cluster installation (day 1), you can use the kmods-via-containers framework. For loading drivers or kernel modules on an existing {product-title} cluster (day 2), {product-title} offers several tools: + +* The Driver Toolkit is a container image that is part of every OpenShift release. It contains the kernel packages and other common dependencies needed for building a driver or kernel module. The Driver Toolkit can be used as a base image for driver container image builds on {product-title}. +* The Special Resource Operator (SRO) orchestrates the building and management of driver containers for loading kernel modules and drivers on an existing (day 2) OpenShift or Kubernetes cluster. +* The Node Feature Discovery (NFD) Operator adds node labels for CPU capabilities, kernel version, PCIe device vendor IDs, and more. 
diff --git a/hardware_enablement/images b/hardware_enablement/images new file mode 120000 index 000000000000..5e67573196d8 --- /dev/null +++ b/hardware_enablement/images @@ -0,0 +1 @@ +../images \ No newline at end of file diff --git a/hardware_enablement/modules b/hardware_enablement/modules new file mode 120000 index 000000000000..464b823aca16 --- /dev/null +++ b/hardware_enablement/modules @@ -0,0 +1 @@ +../modules \ No newline at end of file diff --git a/scalability_and_performance/psap-driver-toolkit.adoc b/hardware_enablement/psap-driver-toolkit.adoc similarity index 68% rename from scalability_and_performance/psap-driver-toolkit.adoc rename to hardware_enablement/psap-driver-toolkit.adoc index 56a96d65151c..b3efd80ea7c0 100644 --- a/scalability_and_performance/psap-driver-toolkit.adoc +++ b/hardware_enablement/psap-driver-toolkit.adoc @@ -15,3 +15,7 @@ include::modules/psap-driver-toolkit.adoc[leveloffset=+1] include::modules/psap-driver-toolkit-pulling.adoc[leveloffset=+1] include::modules/psap-driver-toolkit-using.adoc[leveloffset=+1] + +.Additional resources + +* For more information about configuring registry storage for your cluster, see xref:../registry/configuring-registry-operator.adoc#registry-removed_configuring-registry-operator[Image Registry Operator in OpenShift Container Platform] \ No newline at end of file diff --git a/scalability_and_performance/psap-node-feature-discovery-operator.adoc b/hardware_enablement/psap-node-feature-discovery-operator.adoc similarity index 100% rename from scalability_and_performance/psap-node-feature-discovery-operator.adoc rename to hardware_enablement/psap-node-feature-discovery-operator.adoc diff --git a/hardware_enablement/psap-special-resource-operator.adoc b/hardware_enablement/psap-special-resource-operator.adoc new file mode 100644 index 000000000000..49ac1e08c0d8 --- /dev/null +++ b/hardware_enablement/psap-special-resource-operator.adoc @@ -0,0 +1,24 @@ +[id="special-resource-operator"] += The Special 
Resource Operator +include::modules/common-attributes.adoc[] +:context: special-resource-operator + +toc::[] + +Learn about the Special Resource Operator (SRO) and how you can use it to build and manage driver containers for loading kernel modules and device drivers on nodes in an {product-title} cluster. + + +:FeatureName: The Special Resource Operator +include::modules/technology-preview.adoc[leveloffset=+0] + +include::modules/psap-special-resource-operator.adoc[leveloffset=+1] + +include::modules/psap-special-resource-operator-installing.adoc[leveloffset=+1] + +include::modules/psap-special-resource-operator-using.adoc[leveloffset=+1] + +.Additional resources + +* For information about restoring the Image Registry Operator state before using the Special Resource Operator, see +xref:../registry/configuring-registry-operator.adoc#registry-removed_configuring-registry-operator[Image registry removed during installation]. +* For details about installing the NFD Operator see xref:psap-node-feature-discovery-operator.adoc#installing-the-node-feature-discovery-operator_node-feature-discovery-operator[Node Feature Discovery (NFD) Operator]. diff --git a/modules/psap-driver-toolkit-pulling.adoc b/modules/psap-driver-toolkit-pulling.adoc index fe4ced6f814a..bd3c06932373 100644 --- a/modules/psap-driver-toolkit-pulling.adoc +++ b/modules/psap-driver-toolkit-pulling.adoc @@ -1,6 +1,6 @@ // Module included in the following assemblies: // -// * scalability_and_performance/psap-driver-toolkit.adoc +// * hardware_enablement/psap-driver-toolkit.adoc [id="pulling-the-driver-toolkit"] = Pulling the Driver Toolkit container image @@ -18,8 +18,8 @@ The driver-toolkit image for the latest minor release will be tagged with the mi .Prerequisites -* Obtain the image pull secret needed to perform an installation of {product-title}, from the link:https://console.redhat.com/openshift/install/pull-secret[Pull Secret] page on the {cloud-redhat-com} site. -* Install the OpenShift CLI (`oc`). 
+* You have obtained the image pull secret needed to perform an installation of {product-title}, from the link:https://cloud.redhat.com/openshift/install/pull-secret[Pull Secret] page on the {cloud-redhat-com} site. +* You have installed the OpenShift CLI (`oc`). .Procedure diff --git a/modules/psap-driver-toolkit-using.adoc b/modules/psap-driver-toolkit-using.adoc index ef290ebba1c2..280ef4df0d0b 100644 --- a/modules/psap-driver-toolkit-using.adoc +++ b/modules/psap-driver-toolkit-using.adoc @@ -1,20 +1,26 @@ // Module included in the following assemblies: // -// * scalability_and_performance/psap-driver-toolkit.adoc +// * hardware_enablement/psap-driver-toolkit.adoc [id="using-the-driver-toolkit"] = Using the Driver Toolkit As an example, the Driver Toolkit can be used as the base image for building a very simple kernel module called simple-kmod. +[NOTE] +==== +The Driver Toolkit contains the necessary dependencies, `openssl, mokutil, keyutils`, needed to sign a kernel module. However, in this example, the simple-kmod kernel module is not signed and therefore cannot be loaded on systems with `Secure Boot` enabled. +==== + [id="create-simple-kmod-image"] == Build and run the simple-kmod driver container on a cluster .Prerequisites -* An {product-title} cluster -* Install the OpenShift CLI (`oc`). -* Log in as a user with `cluster-admin` privileges. +* You have a running {product-title} cluster. +* You have configured the cluster. +* You have installed the OpenShift CLI (`oc`). +* You are logged into the OpenShift CLI as a user with `cluster-admin` privileges. 
.Procedure diff --git a/modules/psap-driver-toolkit.adoc b/modules/psap-driver-toolkit.adoc index f59a32a0773f..49662504e3a2 100644 --- a/modules/psap-driver-toolkit.adoc +++ b/modules/psap-driver-toolkit.adoc @@ -1,6 +1,6 @@ // Module included in the following assemblies: // -// * scalability_and_performance/psap-driver-toolkit.adoc +// * hardware_enablement/psap-driver-toolkit.adoc [id="about-driver-toolkit"] = About the Driver Toolkit diff --git a/modules/psap-installing-node-feature-discovery-operator.adoc b/modules/psap-installing-node-feature-discovery-operator.adoc index 85bdb53f20cd..00db81ee7746 100644 --- a/modules/psap-installing-node-feature-discovery-operator.adoc +++ b/modules/psap-installing-node-feature-discovery-operator.adoc @@ -1,6 +1,6 @@ // Module included in the following assemblies: // -// * scalability_and_performance/psap-node-feature-discovery-operator.adoc +// * hardware_enablement/psap-node-feature-discovery-operator.adoc [id="installing-the-node-feature-discovery-operator_{context}"] = Installing the Node Feature Discovery Operator diff --git a/modules/psap-node-feature-discovery-operator.adoc b/modules/psap-node-feature-discovery-operator.adoc index 94f4d38c5072..ad86bf9beab9 100644 --- a/modules/psap-node-feature-discovery-operator.adoc +++ b/modules/psap-node-feature-discovery-operator.adoc @@ -1,6 +1,6 @@ // Module included in the following assemblies: // -// * scalability_and_performance/psap-node-feature-discovery-operator.adoc +// * hardware_enablement/psap-node-feature-discovery-operator.adoc ifeval::["{context}" == "red-hat-operators"] :operators: diff --git a/modules/psap-special-resource-operator-installing.adoc b/modules/psap-special-resource-operator-installing.adoc new file mode 100644 index 000000000000..4420e3535ccf --- /dev/null +++ b/modules/psap-special-resource-operator-installing.adoc @@ -0,0 +1,170 @@ +// Module included in the following assemblies: +// +// * hardware_enablement/psap-special-resource-operator.adoc 
+ +[id="installing-the-special-resource-operator_{context}"] += Installing the Special Resource Operator + +As a cluster administrator, you can install the Special Resource Operator (SRO) using the {product-title} CLI or the web console. + +[id="install-operator-cli_{context}"] +== Installing the Special Resource Operator using the CLI + +As a cluster administrator, you can install SRO using the CLI. + +.Prerequisites + +* You have a running {product-title} cluster. +* You have installed the OpenShift CLI (`oc`). +* You are logged into the OpenShift CLI as a user with `cluster-admin` privileges. +* You have installed the Node Feature Discovery (NFD) Operator. + +.Procedure + +. Create a namespace for the Special Resource Operator. + +.. Create the `Namespace` custom resource (CR) that defines the `openshift-special-resource-operator` namespace, and then save the YAML in the `sro-namespace.yaml` file: ++ +[source,yaml] +---- +apiVersion: v1 +kind: Namespace +metadata: + name: openshift-special-resource-operator +---- + +.. Create the namespace by running the following command: ++ +[source,terminal] +---- +$ oc create -f sro-namespace.yaml +---- + +. Install SRO in the namespace you created in the previous step. + +.. Create the `OperatorGroup` CR and save the YAML in the `sro-operatorgroup.yaml` file: ++ +[source,yaml] +---- +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + generateName: openshift-special-resource-operator- + name: openshift-special-resource-operator + namespace: openshift-special-resource-operator +spec: + targetNamespaces: + - openshift-special-resource-operator +---- + +.. Run the following `oc create` command to create the `OperatorGroup` CR: ++ +[source,terminal] +---- +$ oc create -f sro-operatorgroup.yaml +---- + +.. Run the following command to get the `channel` value required for the next step. 
++ +[source,terminal] +---- +$ oc get packagemanifest special-resource-operator -n openshift-marketplace -o jsonpath='{.status.defaultChannel}' +---- ++ +.Example output +[source,terminal] +---- +4.9 +---- + +.. Create the `Subscription` CR and save the YAML in the `sro-sub.yaml` file. If the output from the previous command is different than the `spec.channel` field, replace it with the output from the previous command: ++ +.Example Subscription +[source,yaml] +---- +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: special-resource-operator + namespace: openshift-special-resource-operator +spec: + channel: "4.9" + installPlanApproval: Automatic + name: special-resource-operator + source: redhat-operators + sourceNamespace: openshift-marketplace +---- + +.. Create the subscription object by running the following command: ++ +[source,terminal] +---- +$ oc create -f sro-sub.yaml +---- + +.. Change to the `openshift-special-resource-operator` project: ++ +[source,terminal] +---- +$ oc project openshift-special-resource-operator +---- + +.Verification + +* To verify that the Operator deployment is successful, run: ++ +[source,terminal] +---- +$ oc get pods +---- ++ +.Example output +[source,terminal] +---- +NAME READY STATUS RESTARTS AGE +special-resource-controller-manager-7bfb544d45-xx62r 2/2 Running 0 2m28s +---- ++ +A successful deployment shows a `Running` status. + +[id="install-operator-web-console_{context}"] +== Installing the Special Resource Operator using the web console + +As a cluster administrator, you can install the Special Resource Operator using the web console. + +[NOTE] +==== +It is recommended that you create the `Namespace` as detailed in the previous section. +==== + +.Procedure + +. In the {product-title} web console, click *Operators* -> *OperatorHub*. + +. Choose *Special Resource Operator* from the list of available Operators, and then click *Install*. + +. 
On the *Install Operator* page, select *a specific namespace on the cluster*, select the namespace created in the previous section, and then click *Install*. + +.Verification + +To verify that the Special Resource Operator installed successfully: + +. Navigate to the *Operators* -> *Installed Operators* page. +. Ensure that *Special Resource Operator* is listed in the *openshift-special-resource-operator* project with a *Status* of *InstallSucceeded*. ++ +[NOTE] +==== +During installation, an Operator might display a *Failed* status. If the installation later succeeds with an *InstallSucceeded* message, you can ignore the *Failed* message. +==== + +.Troubleshooting + +If the Operator does not appear as installed, to troubleshoot further: + +. Navigate to the *Operators* -> *Installed Operators* page and inspect the *Operator Subscriptions* and *Install Plans* tabs for any failure or errors under *Status*. +. Navigate to the *Workloads* -> *Pods* page and check the logs for pods in the `openshift-special-resource-operator` project. + +[NOTE] +==== +The Node Feature Discovery (NFD) Operator is a dependency of the Special Resource Operator (SRO). If the NFD Operator is not installed before installing SRO, the Operator Lifecycle Manager will automatically install the NFD Operator. However, the required Node Feature Discovery operand will not be deployed automatically. The Node Feature Discovery Operator documentation provides details about how to deploy NFD using the NFD Operator. 
+==== \ No newline at end of file diff --git a/modules/psap-special-resource-operator-using.adoc b/modules/psap-special-resource-operator-using.adoc new file mode 100644 index 000000000000..a11d461e9c17 --- /dev/null +++ b/modules/psap-special-resource-operator-using.adoc @@ -0,0 +1,404 @@ +// Module included in the following assemblies: +// +// * hardware_enablement/psap-special-resource-operator.adoc + +[id="using-the-special-resource-operator"] += Using the Special Resource Operator + +As an example, the Special Resource Operator can be used to manage the build and deployment of a driver container for a minimal kernel module called simple-kmod. + +The objects required to build and deploy the kernel module can be defined using a Helm chart. The SRO image contains a local repository of Helm charts including the templates for deploying the simple-kmod. SRO can also read the templates from a ConfigMap. Both methods are demonstrated in the following examples. + +[id="deploy-simple-kmod-using-local-chart"] +== Build and run the simple-kmod SpecialResource using the SRO image's local manifests. + +.Prerequisites + +* You have a running {product-title} cluster. +* You have set the Image Registry Operator state to Managed for your cluster. See the additional resources section below for more information. +* You have installed the OpenShift CLI (`oc`). +* You are logged into the OpenShift CLI as a user with `cluster-admin` privileges. +* You have installed the Node Feature Discovery (NFD) Operator. +* You have installed the Special Resource Operator. + +.Procedure +. To deploy the simple-kmod using the SRO image's local Helm repository, the following SpecialResource manifest can be used. 
Save this YAML as `simple-kmod-local.yaml` ++ +[source,yaml] +---- +apiVersion: sro.openshift.io/v1beta1 +kind: SpecialResource +metadata: + name: simple-kmod +spec: + namespace: simple-kmod + chart: + name: simple-kmod + version: 0.0.1 + repository: + name: example + url: file:///charts/example + set: + kind: Values + apiVersion: sro.openshift.io/v1beta1 + kmodNames: ["simple-kmod", "simple-procfs-kmod"] + buildArgs: + - name: "KMODVER" + value: "SRO" + driverContainer: + source: + git: + ref: "master" + uri: "https://github.com/openshift-psap/kvc-simple-kmod.git" +---- + +. Create the SpecialResource with: ++ +[source,terminal] +---- +$ oc create -f simple-kmod-local.yaml +---- + +. The simple-kmod resources are deployed in the simple-kmod namespace as specified in the object manifest. After a short time, the build pod for the simple-kmod driver container runs. The build completes after a few minutes and then the driver container pods run. ++ +[source,terminal] +---- +$ oc get pods -n simple-kmod +---- ++ +.Example output +[source,terminal] +---- +NAME READY STATUS RESTARTS AGE +simple-kmod-driver-build-12813789169ac0ee-1-build 0/1 Completed 0 7m12s +simple-kmod-driver-container-12813789169ac0ee-mjsnh 1/1 Running 0 8m2s +simple-kmod-driver-container-12813789169ac0ee-qtkff 1/1 Running 0 8m2s +---- + +. You can display the logs of the simple-kmod driver container image build using the oc log command along with the build pod name from the previous command: ++ +[source,terminal] +---- +$ oc logs pod/simple-kmod-driver-build-12813789169ac0ee-1-build -n simple-kmod +---- + +. To verify that the simple-kmod kernel modules are loaded, execute the `lsmod` command in one of the driver container pods from step 3. 
++ +[source,terminal] +---- +$ oc exec -n simple-kmod -it pod/simple-kmod-driver-container-12813789169ac0ee-mjsnh -- lsmod | grep simple +---- ++ +.Example output +[source,terminal] +---- +simple_procfs_kmod 16384 0 +simple_kmod 16384 0 +---- + +To remove the simple-kmod kernel module from the node, delete the simple-kmod SpecialResource API object with `oc delete`. The kernel module is unloaded when the driver container pod is deleted. + +[id="deploy-simple-kmod-using-configmap-chart"] +== Build and run the simple-kmod SpecialResource using a ConfigMap + +This example shows how the loading of any kernel module can be managed by SRO, by storing the Helm chart templates in a ConfigMap. Like in the previous example, the simple-kmod kernel module is used. + +.Prerequisites + +* You have a running {product-title} cluster. +* You have set the Image Registry Operator state to Managed for your cluster. +* You have installed the OpenShift CLI (`oc`). +* You are logged into the OpenShift CLI as a user with `cluster-admin` privileges. +* You have installed Node Feature Discovery (NFD) Operator. +* You have installed the Special Resource Operator. +* You have installed the Helm CLI (`helm`). + +.Procedure +. To create a simple-kmod SpecialResource, you will need to define an ImageStream and BuildConfig to build the image, and a ServiceAccount, Role, RoleBinding, and DaemonSet to run the container. The ServiceAccount, Role, and RoleBinding are required to run the DaemonSet with the privileged securityContext so that the kernel module can be loaded. +.. Create a directory for the recipe, and change into the `templates` directory ++ +[source,terminal] +---- +mkdir -p chart/simple-kmod-0.0.1/templates +---- ++ +[source,terminal] +---- +cd chart/simple-kmod-0.0.1/templates +---- + +.. Save this YAML template for the ImageStream and BuildConfig in the `templates` directory as `0000-buildconfig.yaml`. 
Note that the templates such as `{{.Values.specialresource.metadata.name}}` will be filled in by SRO, based on fields in the SpecialResource CR and variables known to the Operator such as `{{.Values.KernelFullVersion}}` ++ +[source,yaml] +---- +apiVersion: image.openshift.io/v1 +kind: ImageStream +metadata: + labels: + app: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} +spec: {} +--- +apiVersion: build.openshift.io/v1 +kind: BuildConfig +metadata: + labels: + app: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverBuild}} + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverBuild}} + annotations: + specialresource.openshift.io/wait: "true" + specialresource.openshift.io/driver-container-vendor: simple-kmod + specialresource.openshift.io/kernel-affine: "true" +spec: + nodeSelector: + node-role.kubernetes.io/worker: "" + runPolicy: "Serial" + triggers: + - type: "ConfigChange" + - type: "ImageChange" + source: + git: + ref: {{.Values.specialresource.spec.driverContainer.source.git.ref}} + uri: {{.Values.specialresource.spec.driverContainer.source.git.uri}} + type: Git + strategy: + dockerStrategy: + dockerfilePath: Dockerfile.SRO + buildArgs: + - name: "IMAGE" + value: {{ .Values.driverToolkitImage }} + {{- range $arg := .Values.buildArgs }} + - name: {{ $arg.name }} + value: {{ $arg.value }} + {{- end }} + - name: KVER + value: {{ .Values.kernelFullVersion }} + output: + to: + kind: ImageStreamTag + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}}:v{{.Values.kernelFullVersion}} +---- + +.. 
Save this YAML template for the RBAC and DaemonSet in the templates directory as `1000-driver-container.yaml` ++ +[source,yaml] +---- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} +rules: +- apiGroups: + - security.openshift.io + resources: + - securitycontextconstraints + verbs: + - use + resourceNames: + - privileged +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} +subjects: +- kind: ServiceAccount + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} + namespace: {{.Values.specialresource.spec.namespace}} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} + annotations: + specialresource.openshift.io/wait: "true" + specialresource.openshift.io/state: "driver-container" + specialresource.openshift.io/driver-container-vendor: simple-kmod + specialresource.openshift.io/kernel-affine: "true" +spec: + updateStrategy: + type: OnDelete + selector: + matchLabels: + app: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} + template: + metadata: + # Mark this pod as a critical add-on; when enabled, the critical add-on scheduler + # reserves resources for critical add-on pods so that they can be rescheduled after + # a failure. This annotation works in tandem with the toleration below. 
+ annotations: + scheduler.alpha.kubernetes.io/critical-pod: "" + labels: + app: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} + spec: + serviceAccount: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} + serviceAccountName: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} + containers: + - image: image-registry.openshift-image-registry.svc:5000/{{.Values.specialresource.spec.namespace}}/{{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}}:v{{.Values.kernelFullVersion}} + name: {{.Values.specialresource.metadata.name}}-{{.Values.groupName.driverContainer}} + imagePullPolicy: Always + command: ["/sbin/init"] + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "systemctl stop kmods-via-containers@{{.Values.specialresource.metadata.name}}"] + securityContext: + privileged: true + nodeSelector: + node-role.kubernetes.io/worker: "" + feature.node.kubernetes.io/kernel-version.full: "{{.Values.KernelFullVersion}}" +---- + +.. Change into the `chart/simple-kmod-0.0.1` directory ++ +[source, terminal] +---- +cd .. +---- + +.. Save the following YAML for the Chart as `Chart.yaml` in the `chart/simple-kmod-0.0.1` directory. ++ +[source, yaml] +---- +apiVersion: v2 +name: simple-kmod +description: Simple kmod will deploy a simple kmod driver-container +icon: https://avatars.githubusercontent.com/u/55542927 +type: application +version: 0.0.1 +appVersion: 1.0.0 +---- + +. Now that the structure for the Helm chart is prepared, create the chart with the helm package command, from the `chart` directory: ++ +[source,terminal] +---- +cd .. +---- ++ +[source, terminal] +---- +helm package simple-kmod-0.0.1/ +---- ++ +.Example output +[source,terminal] +---- +Successfully packaged chart and saved it to: /data/dagray/git/dagrayvid/special-resource-operator/yaml-for-docs/chart/simple-kmod-0.0.1/simple-kmod-0.0.1.tgz +---- + +. 
Create a ConfigMap to store the chart files ++ +[source,terminal] +---- +mkdir cm +---- ++ +[source, terminal] +---- +cp simple-kmod-0.0.1.tgz cm/simple-kmod-0.0.1.tgz +---- ++ +[source, terminal] +---- +helm repo index cm --url=cm://simple-kmod/simple-kmod-chart +---- ++ +[source, terminal] +---- +oc create namespace simple-kmod +---- ++ +[source, terminal] +---- +oc create cm simple-kmod-chart --from-file=cm/index.yaml --from-file=cm/simple-kmod-0.0.1.tgz -n simple-kmod +---- + + +. Use the following SpecialResource manifest to deploy the simple-kmod using the Helm chart that you created in the ConfigMap. The `spec.chart.repository.url` field tells SRO to look for the chart in a ConfigMap. Optionally, uncomment the #debug: true line, to have the YAML files in the chart printed in full in the operator logs and to verify that the logs are created and templated properly. Save this YAML as `simple-kmod-configmap.yaml` ++ +[source,yaml] +---- +apiVersion: sro.openshift.io/v1beta1 +kind: SpecialResource +metadata: + name: simple-kmod +spec: + #debug: true + namespace: simple-kmod + chart: + name: simple-kmod + version: 0.0.1 + repository: + name: example + url: cm://simple-kmod/simple-kmod-chart + set: + kind: Values + apiVersion: sro.openshift.io/v1beta1 + kmodNames: ["simple-kmod", "simple-procfs-kmod"] + buildArgs: + - name: "KMODVER" + value: "SRO" + driverContainer: + source: + git: + ref: "master" + uri: "https://github.com/openshift-psap/kvc-simple-kmod.git" +---- + +. Create the SpecialResource with: ++ +[source,terminal] +---- +$ oc create -f simple-kmod-configmap.yaml +---- + +. The simple-kmod resources are deployed in the namespace `simple-kmod` as specified in the object manifest. After a short time, the build pod for the simple-kmod driver container should start running. After a few minutes, the build should complete and the driver container pods should start Running. 
++ +[source,terminal] +---- +$ oc get pods -n simple-kmod +---- ++ +.Example output +[source,terminal] +---- +NAME READY STATUS RESTARTS AGE +simple-kmod-driver-build-12813789169ac0ee-1-build 0/1 Completed 0 7m12s +simple-kmod-driver-container-12813789169ac0ee-mjsnh 1/1 Running 0 8m2s +simple-kmod-driver-container-12813789169ac0ee-qtkff 1/1 Running 0 8m2s +---- + +. The logs of the simple-kmod driver container image build can be displayed by getting the logs of the build pod. For example, using the pod name from the previous command. ++ +[source,terminal] +---- +$ oc logs pod/simple-kmod-driver-build-12813789169ac0ee-1-build -n simple-kmod +---- + +. To verify that the simple-kmod kernel modules are loaded, execute the lsmod command in one of the driver container pods. ++ +[source,terminal] +---- +$ oc exec -n simple-kmod -it pod/simple-kmod-driver-container-12813789169ac0ee-mjsnh -- lsmod | grep simple +---- ++ +.Example output +[source,terminal] +---- +simple_procfs_kmod 16384 0 +simple_kmod 16384 0 +---- + +To remove the simple-kmod kernel module from the node, delete the simple-kmod SpecialResource API object with `oc delete`. The kernel module is unloaded when the driver container pod is deleted. diff --git a/modules/psap-special-resource-operator.adoc b/modules/psap-special-resource-operator.adoc new file mode 100644 index 000000000000..66610902a513 --- /dev/null +++ b/modules/psap-special-resource-operator.adoc @@ -0,0 +1,12 @@ +// Module included in the following assemblies: +// +// * hardware_enablement/psap-special-resource-operator.adoc + +[id="about-special-resource-operator"] += About the Special Resource Operator + +[discrete] +== Background +The Special Resource Operator (SRO) helps you manage the deployment of kernel modules and drivers on an existing (day 2) {product-title} cluster. 
SRO can be used for a case as simple as building and loading a single kernel module, or as complex as deploying the driver, device plugin, and monitoring stack for a hardware accelerator. + +For loading kernel modules, SRO is designed around the use of driver containers. Driver containers are increasingly being used in cloud-native environments, especially when run on pure container operating systems to deliver hardware drivers to the host. Driver containers extend the kernel stack beyond the out-of-the-box software and hardware features of a specific kernel. Driver containers work on various container-capable Linux distributions. With driver containers, the host operating system stays "clean" and there will not be any clash between different library versions or binaries on the host. diff --git a/modules/psap-using-node-feature-discovery-operator.adoc b/modules/psap-using-node-feature-discovery-operator.adoc index d789bcdb8d84..7229474ec08b 100644 --- a/modules/psap-using-node-feature-discovery-operator.adoc +++ b/modules/psap-using-node-feature-discovery-operator.adoc @@ -1,6 +1,6 @@ // Module included in the following assemblies: // -// * scalability_and_performance/psap-node-feature-discovery-operator.adoc +// * hardware_enablement/psap-node-feature-discovery-operator.adoc [id="using-the-node-feature-discovery-operator_{context}"] = Using the Node Feature Discovery Operator