From 1fabb79515172d32b25fdcfdd36b7f1a15761f26 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Wed, 27 Mar 2024 12:50:27 +0000
Subject: [PATCH 1/9] Change conditional to search for 'Intel'

Changes the conditional to search for 'Intel' in the
ansible_facts.processor variable as the first item in the list is not
always consistent.
---
 roles/iommu/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/roles/iommu/tasks/main.yml b/roles/iommu/tasks/main.yml
index 4ca9872..075a964 100644
--- a/roles/iommu/tasks/main.yml
+++ b/roles/iommu/tasks/main.yml
@@ -7,7 +7,7 @@
       - intel_iommu=on
     kernel_cmdline_remove: # noqa: var-naming[no-role-prefix]
       - ^intel_iommu=
-  when: "'Intel' in ansible_facts.processor.0"
+  when: ansible_facts.processor | select('search', 'Intel') | list | length > 0
 
 - name: Set iommu=pt
   ansible.builtin.include_role:

From 20708e906159bc837940f59f830d6f8d0d09b8d0 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Wed, 27 Mar 2024 13:37:08 +0000
Subject: [PATCH 2/9] Add support for specifying vfio IDs

---
 roles/iommu/README.md         | 16 ++++++++++++++++
 roles/iommu/handlers/main.yml |  8 ++++++++
 roles/iommu/tasks/main.yml    | 35 +++++++++++++++++++++++++++++++++--
 3 files changed, 57 insertions(+), 2 deletions(-)
 create mode 100644 roles/iommu/handlers/main.yml

diff --git a/roles/iommu/README.md b/roles/iommu/README.md
index cb9882b..8efce83 100644
--- a/roles/iommu/README.md
+++ b/roles/iommu/README.md
@@ -20,3 +20,19 @@
       become: true
 
 ```
+
+Or if you want the node to reboot automatically:
+
+```
+---
+- name: Enable IOMMU
+  hosts: iommu
+  tasks:
+    - import_role:
+        name: stackhpc.linux.iommu
+  handlers:
+    - name: reboot
+      reboot:
+      become: true
+
+```
diff --git a/roles/iommu/handlers/main.yml b/roles/iommu/handlers/main.yml
new file mode 100644
index 0000000..ce03b6e
--- /dev/null
+++ b/roles/iommu/handlers/main.yml
@@ -0,0 +1,8 @@
+---
+- name: Regenerate initramfs
+  ansible.builtin.shell: |-
+    #!/bin/bash
+    set -eux
+    dracut -v -f /boot/initramfs-$(uname -r).img $(uname -r)
+  become: true
+  changed_when: true
diff --git a/roles/iommu/tasks/main.yml b/roles/iommu/tasks/main.yml
index 075a964..23f4c75 100644
--- a/roles/iommu/tasks/main.yml
+++ b/roles/iommu/tasks/main.yml
@@ -1,12 +1,43 @@
 ---
+- name: Template dracut config for vfio
+  ansible.builtin.blockinfile:
+    path: /etc/dracut.conf.d/gpu-vfio.conf
+    block: |
+      add_drivers+="vfio vfio_iommu_type1 vfio_pci vfio_virqfd"
+    owner: root
+    group: root
+    mode: "0660"
+    create: true
+  become: true
+  when: iommu_vfio_pci_ids is defined
+  notify:
+    - Regenerate initramfs
+    - reboot
+
+- name: Add vfio to modules-load.d
+  ansible.builtin.blockinfile:
+    path: /etc/modules-load.d/vfio.conf
+    block: |
+      vfio
+      vfio_iommu_type1
+      vfio_pci
+      vfio_virqfd
+    owner: root
+    group: root
+    mode: "0664"
+    create: true
+  become: true
+  when: iommu_vfio_pci_ids is defined
+  notify: reboot
+
 - name: Add iommu to kernel command line (Intel)
   ansible.builtin.include_role:
     name: stackhpc.linux.grubcmdline
   vars:
-    kernel_cmdline: # noqa: var-naming[no-role-prefix]
-      - intel_iommu=on
+    kernel_cmdline: "{{ ['intel_iommu=on'] + (['vfio-pci.ids=' + iommu_vfio_pci_ids] if iommu_vfio_pci_ids is defined else []) }}" # noqa: var-naming[no-role-prefix]
     kernel_cmdline_remove: # noqa: var-naming[no-role-prefix]
       - ^intel_iommu=
+      - ^vfio-pci\.ids=
   when: ansible_facts.processor | select('search', 'Intel') | list | length > 0
 
 - name: Set iommu=pt

From 08d611f5a97ce9697333828da4b91e88494d55ab Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Wed, 27 Mar 2024 16:29:00 +0000
Subject: [PATCH 3/9] Add GPU Passthrough role

---
 roles/gpu_passthrough/README.md         | 34 +++++++++++++++++++
 roles/gpu_passthrough/defaults/main.yml |  1 +
 roles/gpu_passthrough/handlers/main.yml |  8 +++++
 roles/gpu_passthrough/tasks/main.yml    | 45 +++++++++++++++++++++++++
 4 files changed, 88 insertions(+)
 create mode 100644 roles/gpu_passthrough/README.md
 create mode 100644 roles/gpu_passthrough/defaults/main.yml
 create mode 100644 roles/gpu_passthrough/handlers/main.yml
 create mode 100644 roles/gpu_passthrough/tasks/main.yml

diff --git a/roles/gpu_passthrough/README.md b/roles/gpu_passthrough/README.md
new file mode 100644
index 0000000..2169654
--- /dev/null
+++ b/roles/gpu_passthrough/README.md
@@ -0,0 +1,34 @@
+# stackhpc.linux.gpu_passthrough
+
+## Example playbook
+
+```
+---
+- name: Enable GPU Passthrough
+  hosts: gpu_passthrough
+  tasks:
+    - import_role:
+        name: stackhpc.linux.gpu_passthrough
+  handlers:
+    - name: reboot
+      fail:
+        msg: "Please reboot your hypervisor and re-run your host configure to continue"
+      become: true
+
+```
+
+Or if you want the machine to reboot automatically:
+
+```
+---
+- name: Enable GPU Passthrough
+  hosts: gpu_passthrough
+  tasks:
+    - import_role:
+        name: stackhpc.linux.gpu_passthrough
+  handlers:
+    - name: reboot
+      reboot:
+      become: true
+
+```
diff --git a/roles/gpu_passthrough/defaults/main.yml b/roles/gpu_passthrough/defaults/main.yml
new file mode 100644
index 0000000..ed97d53
--- /dev/null
+++ b/roles/gpu_passthrough/defaults/main.yml
@@ -0,0 +1 @@
+---
diff --git a/roles/gpu_passthrough/handlers/main.yml b/roles/gpu_passthrough/handlers/main.yml
new file mode 100644
index 0000000..ce03b6e
--- /dev/null
+++ b/roles/gpu_passthrough/handlers/main.yml
@@ -0,0 +1,8 @@
+---
+- name: Regenerate initramfs
+  ansible.builtin.shell: |-
+    #!/bin/bash
+    set -eux
+    dracut -v -f /boot/initramfs-$(uname -r).img $(uname -r)
+  become: true
+  changed_when: true
diff --git a/roles/gpu_passthrough/tasks/main.yml b/roles/gpu_passthrough/tasks/main.yml
new file mode 100644
index 0000000..5760a1d
--- /dev/null
+++ b/roles/gpu_passthrough/tasks/main.yml
@@ -0,0 +1,45 @@
+---
+- name: Blacklist nouveau
+  ansible.builtin.blockinfile:
+    path: /etc/modprobe.d/blacklist-nouveau.conf
+    block: |
+      blacklist nouveau
+      options nouveau modeset=0
+    mode: "0664"
+    owner: root
+    group: root
+    create: true
+  become: true
+  notify:
+    - Regenerate initramfs
+    - reboot # no-qa
+
+- name: Ignore unsupported model specific registers
+  # Occasionally, applications running in the VM may crash unexpectedly,
+  # whereas they would run normally on a physical machine. If, while
+  # running dmesg -wH, you encounter an error mentioning MSR, the reason
+  # for those crashes is that KVM injects a General protection fault (GPF)
+  # when the guest tries to access unsupported Model-specific registers
+  # (MSRs) - this often results in guest applications/OS crashing. A
+  # number of those issues can be solved by passing the ignore_msrs=1
+  # option to the KVM module, which will ignore unimplemented MSRs.
+  # source: https://wiki.archlinux.org/index.php/QEMU
+  ansible.builtin.blockinfile:
+    path: /etc/modprobe.d/kvm.conf
+    block: |
+      options kvm ignore_msrs=Y
+      # This option is not available in centos 7 as the kernel is too old,
+      # but it can help with dmesg spam in newer kernels (centos8?). Sample
+      # dmesg log message:
+      # [ +0.000002] kvm [8348]: vcpu0, guest rIP: 0xffffffffb0a767fa ignored rdmsr: 0x619
+      # options kvm report_ignored_msrs=N
+    mode: "0664"
+    owner: root
+    group: root
+    create: true
+  become: true
+  notify: reboot # no-qa
+
+- name: Add IOMMU config to kernel command line
+  ansible.builtin.include_role:
+    name: stackhpc.linux.iommu

From bdcc85a3937c3589001faa8715a426e8f75c4fdc Mon Sep 17 00:00:00 2001
From: Will Szumski
Date: Thu, 2 May 2024 15:15:50 +0100
Subject: [PATCH 4/9] Set vfio pci ids for AMD cards

---
 roles/iommu/tasks/main.yml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/roles/iommu/tasks/main.yml b/roles/iommu/tasks/main.yml
index 23f4c75..82c0654 100644
--- a/roles/iommu/tasks/main.yml
+++ b/roles/iommu/tasks/main.yml
@@ -34,12 +34,20 @@
   ansible.builtin.include_role:
     name: stackhpc.linux.grubcmdline
   vars:
-    kernel_cmdline: "{{ ['intel_iommu=on'] + (['vfio-pci.ids=' + iommu_vfio_pci_ids] if iommu_vfio_pci_ids is defined else []) }}" # noqa: var-naming[no-role-prefix]
+    kernel_cmdline: "{{ ['intel_iommu=on'] }}" # noqa: var-naming[no-role-prefix]
     kernel_cmdline_remove: # noqa: var-naming[no-role-prefix]
       - ^intel_iommu=
       - ^vfio-pci\.ids=
   when: ansible_facts.processor | select('search', 'Intel') | list | length > 0
 
+- name: Add vfio pci ids to kernel command line
+  ansible.builtin.include_role:
+    name: stackhpc.linux.grubcmdline
+  vars:
+    kernel_cmdline: "{{ ['vfio-pci.ids=' + iommu_vfio_pci_ids] if iommu_vfio_pci_ids is defined else [] }}" # noqa: var-naming[no-role-prefix]
+    kernel_cmdline_remove: # noqa: var-naming[no-role-prefix]
+      - ^vfio-pci\.ids=
+
 - name: Set iommu=pt
   ansible.builtin.include_role:
     name: stackhpc.linux.grubcmdline

From ed1e2079e69f91cd433c53bd8b7ee6b7ffacba09 Mon Sep 17 00:00:00 2001
From: Will Szumski
Date: Tue, 9 Jul 2024 08:50:33 +0000
Subject: [PATCH 5/9] Add debian handler

---
 roles/gpu_passthrough/handlers/main.yml | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/roles/gpu_passthrough/handlers/main.yml b/roles/gpu_passthrough/handlers/main.yml
index ce03b6e..bbb36ab 100644
--- a/roles/gpu_passthrough/handlers/main.yml
+++ b/roles/gpu_passthrough/handlers/main.yml
@@ -1,8 +1,20 @@
 ---
-- name: Regenerate initramfs
+- name: Regenerate initramfs (RedHat)
+  listen: Regenerate initramfs
   ansible.builtin.shell: |-
     #!/bin/bash
     set -eux
     dracut -v -f /boot/initramfs-$(uname -r).img $(uname -r)
   become: true
   changed_when: true
+  when: ansible_facts.os_family == 'RedHat'
+
+- name: Regenerate initramfs (Debian)
+  listen: Regenerate initramfs
+  ansible.builtin.shell: |-
+    #!/bin/bash
+    set -eux
+    update-initramfs -u -k $(uname -r)
+  become: true
+  changed_when: true
+  when: ansible_facts.os_family == 'Debian'

From 0a5dc507e3a84b17ee4411fa6d297f38901db9f0 Mon Sep 17 00:00:00 2001
From: Will Szumski
Date: Tue, 9 Jul 2024 11:14:10 +0100
Subject: [PATCH 6/9] Update handlers for Ubuntu support

---
 roles/iommu/handlers/main.yml | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/roles/iommu/handlers/main.yml b/roles/iommu/handlers/main.yml
index ce03b6e..bbb36ab 100644
--- a/roles/iommu/handlers/main.yml
+++ b/roles/iommu/handlers/main.yml
@@ -1,8 +1,20 @@
 ---
-- name: Regenerate initramfs
+- name: Regenerate initramfs (RedHat)
+  listen: Regenerate initramfs
   ansible.builtin.shell: |-
     #!/bin/bash
     set -eux
     dracut -v -f /boot/initramfs-$(uname -r).img $(uname -r)
   become: true
   changed_when: true
+  when: ansible_facts.os_family == 'RedHat'
+
+- name: Regenerate initramfs (Debian)
+  listen: Regenerate initramfs
+  ansible.builtin.shell: |-
+    #!/bin/bash
+    set -eux
+    update-initramfs -u -k $(uname -r)
+  become: true
+  changed_when: true
+  when: ansible_facts.os_family == 'Debian'

From 33ec084f2d5ea3d7ee5fc9b98ea79c40e0850f42 Mon Sep 17 00:00:00 2001
From: Will Szumski
Date: Tue, 9 Jul 2024 11:15:20 +0100
Subject: [PATCH 7/9] Only configure dracut on RedHat

The role's handlers regenerate the initramfs with dracut on RedHat-family
hosts and with update-initramfs on Debian-family hosts, so the dracut
drop-in is only written when os_family is 'RedHat'.
---
 roles/iommu/tasks/main.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/roles/iommu/tasks/main.yml b/roles/iommu/tasks/main.yml
index 82c0654..fe85950 100644
--- a/roles/iommu/tasks/main.yml
+++ b/roles/iommu/tasks/main.yml
@@ -9,7 +9,9 @@
     mode: "0660"
     create: true
   become: true
-  when: iommu_vfio_pci_ids is defined
+  when: 
+    - iommu_vfio_pci_ids is defined
+    - ansible_facts.os_family == 'RedHat'
   notify:
     - Regenerate initramfs
     - reboot

From b11018fb6acbaf0aeda07478faa60c870b2c1f83 Mon Sep 17 00:00:00 2001
From: Will Szumski
Date: Tue, 9 Jul 2024 14:08:53 +0100
Subject: [PATCH 8/9] whitespace fix

---
 roles/iommu/tasks/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/roles/iommu/tasks/main.yml b/roles/iommu/tasks/main.yml
index 03ca54d..ae6002b 100644
--- a/roles/iommu/tasks/main.yml
+++ b/roles/iommu/tasks/main.yml
@@ -9,7 +9,7 @@
     mode: "0660"
     create: true
   become: true
-  when: 
+  when:
     - iommu_vfio_pci_ids is defined
     - ansible_facts.os_family == 'RedHat'
   notify:

From 555da331ca14281c25ffe766c09c654d1db281d8 Mon Sep 17 00:00:00 2001
From: Will Szumski
Date: Tue, 9 Jul 2024 14:09:53 +0100
Subject: [PATCH 9/9] Apply suggestions from code review

Co-authored-by: Dawud <7688823+technowhizz@users.noreply.github.com>
---
 roles/iommu/tasks/main.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/roles/iommu/tasks/main.yml b/roles/iommu/tasks/main.yml
index ae6002b..cb69eb5 100644
--- a/roles/iommu/tasks/main.yml
+++ b/roles/iommu/tasks/main.yml
@@ -45,9 +45,10 @@
   ansible.builtin.include_role:
     name: stackhpc.linux.grubcmdline
   vars:
-    kernel_cmdline: "{{ ['vfio-pci.ids=' + iommu_vfio_pci_ids] if iommu_vfio_pci_ids is defined else [] }}" # noqa: var-naming[no-role-prefix]
+    kernel_cmdline: "{{ ['vfio-pci.ids=' + iommu_vfio_pci_ids] }}" # noqa: var-naming[no-role-prefix]
     kernel_cmdline_remove: # noqa: var-naming[no-role-prefix]
       - ^vfio-pci\.ids=
+  when: iommu_vfio_pci_ids is defined
 
 - name: Set iommu=pt
   ansible.builtin.include_role: