diff --git a/roles/gpu_passthrough/README.md b/roles/gpu_passthrough/README.md
new file mode 100644
index 0000000..2169654
--- /dev/null
+++ b/roles/gpu_passthrough/README.md
@@ -0,0 +1,34 @@
+# stackhpc.linux.gpu_passthrough
+
+## Example playbook
+
+```
+---
+- name: Enable GPU Passthrough
+  hosts: gpu_passthrough
+  tasks:
+    - import_role:
+        name: stackhpc.linux.gpu_passthrough
+  handlers:
+    - name: reboot
+      fail:
+        msg: "Please reboot your hypervisor and re-run your host configure to continue"
+      become: true
+
+```
+
+Or if you want the machine to reboot automatically:
+
+```
+---
+- name: Enable GPU Passthrough
+  hosts: gpu_passthrough
+  tasks:
+    - import_role:
+        name: stackhpc.linux.gpu_passthrough
+  handlers:
+    - name: reboot
+      reboot:
+      become: true
+
+```
diff --git a/roles/gpu_passthrough/defaults/main.yml b/roles/gpu_passthrough/defaults/main.yml
new file mode 100644
index 0000000..ed97d53
--- /dev/null
+++ b/roles/gpu_passthrough/defaults/main.yml
@@ -0,0 +1 @@
+---
diff --git a/roles/gpu_passthrough/handlers/main.yml b/roles/gpu_passthrough/handlers/main.yml
new file mode 100644
index 0000000..bbb36ab
--- /dev/null
+++ b/roles/gpu_passthrough/handlers/main.yml
@@ -0,0 +1,20 @@
+---
+- name: Regenerate initramfs (RedHat)
+  listen: Regenerate initramfs
+  ansible.builtin.shell: |-
+    #!/bin/bash
+    set -eux
+    dracut -v -f /boot/initramfs-$(uname -r).img $(uname -r)
+  become: true
+  changed_when: true
+  when: ansible_facts.os_family == 'RedHat'
+
+- name: Regenerate initramfs (Debian)
+  listen: Regenerate initramfs
+  ansible.builtin.shell: |-
+    #!/bin/bash
+    set -eux
+    update-initramfs -u -k $(uname -r)
+  become: true
+  changed_when: true
+  when: ansible_facts.os_family == 'Debian'
diff --git a/roles/gpu_passthrough/tasks/main.yml b/roles/gpu_passthrough/tasks/main.yml
new file mode 100644
index 0000000..5760a1d
--- /dev/null
+++ b/roles/gpu_passthrough/tasks/main.yml
@@ -0,0 +1,45 @@
+---
+- name: Blacklist nouveau
+  ansible.builtin.blockinfile:
+    path: /etc/modprobe.d/blacklist-nouveau.conf
+    block: |
+      blacklist nouveau
+      options nouveau modeset=0
+    mode: "0664"
+    owner: root
+    group: root
+    create: true
+  become: true
+  notify:
+    - Regenerate initramfs
+    - reboot # no-qa
+
+- name: Ignore unsupported model specific registers
+  # Occasionally, applications running in the VM may crash unexpectedly,
+  # whereas they would run normally on a physical machine. If, while
+  # running dmesg -wH, you encounter an error mentioning MSR, the reason
+  # for those crashes is that KVM injects a General protection fault (GPF)
+  # when the guest tries to access unsupported Model-specific registers
+  # (MSRs) - this often results in guest applications/OS crashing. A
+  # number of those issues can be solved by passing the ignore_msrs=1
+  # option to the KVM module, which will ignore unimplemented MSRs.
+  # source: https://wiki.archlinux.org/index.php/QEMU
+  ansible.builtin.blockinfile:
+    path: /etc/modprobe.d/kvm.conf
+    block: |
+      options kvm ignore_msrs=Y
+      # This option is not available in centos 7 as the kernel is too old,
+      # but it can help with dmesg spam in newer kernels (centos8?). Sample
+      # dmesg log message:
+      # [ +0.000002] kvm [8348]: vcpu0, guest rIP: 0xffffffffb0a767fa ignored rdmsr: 0x619
+      # options kvm report_ignored_msrs=N
+    mode: "0664"
+    owner: root
+    group: root
+    create: true
+  become: true
+  notify: reboot # no-qa
+
+- name: Add IOMMU config to kernel command line
+  ansible.builtin.include_role:
+    name: stackhpc.linux.iommu
diff --git a/roles/iommu/README.md b/roles/iommu/README.md
index cb9882b..8efce83 100644
--- a/roles/iommu/README.md
+++ b/roles/iommu/README.md
@@ -20,3 +20,19 @@
       become: true
 
 ```
+
+Or if you want the node to reboot automatically:
+
+```
+---
+- name: Enable IOMMU
+  hosts: iommu
+  tasks:
+    - import_role:
+        name: stackhpc.linux.iommu
+  handlers:
+    - name: reboot
+      reboot:
+      become: true
+
+```
diff --git a/roles/iommu/handlers/main.yml b/roles/iommu/handlers/main.yml
new file mode 100644
index 0000000..bbb36ab
--- /dev/null
+++ b/roles/iommu/handlers/main.yml
@@ -0,0 +1,20 @@
+---
+- name: Regenerate initramfs (RedHat)
+  listen: Regenerate initramfs
+  ansible.builtin.shell: |-
+    #!/bin/bash
+    set -eux
+    dracut -v -f /boot/initramfs-$(uname -r).img $(uname -r)
+  become: true
+  changed_when: true
+  when: ansible_facts.os_family == 'RedHat'
+
+- name: Regenerate initramfs (Debian)
+  listen: Regenerate initramfs
+  ansible.builtin.shell: |-
+    #!/bin/bash
+    set -eux
+    update-initramfs -u -k $(uname -r)
+  become: true
+  changed_when: true
+  when: ansible_facts.os_family == 'Debian'
diff --git a/roles/iommu/tasks/main.yml b/roles/iommu/tasks/main.yml
index 1129a27..cb69eb5 100644
--- a/roles/iommu/tasks/main.yml
+++ b/roles/iommu/tasks/main.yml
@@ -1,14 +1,59 @@
 ---
+- name: Template dracut config for vfio
+  # dracut only builds the initramfs on RedHat-family hosts (the Debian
+  # handler uses update-initramfs), so restrict this drop-in to RedHat.
+  # dracut concatenates += values across config files, so the list needs
+  # a leading and trailing space inside the quotes (see dracut.conf(5)).
+  ansible.builtin.blockinfile:
+    path: /etc/dracut.conf.d/gpu-vfio.conf
+    block: |
+      add_drivers+=" vfio vfio_iommu_type1 vfio_pci vfio_virqfd "
+    owner: root
+    group: root
+    mode: "0660"
+    create: true
+  become: true
+  when:
+    - iommu_vfio_pci_ids is defined
+    - ansible_facts.os_family == 'RedHat'
+  notify:
+    - Regenerate initramfs
+    - reboot
+
+- name: Add vfio to modules-load.d
+  ansible.builtin.blockinfile:
+    path: /etc/modules-load.d/vfio.conf
+    block: |
+      vfio
+      vfio_iommu_type1
+      vfio_pci
+      vfio_virqfd
+    owner: root
+    group: root
+    mode: "0664"
+    create: true
+  become: true
+  when: iommu_vfio_pci_ids is defined
+  notify: reboot
+
 - name: Add iommu to kernel command line (Intel)
   ansible.builtin.include_role:
     name: stackhpc.linux.grubcmdline
   vars:
-    kernel_cmdline: # noqa: var-naming[no-role-prefix]
-      - intel_iommu=on
+    kernel_cmdline: "{{ ['intel_iommu=on'] }}" # noqa: var-naming[no-role-prefix]
     kernel_cmdline_remove: # noqa: var-naming[no-role-prefix]
       - ^intel_iommu=
   when: ansible_facts.processor | select('search','Intel')
 
+- name: Add vfio pci ids to kernel command line
+  ansible.builtin.include_role:
+    name: stackhpc.linux.grubcmdline
+  vars:
+    kernel_cmdline: "{{ ['vfio-pci.ids=' + iommu_vfio_pci_ids] }}" # noqa: var-naming[no-role-prefix]
+    kernel_cmdline_remove: # noqa: var-naming[no-role-prefix]
+      - ^vfio-pci\.ids=
+  when: iommu_vfio_pci_ids is defined
+
 - name: Set iommu=pt
   ansible.builtin.include_role:
     name: stackhpc.linux.grubcmdline