From ea1736c16c2c95e4a291f8bdb0f9a297c6b04361 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 8 Oct 2025 09:30:14 +0100 Subject: [PATCH 01/14] auto set GresTypes --- README.md | 2 +- defaults/main.yml | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f421e5e7..30677909 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ unique set of homogenous nodes: - `conf`: A string with the [resource specification](https://slurm.schedmd.com/slurm.conf.html#OPT_Gres_1) but requiring the format `::`, e.g. `gpu:A100:2`. Note the `type` is an arbitrary string. - `file`: Omit if `gres_autodetect` is set. A string with the [File](https://slurm.schedmd.com/gres.conf.html#OPT_File) (path to device(s)) for this resource, e.g. `/dev/nvidia[0-1]` for the above example. - Note [GresTypes](https://slurm.schedmd.com/slurm.conf.html#OPT_GresTypes) must be set in `openhpc_config` if this is used. + Note [GresTypes](https://slurm.schedmd.com/slurm.conf.html#OPT_GresTypes) is automatically set from `gres` entries. * `features`: Optional. List of [Features](https://slurm.schedmd.com/slurm.conf.html#OPT_Features) strings. * `node_params`: Optional. Mapping of additional parameters and values for [node configuration](https://slurm.schedmd.com/slurm.conf.html#lbAE). 
diff --git a/defaults/main.yml b/defaults/main.yml index 29e33adf..6a6225ad 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -40,6 +40,7 @@ openhpc_default_config: PropagateResourceLimitsExcept: MEMLOCK Epilog: /etc/slurm/slurm.epilog.clean ReturnToService: 2 + GresTypes: "{{ ohpc_nodegroup_gres_types if ohpc_nodegroup_gres_types != '' else 'omit' }}" openhpc_cgroup_default_config: ConstrainCores: "yes" ConstrainDevices: "yes" @@ -48,6 +49,15 @@ openhpc_cgroup_default_config: openhpc_config: {} openhpc_cgroup_config: {} +ohpc_nodegroup_gres_types: >- + {{ + openhpc_nodegroups | + community.general.json_query('[].gres[].conf') | + map('split', ':') | + map('first') | + unique | + join(',') + }} openhpc_gres_template: gres.conf.j2 openhpc_cgroup_template: cgroup.conf.j2 From a7e8a66d61605ab9261b990bdae0366add1c74ff Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 8 Oct 2025 14:43:53 +0100 Subject: [PATCH 02/14] auto gres - v1 --- defaults/main.yml | 5 +++ library/gpu_info.py | 88 +++++++++++++++++++++++++++++++++++++++++ tasks/runtime.yml | 10 +++++ templates/gres.conf.j2 | 10 ++--- templates/slurm.conf.j2 | 6 ++- 5 files changed, 113 insertions(+), 6 deletions(-) create mode 100644 library/gpu_info.py diff --git a/defaults/main.yml b/defaults/main.yml index 6a6225ad..cbf42853 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -12,6 +12,7 @@ openhpc_packages: openhpc_resume_timeout: 300 openhpc_retry_delay: 10 openhpc_job_maxtime: '60-0' # quote this to avoid ansible converting some formats to seconds, which is interpreted as minutes by Slurm +openhpc_gres_autodetect: 'off' openhpc_default_config: # This only defines values which are not Slurm defaults SlurmctldHost: "{{ openhpc_slurm_control_host }}{% if openhpc_slurm_control_host_address is defined %}({{ openhpc_slurm_control_host_address }}){% endif %}" @@ -50,6 +51,9 @@ openhpc_cgroup_default_config: openhpc_config: {} openhpc_cgroup_config: {} ohpc_nodegroup_gres_types: >- + {% if 
openhpc_gres_autodetect == 'nvml' %} + gpu + {% else %} {{ openhpc_nodegroups | community.general.json_query('[].gres[].conf') | map('split', ':') | map('first') | unique | join(',') }} + {% endif %} openhpc_gres_template: gres.conf.j2 openhpc_cgroup_template: cgroup.conf.j2 diff --git a/library/gpu_info.py b/library/gpu_info.py new file mode 100644 index 00000000..e04b42cd --- /dev/null +++ b/library/gpu_info.py @@ -0,0 +1,88 @@ +#!/usr/bin/python + +# Copyright: (c) 2025, StackHPC +# Apache 2 License + +from ansible.module_utils.basic import AnsibleModule + +ANSIBLE_METADATA = { + "metadata_version": "0.1", + "status": ["preview"], + "supported_by": "community", +} + +DOCUMENTATION = """ +--- +module: gpu_info +short_description: Gathers GPU model and count information via nvidia-smi +version_added: "2.9" +description: + - "Queries NVIDIA GPU models and counts and builds a Slurm GRES string" +options: {} +# This module takes no options. Returned facts: +# gpus: mapping of GPU model name to count, e.g. {"H200": 2} +# gres: Slurm GRES string built from the GPUs found, +# e.g. "gpu:H200:2"; empty string if no GPUs are found +# Behaviour: +# - if nvidia-smi is not installed, returns empty results +# - if nvidia-smi is installed but the driver is not running (rc 9), +# returns empty results +# - any other nvidia-smi failure fails the module +# - GPU models are taken from nvidia-smi --query-gpu=name and +# counts are aggregated per model +# - check mode is supported + +requirements: + - "python >= 3.6" +author: + - Will Szumski, StackHPC +""" + +EXAMPLES = """ +""" + +import collections + +def run_module(): + module_args = dict({}) + + module = AnsibleModule(argument_spec=module_args, supports_check_mode=True) + + try: + rc, stdout, stderr = module.run_command("nvidia-smi --query-gpu=name --format=csv,noheader", check_rc=False, handle_exceptions=False) + except FileNotFoundError: # nvidia-smi not installed + rc = None + + # nvidia-smi return codes: https://docs.nvidia.com/deploy/nvidia-smi/index.html + gpus = {} + result = {'changed': False, 'gpus': gpus, 'gres':''} + if rc == 0: + # stdout line e.g.
'NVIDIA H200' for each GPU + lines = [line for line in stdout.splitlines() if line != ''] # defensive: currently no blank lines + models = [line.split()[1] for line in lines] + gpus.update(collections.Counter(models)) + elif rc == 9: + # nvidia-smi installed but driver not running + pass + elif rc == None: + # nvidia-smi not installed + pass + else: + result.update({'stdout': stdout, 'rc': rc, 'stderr':stderr}) + module.fail_json(**result) + + if len(gpus) > 0: + gres_parts = [] + for model, count in gpus.items(): + gres_parts.append(f"gpu:{model}:{count}") + result.update({'gres': ','.join(gres_parts)}) + + module.exit_json(**result) + + +def main(): + run_module() + + +if __name__ == "__main__": + main() diff --git a/tasks/runtime.yml b/tasks/runtime.yml index c2e30d45..4af98d02 100644 --- a/tasks/runtime.yml +++ b/tasks/runtime.yml @@ -63,6 +63,16 @@ notify: Restart slurmdbd service when: openhpc_enable.database | default(false) | bool +- name: Query GPU info + gpu_info: + register: _gpu_info + when: openhpc_enable.batch | default(false) + +- name: Set fact for node GPU GRES + set_fact: + ohpc_node_gpu_gres: "{{ _gpu_info.gres }}" + when: openhpc_enable.batch | default(false) + - name: Template slurm.conf template: src: "{{ openhpc_slurm_conf_template }}" diff --git a/templates/gres.conf.j2 b/templates/gres.conf.j2 index 0ca7f260..f2ebbf7d 100644 --- a/templates/gres.conf.j2 +++ b/templates/gres.conf.j2 @@ -1,16 +1,16 @@ -AutoDetect=off +AutoDetect={{ openhpc_gres_autodetect }} {% for nodegroup in openhpc_nodegroups %} {% set gres_list = nodegroup.gres | default([]) %} -{% set gres_autodetect = nodegroup.gres_autodetect | default('off') %} +{% set nodegroup_gres_autodetect = nodegroup.gres_autodetect | default('off') %} {% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %} {% set inventory_group_hosts = groups.get(inventory_group_name, []) %} {% set hostlist_string = inventory_group_hosts | hostlist_expression | join(',') %} -{% if 
gres_autodetect != 'off' %} -NodeName={{ hostlist_string }} AutoDetect={{ gres_autodetect }} +{% if nodegroup_gres_autodetect != 'off' %} +NodeName={{ hostlist_string }} AutoDetect={{ nodegroup_gres_autodetect }} {% else %} {% for gres in gres_list %} {% set gres_name, gres_type, _ = gres.conf.split(':') %} -NodeName={{ hostlist_string }} Name={{ gres_name }} Type={{ gres_type }} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is set to off. The error occured on node group: ' ~ nodegroup.name ~ '. Please add the file key or set gres_autodetect.') }} +NodeName={{ hostlist_string }} Name={{ gres_name }} Type={{ gres_type }} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is not specified. The error occured on node group: ' ~ nodegroup.name ~ '. Please add the file key or set gres_autodetect.') }} {% endfor %}{# gres #} {% endif %}{# autodetect #} {% endfor %}{# nodegroup #} diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2 index 725cad7f..58a9cf0a 100644 --- a/templates/slurm.conf.j2 +++ b/templates/slurm.conf.j2 @@ -38,7 +38,11 @@ NodeName={{ hostlists | join(',') }} {{ '' -}} CoresPerSocket={{ first_host_hv['ansible_processor_cores'] }} {{ '' -}} ThreadsPerCore={{ first_host_hv['ansible_processor_threads_per_core'] }} {{ '' -}} {{ nodegroup.node_params | default({}) | dict2parameters }} {{ '' -}} - {% if 'gres' in nodegroup %}Gres={{ ','.join(nodegroup.gres | map(attribute='conf')) }}{% endif %} + {% if 'gres' in nodegroup -%} + Gres={{ ','.join(nodegroup.gres | map(attribute='conf')) -}} + {% elif openhpc_gres_autodetect == 'nvml' and first_host_hv['ohpc_node_gpu_gres'] != '' -%} + Gres={{ first_host_hv['ohpc_node_gpu_gres'] -}} + {% endif %} {% endif %}{# 1 or more hosts in inventory #} NodeSet=nodegroup_{{ nodegroup.name }} Feature=nodegroup_{{ nodegroup.name }} From 
ccaf02aaa1f3a9e34d5b8d544583237dd965261a Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 8 Oct 2025 14:52:49 +0100 Subject: [PATCH 03/14] auto gres v2 --- templates/gres.conf.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/gres.conf.j2 b/templates/gres.conf.j2 index f2ebbf7d..db870320 100644 --- a/templates/gres.conf.j2 +++ b/templates/gres.conf.j2 @@ -10,7 +10,7 @@ NodeName={{ hostlist_string }} AutoDetect={{ nodegroup_gres_autodetect }} {% else %} {% for gres in gres_list %} {% set gres_name, gres_type, _ = gres.conf.split(':') %} -NodeName={{ hostlist_string }} Name={{ gres_name }} Type={{ gres_type }} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is not specified. The error occured on node group: ' ~ nodegroup.name ~ '. Please add the file key or set gres_autodetect.') }} +NodeName={{ hostlist_string }} Name={{ gres_name }} Type={{ gres_type }}{% if openhpc_gres_autodetect == nodegroup_gres_autodetect == 'off' %} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is not specified. The error occured on node group: ' ~ nodegroup.name ~ '. 
Please add the file key or set gres_autodetect.') }}{% endif %} {% endfor %}{# gres #} {% endif %}{# autodetect #} {% endfor %}{# nodegroup #} From ccd170d1a108d43ad1493b035aa633e51d963aeb Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 8 Oct 2025 15:10:00 +0100 Subject: [PATCH 04/14] auto gres v3 --- templates/gres.conf.j2 | 3 +-- templates/slurm.conf.j2 | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/templates/gres.conf.j2 b/templates/gres.conf.j2 index db870320..bd95b038 100644 --- a/templates/gres.conf.j2 +++ b/templates/gres.conf.j2 @@ -1,6 +1,5 @@ AutoDetect={{ openhpc_gres_autodetect }} {% for nodegroup in openhpc_nodegroups %} -{% set gres_list = nodegroup.gres | default([]) %} {% set nodegroup_gres_autodetect = nodegroup.gres_autodetect | default('off') %} {% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %} {% set inventory_group_hosts = groups.get(inventory_group_name, []) %} @@ -8,7 +7,7 @@ AutoDetect={{ openhpc_gres_autodetect }} {% if nodegroup_gres_autodetect != 'off' %} NodeName={{ hostlist_string }} AutoDetect={{ nodegroup_gres_autodetect }} {% else %} -{% for gres in gres_list %} +{% for gres in nodegroup.gres | default([]) %} {% set gres_name, gres_type, _ = gres.conf.split(':') %} NodeName={{ hostlist_string }} Name={{ gres_name }} Type={{ gres_type }}{% if openhpc_gres_autodetect == nodegroup_gres_autodetect == 'off' %} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is not specified. The error occured on node group: ' ~ nodegroup.name ~ '. 
Please add the file key or set gres_autodetect.') }}{% endif %} {% endfor %}{# gres #} diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2 index 58a9cf0a..ab5f3448 100644 --- a/templates/slurm.conf.j2 +++ b/templates/slurm.conf.j2 @@ -40,7 +40,7 @@ NodeName={{ hostlists | join(',') }} {{ '' -}} {{ nodegroup.node_params | default({}) | dict2parameters }} {{ '' -}} {% if 'gres' in nodegroup -%} Gres={{ ','.join(nodegroup.gres | map(attribute='conf')) -}} - {% elif openhpc_gres_autodetect == 'nvml' and first_host_hv['ohpc_node_gpu_gres'] != '' -%} + {% elif nodegroup.gres_autodetect | default(openhpc_gres_autodetect) == 'nvml' and first_host_hv['ohpc_node_gpu_gres'] != '' -%} Gres={{ first_host_hv['ohpc_node_gpu_gres'] -}} {% endif %} From 56317bbf91c66ef8565933d0af06c467dd27d77e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 8 Oct 2025 15:42:32 +0100 Subject: [PATCH 05/14] auto gres v4 --- templates/gres.conf.j2 | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/templates/gres.conf.j2 b/templates/gres.conf.j2 index bd95b038..708b1508 100644 --- a/templates/gres.conf.j2 +++ b/templates/gres.conf.j2 @@ -4,12 +4,8 @@ AutoDetect={{ openhpc_gres_autodetect }} {% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %} {% set inventory_group_hosts = groups.get(inventory_group_name, []) %} {% set hostlist_string = inventory_group_hosts | hostlist_expression | join(',') %} -{% if nodegroup_gres_autodetect != 'off' %} -NodeName={{ hostlist_string }} AutoDetect={{ nodegroup_gres_autodetect }} -{% else %} -{% for gres in nodegroup.gres | default([]) %} -{% set gres_name, gres_type, _ = gres.conf.split(':') %} -NodeName={{ hostlist_string }} Name={{ gres_name }} Type={{ gres_type }}{% if openhpc_gres_autodetect == nodegroup_gres_autodetect == 'off' %} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is not specified. 
The error occured on node group: ' ~ nodegroup.name ~ '. Please add the file key or set gres_autodetect.') }}{% endif %} -{% endfor %}{# gres #} -{% endif %}{# autodetect #} +{% for gres in nodegroup.gres | default([]) %} +{% set gres_name, gres_type, _ = gres.conf.split(':') %} +NodeName={{ hostlist_string }} AutoDetect={{ nodegroup_gres_autodetect }} Name={{ gres_name }} Type={{ gres_type }}{% if openhpc_gres_autodetect == nodegroup_gres_autodetect == 'off' %} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is not specified. The error occured on node group: ' ~ nodegroup.name ~ '. Please add the file key or set gres_autodetect.') }}{% endif %} +{% endfor %}{# gres #} {% endfor %}{# nodegroup #} From 6ebea62bbf00c811e9a97214dc24e64d5a1c91d8 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 8 Oct 2025 17:55:01 +0100 Subject: [PATCH 06/14] auto gres v5 - proper top-level/overrride --- defaults/main.yml | 27 +++++++------ files/nodegroup.schema | 86 ------------------------------------------ tasks/validate.yml | 34 +++++++++++++---- templates/gres.conf.j2 | 3 +- 4 files changed, 41 insertions(+), 109 deletions(-) delete mode 100644 files/nodegroup.schema diff --git a/defaults/main.yml b/defaults/main.yml index cbf42853..e12ad7fa 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -41,7 +41,7 @@ openhpc_default_config: PropagateResourceLimitsExcept: MEMLOCK Epilog: /etc/slurm/slurm.epilog.clean ReturnToService: 2 - GresTypes: "{{ ohpc_nodegroup_gres_types if ohpc_nodegroup_gres_types != '' else 'omit' }}" + GresTypes: "{{ ohpc_gres_types | flatten | sort | unique | join(',') if ohpc_gres_types else 'omit' }}" openhpc_cgroup_default_config: ConstrainCores: "yes" ConstrainDevices: "yes" @@ -50,19 +50,18 @@ openhpc_cgroup_default_config: openhpc_config: {} openhpc_cgroup_config: {} -ohpc_nodegroup_gres_types: >- - {% if openhpc_gres_autodetect == 'nvml' %} - gpu - {% else %} - {{ - 
openhpc_nodegroups | - community.general.json_query('[].gres[].conf') | - map('split', ':') | - map('first') | - unique | - join(',') - }} - {% endif %} +ohpc_gres_types: + # toplevel nvml autodetect: + - "{{ ['gpu'] if openhpc_gres_autodetect == 'nvml' else [] }}" + # nodegroup nvml autodetect: + - "{{ ['gpu'] if openhpc_nodegroups | map(attribute='gres_autodetect', default='') | unique | select('eq', 'nvml') else [] }}" + # nodegroup specific gres conf: + - "{{ + openhpc_nodegroups | + community.general.json_query('[].gres[].conf') | + map('regex_search', '^(\\w+)') + }}" + openhpc_gres_template: gres.conf.j2 openhpc_cgroup_template: cgroup.conf.j2 diff --git a/files/nodegroup.schema b/files/nodegroup.schema deleted file mode 100644 index 814c60ad..00000000 --- a/files/nodegroup.schema +++ /dev/null @@ -1,86 +0,0 @@ -{ "$schema": "https://json-schema.org/draft/2020-12/schema", - "type": "object", - "definitions": { - "gres": { - "type": "array", - "items": { - "type": "object", - "properties": { - "conf": { - "type": "string", - "minLength": 1 - }, - "file": { - "type": "string", - "minLength": 1 - } - }, - "required": [ - "conf" - ] - } - } - }, - "properties": { - "name": { - "type": "string", - "minLength": 1 - }, - "ram_mb": { - "type": "number", - }, - "ram_multiplier": { - "type": "number", - }, - "features": { - "type": "array", - "items": { - "type": "string" - } - }, - "node_params": { - "type": "object", - }, - "gres_autodetect": { - "type": "string", - "minLength": 1 - }, - "gres": { - "$ref": "#/definitions/gres" - } - }, - "required": [ - "name" - ], - "if": { - "properties": { - "gres_autodetect": { - "const": "off" - } - } - }, - "then": { - "properties": { - "gres": { - "items": { - "required": [ - "file" - ] - } - } - } - }, - "else": { - "properties": { - "gres": { - "items": { - "not": { - "required": [ - "file" - ] - } - } - } - } - } -} diff --git a/tasks/validate.yml b/tasks/validate.yml index 55356ce2..dea23254 100644 --- 
a/tasks/validate.yml +++ b/tasks/validate.yml @@ -21,15 +21,35 @@ delegate_to: localhost run_once: true -- name: Validate openhpc_nodegroups - ansible.utils.validate: - criteria: "{{ lookup('file', 'nodegroup.schema') }}" - engine: 'ansible.utils.jsonschema' - data: "{{ item }}" - vars: - ansible_jsonschema_draft: '2020-12' + +# - name: Validate openhpc_nodegroups contain name +# ansible.builtin.assert: +# that: +# - "'name' in nodegroup +# - openhpc_cluster_name ~ '_' ~ nodegroup.name in group_names +# fail_msg: "nodegroup does not specify a name or openhpc_cluster_name ~ '_' ~ nodegroup.name not found in inventory" + +# loop: "{{ openhpc_nodegroups }}" + +- name: debug openhpc_nodegroups + ansible.builtin.debug: + msg: "{{ (item.gres | select('contains', 'file')) }} == {{ (item.gres | length) }}" + loop: "{{ openhpc_nodegroups }}" + when: + - item.gres_autodetect | default(openhpc_gres_autodetect) == 'off' + - "'gres' in item" delegate_to: localhost + run_once: true + +- name: Validate openhpc_nodegroups + ansible.builtin.assert: + that: "(item.gres | select('contains', 'file') | length) == (item.gres | length)" + fail_msg: "GRES configuration(s) in openhpc_nodegroup '{{ item.name }}' do not include 'file' but GRES autodetection is not enabled" loop: "{{ openhpc_nodegroups }}" + when: + - item.gres_autodetect | default(openhpc_gres_autodetect) == 'off' + - "'gres' in item" + delegate_to: localhost run_once: true - name: Fail if partition configuration is outdated diff --git a/templates/gres.conf.j2 b/templates/gres.conf.j2 index 708b1508..602a6ef6 100644 --- a/templates/gres.conf.j2 +++ b/templates/gres.conf.j2 @@ -1,11 +1,10 @@ AutoDetect={{ openhpc_gres_autodetect }} {% for nodegroup in openhpc_nodegroups %} -{% set nodegroup_gres_autodetect = nodegroup.gres_autodetect | default('off') %} {% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %} {% set inventory_group_hosts = groups.get(inventory_group_name, []) %} {% set hostlist_string = 
inventory_group_hosts | hostlist_expression | join(',') %} {% for gres in nodegroup.gres | default([]) %} {% set gres_name, gres_type, _ = gres.conf.split(':') %} -NodeName={{ hostlist_string }} AutoDetect={{ nodegroup_gres_autodetect }} Name={{ gres_name }} Type={{ gres_type }}{% if openhpc_gres_autodetect == nodegroup_gres_autodetect == 'off' %} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is not specified. The error occured on node group: ' ~ nodegroup.name ~ '. Please add the file key or set gres_autodetect.') }}{% endif %} +NodeName={{ hostlist_string }}{% if 'gres_autodetect' in nodegroup %} AutoDetect={{ nodegroup.gres_autodetect }}{% endif %} Name={{ gres_name }} Type={{ gres_type }}{% if 'file' in gres %} File={{ gres.file }}{% endif %} {% endfor %}{# gres #} {% endfor %}{# nodegroup #} From 1d63453600867d6ff657c596725698a61e634ace Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 9 Oct 2025 11:56:31 +0100 Subject: [PATCH 07/14] v5 - fix grestypes when none --- defaults/main.yml | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/defaults/main.yml b/defaults/main.yml index e12ad7fa..cb74c8bc 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -41,7 +41,7 @@ openhpc_default_config: PropagateResourceLimitsExcept: MEMLOCK Epilog: /etc/slurm/slurm.epilog.clean ReturnToService: 2 - GresTypes: "{{ ohpc_gres_types | flatten | sort | unique | join(',') if ohpc_gres_types else 'omit' }}" + GresTypes: "{{ ohpc_gres_types if ohpc_gres_types != '' else 'omit' }}" openhpc_cgroup_default_config: ConstrainCores: "yes" ConstrainDevices: "yes" @@ -50,18 +50,16 @@ openhpc_cgroup_default_config: openhpc_config: {} openhpc_cgroup_config: {} -ohpc_gres_types: - # toplevel nvml autodetect: - - "{{ ['gpu'] if openhpc_gres_autodetect == 'nvml' else [] }}" - # nodegroup nvml autodetect: - - "{{ ['gpu'] if openhpc_nodegroups | 
map(attribute='gres_autodetect', default='') | unique | select('eq', 'nvml') else [] }}" - # nodegroup specific gres conf: - - "{{ - openhpc_nodegroups | +ohpc_gres_types: >- + {{ + ( + (['gpu'] if openhpc_gres_autodetect == 'nvml' else []) + + (['gpu'] if 'nvml' in (openhpc_nodegroups | map(attribute='gres_autodetect', default='') | unique | list) else []) + + (openhpc_nodegroups | community.general.json_query('[].gres[].conf') | map('regex_search', '^(\\w+)') - }}" + ) | list) | flatten | reject('eq', '') | sort | unique | join(',') + }} openhpc_gres_template: gres.conf.j2 openhpc_cgroup_template: cgroup.conf.j2 From 6ca46ddbaee582f6894777a799d6951a0fef0e6c Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 9 Oct 2025 12:01:06 +0100 Subject: [PATCH 08/14] fixup validation --- tasks/validate.yml | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/tasks/validate.yml b/tasks/validate.yml index dea23254..3c2c424c 100644 --- a/tasks/validate.yml +++ b/tasks/validate.yml @@ -9,6 +9,16 @@ delegate_to: localhost run_once: true +- name: Validate nodegroups define names + # Needed for the next check..
+ # NB: Don't validate names against inventory groups as those are allowed to be missing + ansible.builtin.assert: + that: "'name' in item" + fail_msg: "A mapping in openhpc_nodegroups is missing 'name'" + loop: "{{ openhpc_nodegroups }}" + delegate_to: localhost + run_once: true + - name: Check no host appears in more than one nodegroup assert: that: "{{ _openhpc_check_hosts.values() | select('greaterthan', 1) | length == 0 }}" @@ -21,30 +31,10 @@ delegate_to: localhost run_once: true - -# - name: Validate openhpc_nodegroups contain name -# ansible.builtin.assert: -# that: -# - "'name' in nodegroup -# - openhpc_cluster_name ~ '_' ~ nodegroup.name in group_names -# fail_msg: "nodegroup does not specify a name or openhpc_cluster_name ~ '_' ~ nodegroup.name not found in inventory" - -# loop: "{{ openhpc_nodegroups }}" - -- name: debug openhpc_nodegroups - ansible.builtin.debug: - msg: "{{ (item.gres | select('contains', 'file')) }} == {{ (item.gres | length) }}" - loop: "{{ openhpc_nodegroups }}" - when: - - item.gres_autodetect | default(openhpc_gres_autodetect) == 'off' - - "'gres' in item" - delegate_to: localhost - run_once: true - -- name: Validate openhpc_nodegroups +- name: Validate GRES definitions ansible.builtin.assert: that: "(item.gres | select('contains', 'file') | length) == (item.gres | length)" - fail_msg: "GRES configuration(s) in openhpc_nodegroup '{{ item.name }}' do not include 'file' but GRES autodetection is not enabled" + fail_msg: "GRES configuration(s) in openhpc_nodegroups '{{ item.name }}' do not include 'file' but GRES autodetection is not enabled" loop: "{{ openhpc_nodegroups }}" when: - item.gres_autodetect | default(openhpc_gres_autodetect) == 'off' From 19bf65600581e1a6124bb9774e2d985fd30094bf Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 9 Oct 2025 14:34:20 +0100 Subject: [PATCH 09/14] update README --- README.md | 137 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 70 insertions(+), 67 deletions(-) 
diff --git a/README.md b/README.md index 30677909..48157fe6 100644 --- a/README.md +++ b/README.md @@ -68,12 +68,20 @@ unique set of homogenous nodes: `free --mebi` total * `openhpc_ram_multiplier`. * `ram_multiplier`: Optional. An override for the top-level definition `openhpc_ram_multiplier`. Has no effect if `ram_mb` is set. - * `gres_autodetect`: Optional. The [auto detection mechanism](https://slurm.schedmd.com/gres.conf.html#OPT_AutoDetect) to use for the generic resources. Note: you must still define the `gres` dictionary (see below) but you only need the define the `conf` key. See [GRES autodetection](#gres-autodetection) section below. - * `gres`: Optional. List of dicts defining [generic resources](https://slurm.schedmd.com/gres.html). Each dict should define: - - `conf`: A string with the [resource specification](https://slurm.schedmd.com/slurm.conf.html#OPT_Gres_1) but requiring the format `::`, e.g. `gpu:A100:2`. Note the `type` is an arbitrary string. - - `file`: Omit if `gres_autodetect` is set. A string with the [File](https://slurm.schedmd.com/gres.conf.html#OPT_File) (path to device(s)) for this resource, e.g. `/dev/nvidia[0-1]` for the above example. - - Note [GresTypes](https://slurm.schedmd.com/slurm.conf.html#OPT_GresTypes) is automatically set from `gres` entries. + * `gres_autodetect`: Optional. The [hardware autodetection mechanism](https://slurm.schedmd.com/gres.conf.html#OPT_AutoDetect) + to use for [generic resources](https://slurm.schedmd.com/gres.html). + **NB:** A value of `'off'` (the default) must be quoted to avoid yaml + conversion to `false`. + * `gres`: Optional. List of dicts defining [generic resources](https://slurm.schedmd.com/gres.html). + Not required if using `nvml` GRES autodetection. Keys/values in dicts are: + - `conf`: A string defining the [resource specification](https://slurm.schedmd.com/slurm.conf.html#OPT_Gres_1) + in the format `::`, e.g. `gpu:A100:2`. 
+ - `file`: A string defining device path(s) as per [File](https://slurm.schedmd.com/gres.conf.html#OPT_File), + e.g. `/dev/nvidia[0-1]`. Not required if using any GRES autodetection. + + Note [GresTypes](https://slurm.schedmd.com/slurm.conf.html#OPT_GresTypes) is + automatically set from the defined GRES or GRES autodetection. See [GRES Configuration](#gres-configuration) + for more discussion. * `features`: Optional. List of [Features](https://slurm.schedmd.com/slurm.conf.html#OPT_Features) strings. * `node_params`: Optional. Mapping of additional parameters and values for [node configuration](https://slurm.schedmd.com/slurm.conf.html#lbAE). @@ -106,6 +114,10 @@ partition. Each partition mapping may contain: If this variable is not set one partition per nodegroup is created, with default partition configuration for each. +`openhpc_gres_autodetect`: Optional. A global default for `openhpc_nodegroups.gres_autodetect` +defined above. **NB:** A value of `'off'` (the default) must be quoted to avoid yaml + conversion to `false`. + `openhpc_job_maxtime`: Maximum job time limit, default `'60-0'` (60 days), see [slurm.conf:MaxTime](https://slurm.schedmd.com/slurm.conf.html#OPT_MaxTime). **NB:** This should be quoted to avoid Ansible conversions. @@ -278,7 +290,7 @@ cluster-control This example shows how partitions can span multiple types of compute node. -This example inventory describes three types of compute node (login and +Assume an inventory containing two types of compute node (login and control nodes are omitted for brevity): ```ini @@ -293,17 +305,12 @@ cluster-general-1 # large memory nodes cluster-largemem-0 cluster-largemem-1 - -[hpc_gpu] -# GPU nodes -cluster-a100-0 -cluster-a100-1 ... 
``` -Firstly the `openhpc_nodegroups` is set to capture these inventory groups and -apply any node-level parameters - in this case the `largemem` nodes have -2x cores reserved for some reason, and GRES is configured for the GPU nodes: +Firstly `openhpc_nodegroups` maps to these inventory groups and applys any +node-level parameters - in this case the `largemem` nodes have 2x cores +reserved for some reason: ```yaml openhpc_cluster_name: hpc @@ -312,104 +319,100 @@ openhpc_nodegroups: - name: large node_params: CoreSpecCount: 2 - - name: gpu - gres: - - conf: gpu:A100:2 - file: /dev/nvidia[0-1] ``` -or if using the NVML gres_autodection mechamism (NOTE: this requires recompilation of the slurm binaries to link against the [NVIDIA Management libray](#gres-autodetection)): -```yaml -openhpc_cluster_name: hpc -openhpc_nodegroups: - - name: general - - name: large - node_params: - CoreSpecCount: 2 - - name: gpu - gres_autodetect: nvml - gres: - - conf: gpu:A100:2 -``` -Now two partitions can be configured - a default one with a short timelimit and -no large memory nodes for testing jobs, and another with all hardware and longer -job runtime for "production" jobs: +Now two partitions can be configured using `openhpc_partitions`: A default +partition for testing jobs with a short timelimit and no large memory nodes, +and another partition with all hardware and longer job runtime for "production" +jobs: ```yaml openhpc_partitions: - name: test nodegroups: - general - - gpu maxtime: '1:0:0' # 1 hour default: 'YES' - name: general nodegroups: - general - large - - gpu maxtime: '2-0' # 2 days default: 'NO' ``` Users will select the partition using `--partition` argument and request nodes -with appropriate memory or GPUs using the `--mem` and `--gres` or `--gpus*` -options for `sbatch` or `srun`. +with appropriate memory using the `--mem` option for `sbatch` or `srun`. 
-Finally here some additional configuration must be provided for GRES: -```yaml -openhpc_config: - GresTypes: - -gpu -``` +## GRES Configuration -## GRES autodetection +### Autodetection -Some autodetection mechanisms require recompilation of the slurm packages to -link against external libraries. Examples are shown in the sections below. +Some autodetection mechanisms require recompilation of Slurm packages to link +against external libraries. Examples are shown in the sections below. -### Recompiling slurm binaries against the [NVIDIA Management libray](https://developer.nvidia.com/management-library-nvml) +#### Recompiling Slurm binaries against the [NVIDIA Management libray](https://developer.nvidia.com/management-library-nvml) -This will allow you to use `gres_autodetect: nvml` in your `nodegroup` -definitions. +This allows using `openhpc_gres_autodetect: nvml` or `openhpc_nodegroup.gres_autodetect: nvml`. First, [install the complete cuda toolkit from NVIDIA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/). You can then recompile the slurm packages from the source RPMS as follows: ```sh dnf download --source slurm-slurmd-ohpc - rpm -i slurm-ohpc-*.src.rpm - cd /root/rpmbuild/SPECS - dnf builddep slurm.spec - rpmbuild -bb -D "_with_nvml --with-nvml=/usr/local/cuda-12.8/targets/x86_64-linux/" slurm.spec | tee /tmp/build.txt ``` NOTE: This will need to be adapted for the version of CUDA installed (12.8 is used in the example). -The RPMs will be created in ` /root/rpmbuild/RPMS/x86_64/`. The method to distribute these RPMs to -each compute node is out of scope of this document. You can either use a custom package repository -or simply install them manually on each node with Ansible. +The RPMs will be created in `/root/rpmbuild/RPMS/x86_64/`. The method to distribute these RPMs to +each compute node is out of scope of this document. 
-#### Configuration example
+## GRES configuration examples

-A configuration snippet is shown below:
+For NVIDIA GPUs, `nvml` GRES autodetection can be used. This requires:
+- The relevant GPU nodes to have the `nvidia-smi` binary installed
+- Slurm to be compiled against the NVIDIA management library as above
+
+Autodetection can then be enabled either for all nodegroups:
 ```yaml
-openhpc_cluster_name: hpc
+openhpc_gres_autodetect: nvml
+```
+
+or for individual nodegroups, e.g.:
+```yaml
+openhpc_nodegroups:
+  - name: example
+    gres_autodetect: nvml
+  ...
+```
+
+In either case no additional configuration of GRES is required. Any nodegroups
+with NVIDIA GPUs will automatically get `gpu` GRES defined for all GPUs found.
+GPUs within a node do not need to be the same model but nodes in a nodegroup
+must be homogeneous. GRES types are set to the autodetected model names e.g. `H100`.
+
+For `nvml` GRES autodetection, per-nodegroup `gres_autodetect` and/or `gres` keys
+can still be provided. These can be used to disable/override the default
+autodetection method, or to allow checking autodetected resources against
+expectations as described by [gres.conf documentation](https://slurm.schedmd.com/gres.conf.html).
+
+Without any autodetection, a GRES configuration for NVIDIA GPUs might be:
+
+```
 openhpc_nodegroups:
   - name: general
-  - name: large
-    node_params:
-      CoreSpecCount: 2
   - name: gpu
-    gres_autodetect: nvml
     gres:
-      - conf: gpu:A100:2
+      conf: gpu:H200:2
+      file: /dev/nvidia[0-1]
 ```
-for additional context refer to the GPU example in: [Multiple Nodegroups](#multiple-nodegroups).
+Note that the `nvml` autodetection is special in this role. Other autodetection
+mechanisms, e.g. `nvidia` or `rsmi` allow the `gres.file:` specification to be
+omitted but still require `gres.conf:` to be defined.

 1 Slurm 20.11 removed `accounting_storage/filetxt` as an option.
This version of Slurm was introduced in OpenHPC v2.1 but the OpenHPC repos are common to all OpenHPC v2.x releases. [↩](#accounting_storage) From 24840330d50422fbc9a631a8f7081a15ad9ea9f5 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 9 Oct 2025 15:00:30 +0100 Subject: [PATCH 10/14] disable waffly AI PR summary --- .gemini/config.yaml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .gemini/config.yaml diff --git a/.gemini/config.yaml b/.gemini/config.yaml new file mode 100644 index 00000000..7cd1337d --- /dev/null +++ b/.gemini/config.yaml @@ -0,0 +1,3 @@ +code_review: + pull_request_opened: + summary: false From d7d1aa934233f40e4e54c0bc5f76fcc847a487b8 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 9 Oct 2025 15:03:01 +0100 Subject: [PATCH 11/14] fixup README example --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 48157fe6..1851e5cf 100644 --- a/README.md +++ b/README.md @@ -407,8 +407,8 @@ openhpc_nodegroups: - name: general - name: gpu gres: - conf: gpu:H200:2 - file: /dev/nvidia[0-1] + - conf: gpu:H200:2 + file: /dev/nvidia[0-1] ``` Note that the `nvml` autodetection is special in this role. 
Other autodetection From c2c72ecff97c27f8deef75a12a92ca9c99cebf62 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 9 Oct 2025 15:04:17 +0100 Subject: [PATCH 12/14] fixup library boilerplate --- library/gpu_info.py | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/library/gpu_info.py b/library/gpu_info.py index e04b42cd..67f762cd 100644 --- a/library/gpu_info.py +++ b/library/gpu_info.py @@ -13,29 +13,15 @@ DOCUMENTATION = """ --- -module: sacct_cluster -short_description: Manages clusters in the accounting database -version_added: "2.9" +module: gpu_info +short_description: Gathers information about NVIDIA GPUs on a node description: - - "Adds/removes a cluster from the accounting database" -options: - name: - description: - - Name of the cluster - required: true - type: str - state: - description: - - If C(present), cluster will be added if it does't already exist - - If C(absent), cluster will be removed if it exists - type: str - required: true - choices: [ absent, present] - + - "This module queries for NVIDIA GPUs using `nvidia-smi` and returns information about them. It is designed to fail gracefully if `nvidia-smi` is not present or if the NVIDIA driver is not running." +options: {} requirements: - "python >= 3.6" author: - - Will Szumski, StackHPC + - Steve Brasier, StackHPC """ EXAMPLES = """ From 965053ce2439cd48b6d1b3264ccaa49cf55ac108 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 9 Oct 2025 15:05:03 +0100 Subject: [PATCH 13/14] fix README typos --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1851e5cf..fcd9ae28 100644 --- a/README.md +++ b/README.md @@ -115,8 +115,8 @@ If this variable is not set one partition per nodegroup is created, with default partition configuration for each. `openhpc_gres_autodetect`: Optional. A global default for `openhpc_nodegroups.gres_autodetect` -defined above. 
**NB:** A value of `'off'` (the default) must be quoted to avoid yaml - conversion to `false`. +defined above. **NB:** A value of `'off'` (the default) must be quoted to avoid +yaml conversion to `false`. `openhpc_job_maxtime`: Maximum job time limit, default `'60-0'` (60 days), see [slurm.conf:MaxTime](https://slurm.schedmd.com/slurm.conf.html#OPT_MaxTime). @@ -308,7 +308,7 @@ cluster-largemem-1 ... ``` -Firstly `openhpc_nodegroups` maps to these inventory groups and applys any +Firstly `openhpc_nodegroups` maps to these inventory groups and applies any node-level parameters - in this case the `largemem` nodes have 2x cores reserved for some reason: @@ -350,7 +350,7 @@ with appropriate memory using the `--mem` option for `sbatch` or `srun`. Some autodetection mechanisms require recompilation of Slurm packages to link against external libraries. Examples are shown in the sections below. -#### Recompiling Slurm binaries against the [NVIDIA Management libray](https://developer.nvidia.com/management-library-nvml) +#### Recompiling Slurm binaries against the [NVIDIA Management library](https://developer.nvidia.com/management-library-nvml) This allows using `openhpc_gres_autodetect: nvml` or `openhpc_nodegroup.gres_autodetect: nvml`. From ed7f42254183148c312fac5125f043c1ca2216a0 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 9 Oct 2025 15:55:33 +0100 Subject: [PATCH 14/14] try to avoid jmespath failures in CI --- .github/workflows/ci.yml | 3 ++- molecule/requirements.txt | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 78e5f888..25444fdc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,7 +86,8 @@ jobs: - name: Install test dependencies. 
run: | - pip3 install -U pip ansible>=2.9.0 molecule-plugins[podman]==23.5.0 yamllint ansible-lint + pip3 install -U pip + pip install -r molecule/requirements.txt ansible-galaxy collection install containers.podman:>=1.10.1 # otherwise get https://github.com/containers/ansible-podman-collections/issues/428 - name: Display ansible version diff --git a/molecule/requirements.txt b/molecule/requirements.txt index 4cd570c0..6fd45b86 100644 --- a/molecule/requirements.txt +++ b/molecule/requirements.txt @@ -1,5 +1,8 @@ pip setuptools molecule[lint,ansible] -molecule-plugins[podman] +molecule-plugins[podman]==23.5.0 ansible>=2.9.0 +yamllint +ansible-lint +jmespath