diff --git a/.github/workflows/stackhpc.yml b/.github/workflows/stackhpc.yml index 787374e9..ea18b310 100644 --- a/.github/workflows/stackhpc.yml +++ b/.github/workflows/stackhpc.yml @@ -90,11 +90,11 @@ jobs: . environments/.stackhpc/activate ansible-playbook -vv ansible/adhoc/hpctests.yml - - name: Run EESSI tests - run: | - . venv/bin/activate - . environments/.stackhpc/activate - ansible-playbook -vv ansible/ci/check_eessi.yml + # - name: Run EESSI tests + # run: | + # . venv/bin/activate + # . environments/.stackhpc/activate + # ansible-playbook -vv ansible/ci/check_eessi.yml - name: Confirm Open Ondemand is up (via SOCKS proxy) run: | diff --git a/ansible/ci/check_eessi.yml b/ansible/ci/check_eessi.yml index 0112509a..280f8658 100644 --- a/ansible/ci/check_eessi.yml +++ b/ansible/ci/check_eessi.yml @@ -17,18 +17,34 @@ repo: "https://github.com/eessi/eessi-demo.git" dest: "{{ eessi_test_rootdir }}/eessi-demo" - - name: Run test job - ansible.builtin.shell: - cmd: | + - name: Create batch script + copy: + dest: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow/tensorflow.sh" + content: | + #!/usr/bin/env bash + #SBATCH --output=%x.out + #SBATCH --error=%x.out source /cvmfs/pilot.eessi-hpc.org/latest/init/bash srun ./run.sh + + - name: Run test job + ansible.builtin.shell: + cmd: sbatch --wait tensorflow.sh chdir: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow" - executable: /bin/bash register: job_output + - name: Retrieve job output + slurp: + src: "{{ eessi_test_rootdir }}/eessi-demo/TensorFlow/tensorflow.sh.out" + register: _tensorflow_out + no_log: true # as its base64 encoded so useless + + - name: Show job output + debug: + msg: "{{ _tensorflow_out.content | b64decode }}" + - name: Fail if job output contains error fail: # Note: Job prints live progress bar to terminal, so use regex filter to remove this from stdout msg: "Test job using EESSI modules failed. Job output was: {{ job_output.stdout | regex_replace('\b', '') }}" - when: '"Epoch 5/5" not in job_output.stdout' - \ No newline at end of file + when: '"Epoch 5/5" not in _tensorflow_out.content | b64decode' diff --git a/ansible/roles/grafana-dashboards/files/slurm-jobs.json b/ansible/roles/grafana-dashboards/files/slurm-jobs.json index 40afb37a..8cbc4438 100644 --- a/ansible/roles/grafana-dashboards/files/slurm-jobs.json +++ b/ansible/roles/grafana-dashboards/files/slurm-jobs.json @@ -46,7 +46,7 @@ "gnetId": 13535, "graphTooltip": 0, "id": null, - "iteration": 1607441312744, + "iteration": 1607441312745, "links": [], "panels": [ { @@ -150,7 +150,7 @@ } ], "query": "*", - "queryType": "randomWalk", + "queryType": "lucene", "refId": "A", "timeField": "@timestamp" } @@ -302,4 +302,4 @@ "uid": "jYPt7MTGk", "version": 2, "description": "Requires https://github.com/stackhpc/slurm-openstack-tools" -} \ No newline at end of file +} diff --git a/ansible/roles/openondemand/files/jupyter_requirements.txt b/ansible/roles/openondemand/files/jupyter_requirements.txt new file mode 100644 index 00000000..ec28503c --- /dev/null +++ b/ansible/roles/openondemand/files/jupyter_requirements.txt @@ -0,0 +1,101 @@ +# Python3.9, pip 23.2.1 +anyio==3.7.1 +argon2-cffi==21.3.0 +argon2-cffi-bindings==21.2.0 +arrow==1.2.3 +asttokens==2.2.1 +async-lru==2.0.4 +attrs==23.1.0 +Babel==2.12.1 +backcall==0.2.0 +beautifulsoup4==4.12.2 +bleach==6.0.0 +certifi==2023.7.22 +cffi==1.15.1 +charset-normalizer==3.2.0 +comm==0.1.3 +debugpy==1.6.7 +decorator==5.1.1 +defusedxml==0.7.1 +exceptiongroup==1.1.2 +executing==1.2.0 +fastjsonschema==2.18.0 +fqdn==1.5.1 +idna==3.4 +importlib-metadata==6.8.0 +ipykernel==6.25.0 +ipython==8.14.0 +ipython-genutils==0.2.0 +ipywidgets==8.1.0 +isoduration==20.11.0 +jedi==0.19.0 +Jinja2==3.1.2 +json5==0.9.14 +jsonpointer==2.4 +jsonschema==4.18.4 +jsonschema-specifications==2023.7.1 +jupyter==1.0.0 +jupyter-console==6.6.3 +jupyter-events==0.7.0 +jupyter-lsp==2.2.0 +jupyter_client==8.3.0 +jupyter_core==5.3.1 +jupyter_server==2.7.0 +jupyter_server_terminals==0.4.4 +jupyterlab==4.0.3 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.8 +jupyterlab_server==2.24.0 +MarkupSafe==2.1.3 +matplotlib-inline==0.1.6 +mistune==3.0.1 +nbclient==0.8.0 +nbconvert==7.7.3 +nbformat==5.9.2 +nest-asyncio==1.5.7 +notebook==7.0.1 +notebook_shim==0.2.3 +overrides==7.3.1 +packaging==23.1 +pandocfilters==1.5.0 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +platformdirs==3.10.0 +prometheus-client==0.17.1 +prompt-toolkit==3.0.39 +psutil==5.9.5 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pycparser==2.21 +Pygments==2.15.1 +python-dateutil==2.8.2 +python-json-logger==2.0.7 +PyYAML==6.0.1 +pyzmq==25.1.0 +qtconsole==5.4.3 +QtPy==2.3.1 +referencing==0.30.0 +requests==2.31.0 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rpds-py==0.9.2 +Send2Trash==1.8.2 +six==1.16.0 +sniffio==1.3.0 +soupsieve==2.4.1 +stack-data==0.6.2 +terminado==0.17.1 +tinycss2==1.2.1 +tomli==2.0.1 +tornado==6.3.2 +traitlets==5.9.0 +typing_extensions==4.7.1 +uri-template==1.3.0 +urllib3==2.0.4 +wcwidth==0.2.6 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.6.1 +widgetsnbextension==4.0.8 +zipp==3.16.2 diff --git a/ansible/roles/openondemand/tasks/jupyter_compute.yml b/ansible/roles/openondemand/tasks/jupyter_compute.yml index 4aa7e7e9..a87d07da 100644 --- a/ansible/roles/openondemand/tasks/jupyter_compute.yml +++ b/ansible/roles/openondemand/tasks/jupyter_compute.yml @@ -2,20 +2,31 @@ # See https://osc.github.io/ood-documentation/latest/app-development/tutorials-interactive-apps/add-jupyter/software-requirements.html # - Will already have openssl and lmod +- name: Ensure python3.9 installed + dnf: + name: python39 + tags: install + - name: Install jupyter venv # Requires separate step so that the upgraded pip is used to install packages pip: name: pip state: latest - virtualenv: /opt/jupyter - virtualenv_command: python3 -m venv + virtualenv: /opt/jupyter-py39 + virtualenv_command: python3.9 -m venv + tags: install + +- name: Copy jupyter requirements file + copy: + src: jupyter_requirements.txt + dest: /opt/jupyter-py39/jupyter_requirements.txt tags: install - name: Install jupyter package in venv pip: - name: jupyter - virtualenv: /opt/jupyter - virtualenv_command: python3 -m venv + virtualenv: /opt/jupyter-py39 + virtualenv_command: python3.9 -m venv + requirements: /opt/jupyter-py39/jupyter_requirements.txt tags: install diff --git a/environments/.stackhpc/ansible.cfg b/environments/.stackhpc/ansible.cfg index d7a3783f..2a12e06b 100644 --- a/environments/.stackhpc/ansible.cfg +++ b/environments/.stackhpc/ansible.cfg @@ -11,5 +11,5 @@ roles_path = ../../ansible/roles filter_plugins = ../../ansible/filter_plugins [ssh_connection] -ssh_args = -o ControlMaster=auto -o ControlPersist=240s -o PreferredAuthentications=publickey -o UserKnownHostsFile=/dev/null +ssh_args = -o ControlMaster=auto -o ControlPath=~/.ssh/%r@%h-%p -o ControlPersist=240s -o PreferredAuthentications=publickey -o UserKnownHostsFile=/dev/null pipelining = True diff --git a/environments/common/inventory/group_vars/all/grafana.yml b/environments/common/inventory/group_vars/all/grafana.yml index ce83e3c9..8222a3cc 100644 --- a/environments/common/inventory/group_vars/all/grafana.yml +++ b/environments/common/inventory/group_vars/all/grafana.yml @@ -63,6 +63,7 @@ grafana_datasources: url: "http://{{ prometheus_address }}:9090" # default prometheus port editable: true - name: slurmstats + # see https://github.com/grafana/opensearch-datasource#configure-the-data-source-with-provisioning type: grafana-opensearch-datasource url: "https://{{ opensearch_address }}:9200" basicAuth: true @@ -74,12 +75,15 @@ grafana_datasources: tlsSkipVerify: true database: filebeat-* timeField: "@timestamp" - flavor: opensearch + # Have to set flavor and version, but ansible/roles/opensearch/templates/opensearch.yml.j2 fakes version for filebeat + # so need to set to fake version here: + version: '7.10.2' + flavor: elasticsearch editable: true # readOnly: false grafana_plugins: - - grafana-opensearch-datasource + - grafana-opensearch-datasource 2.8.1 # want to set grafana_server.serve_from_sub_path if have Open Ondemand to proxy: grafana_server: diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index 4652b375..c29b6a55 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -37,7 +37,7 @@ openondemand_clusters: basic: script_wrapper: | module purge - export PATH=/opt/jupyter/bin/:$PATH + export PATH=/opt/jupyter-py39/bin/:$PATH %s set_host: host=$(hostname -s) vnc: