Skip to content

Commit

Permalink
start of work to add flux
Browse files Browse the repository at this point in the history
Flux has a race condition where after submit (when we get
back a jobid) the file might still be needed. This means
that we need custom logic to not delete the temporary
file until cancel / worker completion. Flux also does
better when given an executable file and a full path,
so the submit function is modified accordingly.
Finally, Flux does not support the concepts of a memory
limit or an account.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Apr 22, 2023
1 parent af044b4 commit 43229c7
Show file tree
Hide file tree
Showing 20 changed files with 711 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
strategy:
fail-fast: false
matrix:
jobqueue: ["htcondor", "pbs", "sge", "slurm", "none"]
jobqueue: ["flux", "htcondor", "pbs", "sge", "slurm", "none"]

steps:
- name: Cancel previous runs
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ ci/slurm/environment.yml
ci/pbs/environment.yml
ci/sge/environment.yml
ci/htcondor/environment.yml
ci/flux/environment.yml
ci/flux/conf/tmp
.vscode/
ca.pem
key.pem
45 changes: 45 additions & 0 deletions ci/flux.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash

# Build the flux images, start the compose cluster, and prepare the
# shared space used by the tests. Runs before dependency installation.
function jobqueue_before_install {
  docker version
  docker-compose version

  # Build images and start flux cluster.
  # Guard each cd so a missing directory fails loudly instead of letting
  # docker-compose run from the wrong place (shellcheck SC2164).
  cd ./ci/flux || return 1
  cp ../environment.yml ./environment.yml
  docker-compose build node-1
  docker-compose up -d
  cd - || return 1

  # Set shared space permissions (use sudo-equivalent via root exec: the
  # directory is owned by root and the tests run as the flux user)
  docker exec node-1 /bin/bash -c "chmod -R 777 /shared_space"

  docker ps -a
  docker images
  show_network_interfaces
}

# Print the network interfaces visible inside each cluster container,
# as reported by psutil from within the container's python.
function show_network_interfaces {
  local container
  for container in node-1 node-2 node-3; do
    printf '%s\n' '------------------------------------------------------------'
    printf 'docker container: %s\n' "$container"
    docker exec "$container" python -c 'import psutil; print(psutil.net_if_addrs().keys())'
    printf '%s\n' '------------------------------------------------------------'
  done
}

# Install dask-jobqueue (editable mode) inside the primary container.
function jobqueue_install {
  docker exec node-1 /bin/bash -c 'cd /dask-jobqueue; pip install -e .'
}

# Run the flux-marked portion of the test suite inside the primary container.
function jobqueue_script {
  docker exec node-1 /bin/bash -c 'cd; pytest /dask-jobqueue/dask_jobqueue --verbose -E flux -s'
}

# Dump flux job state for post-mortem debugging, then tear down the
# compose cluster.
function jobqueue_after_script {
  docker exec node-1 bash -c 'flux jobs -a'
  # Guard the cd so teardown never runs docker-compose from the wrong
  # directory (shellcheck SC2164).
  cd ./ci/flux || return 1
  docker-compose stop
  docker-compose rm --force
  cd - || return 1
}
39 changes: 39 additions & 0 deletions ci/flux/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
FROM fluxrm/flux-sched:el8
# Number of cluster nodes, including the broker (node-1)
ARG replicas=3
ENV workers=${replicas}
USER root

# These are the STATE_DIR, system, and resources directories
ENV STATE_DIR=/var/lib/flux
ENV LC_ALL en_US.UTF-8
RUN mkdir -p ${STATE_DIR} /etc/flux/system /etc/flux/system/cron.d /etc/flux/config /run/flux && \
    mkdir -p /etc/flux/system/cron.d && \
    mkdir -p /mnt/curve && \
    flux keygen /mnt/curve/curve.cert && \
    # Resource set spans the broker plus all workers: node-1..node-N
    flux R encode --hosts="node-[1-${workers}]" > /etc/flux/system/R

WORKDIR /home/fluxuser
RUN pip3 install --upgrade pip && \
    pip3 install pika --upgrade

# bind-utils provides nslookup (used by the entrypoint to derive hostnames)
RUN yum install -y iproute bind-utils

# Use mamba for slightly faster install.
# NOTE: mambaforge is installed to /opt/anaconda, so PATH must point at
# /opt/anaconda/bin (the previous /opt/conda/bin was a typo).
RUN /bin/bash -c "curl -L https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh > mambaforge.sh && \
    bash mambaforge.sh -b -p /opt/anaconda && \
    rm mambaforge.sh" && \
    export PATH=/opt/anaconda/bin:$PATH && \
    /opt/anaconda/bin/conda clean -tipy
ENV PATH /opt/anaconda/bin:$PATH

# environment.yml file is copied by CI script. If manually building, you should copy it too from parent directory
COPY environment.yml .
RUN mamba env update -n base --file environment.yml

# Important! In production flux should not be run as root
# USER fluxuser
WORKDIR /home/fluxuser/
COPY ./conf/entrypoint.sh ./
ENTRYPOINT /bin/bash /home/fluxuser/entrypoint.sh
22 changes: 22 additions & 0 deletions ci/flux/conf/broker.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Flux system-instance configuration shared by all CI cluster containers.

# Flux needs to know the path to the IMP executable
[exec]
imp = "/usr/libexec/flux/flux-imp"

# Allow both guest users and root owners to access the instance
# (convenient for CI; not a production configuration).
[access]
allow-guest-user = true
allow-root-owner = true

# Point to resource definition generated with flux-R(1).
# noverify skips validating the declared resources against the live host.
[resource]
path = "/etc/flux/system/R"
noverify = true

# Static bootstrap: brokers discover each other over the compose network
# using the shared curve certificate baked into the image.
[bootstrap]
curve_cert = "/mnt/curve/curve.cert"
default_port = 8050
default_bind = "tcp://eth0:%%p"
default_connect = "tcp://%%h:%%p"
# Hosts are numbered from 1 because docker-compose numbers replicas from 1.
hosts = [
    { host="node-[1-3]"},
]
64 changes: 64 additions & 0 deletions ci/flux/conf/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
# Entrypoint for the CI flux containers. Must run under bash: the
# hostname derivation below uses arrays and ${var//} substitution,
# which are not POSIX sh features (the previous '#!/bin/sh' shebang
# would break wherever sh is dash/ash).

# Broker Options: important!
# The local-uri setting places the unix domain socket in rundir
# if FLUX_URI is not set, tools know where to connect.
# -Slog-stderr-level= can be set to 7 for larger debug level
# or exposed as a variable
brokerOptions="-Scron.directory=/etc/flux/system/cron.d \
  -Stbon.fanout=256 \
  -Srundir=/run/flux \
  -Sstatedir=${STATE_DIRECTORY:-/var/lib/flux} \
  -Slocal-uri=local:///run/flux/local \
  -Slog-stderr-level=6 \
  -Slog-stderr-mode=local"

# quorum settings influence how the instance treats missing ranks
# by default all ranks must be online before work is run, but
# we want it to be OK to run when a few are down
# These are currently removed because we want the main rank to
# wait for all the others, and then they clean up nicely
# -Sbroker.quorum=0 \
# -Sbroker.quorum-timeout=none \

# This should be added to keep running as a service
# -Sbroker.rc2_none \

# Derive hostname (this is a hack to recover the name defined by the
# docker-compose network from this container's IP via reverse lookup)
address=$(nslookup "$(hostname -i)" | head -n 1)
# shellcheck disable=SC2206 -- word splitting is intentional here
parts=(${address//=/ })
hostName=${parts[2]}
# Keep only the short name (strip the domain components)
thisHost=(${hostName//./ })
thisHost=${thisHost[0]}
echo "${thisHost}"

# Export this hostname so flux reports it instead of the container id
export FLUX_FAKE_HOSTNAME=${thisHost}

cd "${workdir}" || exit 1
printf "\n👋 Hello, I'm %s\n" "${thisHost}"
printf 'The main host is %s\n\n' "${mainHost}"
printf '🔍️ Here is what I found in the working directory, %s\n' "${workdir}"
ls "${workdir}"

# --cores=IDS Assign cores with IDS to each rank in R, so we assign 1-N to 0
printf '\n📦 Resources\n'
sudo cat /etc/flux/system/R

printf '\n🦊 Independent Minister of Privilege\n'
cat /etc/flux/imp/conf.d/imp.toml

# The curve cert is generated on container build
# We assume the munge.key is the same also since we use the same base container!
# located at /etc/munge/munge.key

# Give broker time to start before workers.
# NOTE: ${brokerOptions} is deliberately unquoted below — it must
# word-split into individual -S options.
if [ "${thisHost}" != "${mainHost}" ]; then
  printf '\n😪 Sleeping to give broker time to start...\n'
  sleep 15
  FLUX_FAKE_HOSTNAME=${thisHost} flux start -o --config /etc/flux/config ${brokerOptions} sleep inf
else
  echo "Extra arguments are: $@"
  printf 'flux start -o --config /etc/flux/config %s sleep inf\n' "${brokerOptions}"
  FLUX_FAKE_HOSTNAME=${thisHost} flux start -o --config /etc/flux/config ${brokerOptions} sleep inf
fi
3 changes: 3 additions & 0 deletions ci/flux/conf/imp.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Flux IMP (Independent Minister of Privilege) configuration: restricts
# which users may invoke the privileged helper and which job shells it
# is allowed to exec.
[exec]
allowed-users = [ "flux", "root" ]
allowed-shells = [ "/usr/libexec/flux/flux-shell" ]
88 changes: 88 additions & 0 deletions ci/flux/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
version: "2.2"

# Shared number of replicas (workers) for build and runtime
# This includes the broker (node-1)
x-shared-workers:
  &workers
  replicas: 3

# Build args that go into building container
# NOTE(review): the Dockerfile in this directory only declares ARG
# replicas; flux_sched_version is currently unconsumed — confirm before
# relying on it.
x-shared-build-args: &shared-build-args
  flux_sched_version: focal
  <<: *workers

# Shared environment for runtime
x-shared-environment: &shared-environment
  SPL_BROKER_URL: amqp://fluxuser:fluxrabbit@rabbit:5672//
  mainHost: node-1
  workdir: /code/workdir
  CI_SHARED_SPACE: /shared_space
  <<: *workers

# Mounts shared by every node: flux config, a shared /tmp, the working
# directory, the dask-jobqueue checkout, and the shared test space.
x-shared-volumes: &shared-volumes
  - ./conf/imp.toml:/etc/flux/imp/conf.d/imp.toml
  - ./conf/broker.toml:/etc/flux/config/broker.toml
  - ./conf/tmp:/tmp
  - ./:/code/workdir
  - slurm_jobdir:/data
  - ../..:/dask-jobqueue
  - shared_space:/shared_space

services:
  # node-1 is the broker (mainHost); node-2/node-3 are workers.
  node-1:
    build:
      context: ./
      args: *shared-build-args
    hostname: node-1
    container_name: node-1
    environment: *shared-environment
    volumes: *shared-volumes
    networks:
      common-network:
        ipv4_address: 10.1.1.10
    cap_add:
      - NET_ADMIN

  node-2:
    build:
      context: ./
      args: *shared-build-args
    hostname: node-2
    container_name: node-2
    environment: *shared-environment
    volumes: *shared-volumes
    networks:
      common-network:
        ipv4_address: 10.1.1.11
    cap_add:
      - NET_ADMIN

  node-3:
    build:
      context: ./
      args: *shared-build-args
    hostname: node-3
    container_name: node-3
    environment: *shared-environment
    volumes: *shared-volumes
    networks:
      common-network:
        ipv4_address: 10.1.1.12
    cap_add:
      - NET_ADMIN

# Only volumes actually mounted by the services above are declared;
# the unused slurm leftovers (etc_munge, etc_slurm, var_lib_mysql,
# var_log_slurm) were removed.
volumes:
  slurm_jobdir:
  shared_space:

networks:
  common-network:
    driver: bridge
    ipam:
      driver: default
      config:
        - subnet: 10.1.1.0/24
1 change: 1 addition & 0 deletions dask_jobqueue/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# flake8: noqa
from . import config
from .core import JobQueueCluster
from .flux import FluxCluster
from .moab import MoabCluster
from .pbs import PBSCluster
from .slurm import SLURMCluster
Expand Down
2 changes: 1 addition & 1 deletion dask_jobqueue/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ class Job(ProcessInterface, abc.ABC):
See Also
--------
PBSCluster
FluxCluster
SLURMCluster
SGECluster
OARCluster
Expand Down Expand Up @@ -745,7 +746,6 @@ def _get_worker_security(self, security):
for key, value in worker_security_dict.items():
# dump worker in-memory keys for use in job_script
if value is not None and "\n" in value:

try:
f = tempfile.NamedTemporaryFile(
mode="wt",
Expand Down

0 comments on commit 43229c7

Please sign in to comment.