Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions dockerfiles/app_images/nvidia_hpc_bench/Dockerfile.nvidia_hpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Use an ARG to specify the base image
ARG BASE_IMAGE=harbor.stfc.ac.uk/stfc-cloud-staging/iris-bench/nvidia_hpc_base:latest

# Use the base image
FROM ${BASE_IMAGE}

# The nvidia_hpc_base image clones deepops under /root, so the HPL launcher
# lives in /root/deepops/workloads/bit/hpl. The ENTRYPOINT below uses a
# relative path, so the working directory must be the launcher's directory.
WORKDIR /root/deepops/workloads/bit/hpl

# Run HPL automatically on container start.
# `exec` replaces the bash wrapper with the launcher so that it becomes PID 1
# and receives the signals sent by `docker stop`.
# NOTE(review): "dgxa100_80GG" looks like a typo for "dgxa100_80G" - confirm
# against the system names accepted by launch_hpl_experiment.py.
ENTRYPOINT ["/bin/bash", "-c", "exec ./launch_hpl_experiment.py -c 1 -s dgxa100_80GG --maxnodes 6 --cruntime enroot"]
24 changes: 24 additions & 0 deletions dockerfiles/base_images/Dockerfile.nvidia_hpc_base
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Use the official Ubuntu base image
FROM ubuntu:20.04

# Set the working directory
WORKDIR /root

# Silence apt prompts during the build only; an ARG is visible to RUN steps
# but is not persisted into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Install dependencies in a single layer together with `apt-get update` so a
# stale package index is never reused. unzip is needed to unpack the NGC CLI.
RUN apt-get update && \
    apt-get install -y \
        git \
        python3-pip \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Download Nvidia NGC HPC Benchmarks.
# The HPL scripts end up in /root/deepops/workloads/bit/hpl; a `cd` here would
# have no effect because directory changes do not persist across RUN steps.
RUN git clone https://github.com/NVIDIA/deepops.git

# Install NGC CLI (the archive is removed in the same layer to keep the image small)
RUN wget https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/3.60.1/files/ngccli_linux.zip -O ngccli_linux.zip && \
    unzip ngccli_linux.zip && \
    rm ngccli_linux.zip && \
    chmod u+x ngc-cli/ngc

# Expose the CLI on PATH for later build steps and at runtime. RUN uses
# /bin/sh, which has no `source` builtin on Ubuntu, and ~/.bash_profile is not
# read by non-login shells, so PATH is set with ENV instead.
ENV PATH="${PATH}:/root/ngc-cli"

# NOTE(review): `ngc config set` may prompt for input - confirm that it
# completes non-interactively during `docker build`.
RUN ngc config set --format_type json
13 changes: 13 additions & 0 deletions dockerfiles/base_images/auth.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[
  {
    "key": "apikey",
    "source": "user settings",
    "value": "<API_KEY>"
  },
  {
    "key": "format_type",
    "source": "global argument",
    "value": "json"
  },
  {
    "key": "org",
    "source": "user settings",
    "value": "<ORG_NAME_OR_NUMBER>"
  }
]
15 changes: 13 additions & 2 deletions dockerfiles/build_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@ BASE_IMAGES_DIR="base_images"
# Directory (relative to this script's build context) holding the app image Dockerfiles
APP_IMAGES_DIR="app_images"

# List of base images to build
BASE_IMAGES=("mantid_base" "sciml_base" "nvidia_hpc_base")

# List of app images to build, grouped by the base image they are built on
SCIML_IMAGES=("mnist_tf_keras" "stemdl_classification" "synthetic_regression")
MANTID_IMAGES=("mantid_run_1" "mantid_run_4" "mantid_run_5" "mantid_run_8")
NVIDIA_IMAGES=("nvidia_hpl")

# Build base images first
echo "Building base images..."
Expand Down Expand Up @@ -55,11 +56,21 @@ for IMAGE in "${MANTID_IMAGES[@]}"; do
docker build -f $DOCKERFILE -t ${IMAGE_TAG} --build-arg BASE_IMAGE=mantid_base:latest .
done

# Build nvidia_hpc_bench images
# Each entry in NVIDIA_IMAGES maps to
# ${APP_IMAGES_DIR}/nvidia_hpc_bench/Dockerfile.<name> and is built on top of
# the locally built nvidia_hpc_base:latest image.
echo "Building nvidia_hpc_bench images..."
for IMAGE in "${NVIDIA_IMAGES[@]}"; do
    DOCKERFILE="${APP_IMAGES_DIR}/nvidia_hpc_bench/Dockerfile.${IMAGE}"
    IMAGE_TAG="${IMAGE}:latest"
    echo "Building app image: ${IMAGE_TAG}..."
    # Expansions are quoted so paths or tags containing whitespace or glob
    # characters are passed to docker as single arguments.
    docker build -f "${DOCKERFILE}" -t "${IMAGE_TAG}" --build-arg BASE_IMAGE=nvidia_hpc_base:latest .
done


# Build dummy image
# The dummy image has its own Dockerfile directly under ${APP_IMAGES_DIR} and
# is built without a BASE_IMAGE build argument.
echo "Building dummy image..."
DOCKERFILE="${APP_IMAGES_DIR}/Dockerfile.dummy"
IMAGE_TAG="dummy:latest"
echo "Building app image: ${IMAGE_TAG}..."
# Expansions are quoted so the path and tag are always passed as single arguments.
docker build -f "${DOCKERFILE}" -t "${IMAGE_TAG}" .

# Report completion once; -e makes echo interpret the trailing \n as an extra blank line.
echo -e "Build process completed.\n"
Loading