diff --git a/CHANGELOG.md b/CHANGELOG.md index 77ba8a90..e1df14e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ _Add new changes here_ - Added ability to save and reload text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536) - Added minimal dataclasses for text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536) - Added `skip_blank_patches` argument to `MapImages.patchify_all()` ([#540](https://github.com/maps-as-data/MapReader/pull/540)) +- Added `container` directory with everything needed for running MapReader with podman-hpc ([#552](https://github.com/maps-as-data/MapReader/pull/552)) ## [v1.6.1](https://github.com/Living-with-machines/MapReader/releases/tag/v1.6.1) (2024-11-18) diff --git a/container/Dockerfile b/container/Dockerfile new file mode 100644 index 00000000..b13ecb94 --- /dev/null +++ b/container/Dockerfile @@ -0,0 +1,82 @@ +# Use the NVIDIA PyTorch base image with CUDA support +FROM nvcr.io/nvidia/pytorch:24.06-py3 + +# Set environment variables to prevent Python from writing .pyc files and buffering stdout/stderr +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# Install system-level dependencies and build tools +RUN apt-get update && apt-get install -y \ + build-essential \ + cmake \ + wget \ + software-properties-common \ + libgeos-dev \ + libproj-dev \ + proj-data \ + proj-bin \ + libffi-dev \ + libssl-dev \ + libcurl4-openssl-dev \ + libexpat1-dev \ + libxerces-c-dev \ + zlib1g-dev \ + libpng-dev \ + libjpeg-dev \ + libtiff-dev \ + libspatialite-dev \ + libgl1 \ + python3-dev \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* + +# Upgrade pip to the latest version +RUN pip3 install --upgrade pip + +# Install GDAL dependencies +RUN apt-get update && apt-get install -y \ + libcurl4-openssl-dev \ + libexpat1-dev \ + libxerces-c-dev \ + zlib1g-dev \ + libpng-dev \ + libjpeg-dev \ + libtiff-dev \ + libspatialite-dev \ + && rm -rf /var/lib/apt/lists/* + +# Download and build GDAL 
3.5.3 from source +RUN wget https://download.osgeo.org/gdal/3.5.3/gdal-3.5.3.tar.gz -O /tmp/gdal-3.5.3.tar.gz && \ + tar -xzf /tmp/gdal-3.5.3.tar.gz -C /tmp && \ + cd /tmp/gdal-3.5.3 && \ + ./configure --prefix=/usr/local && \ + make -j$(nproc) && \ + make install && \ + ldconfig && \ + cd / && \ + rm -rf /tmp/gdal-3.5.3 /tmp/gdal-3.5.3.tar.gz + +# Verify GDAL installation +RUN gdalinfo --version + +# Set environment variables to help rasterio find GDAL +ENV CPLUS_INCLUDE_PATH=/usr/local/include +ENV C_INCLUDE_PATH=/usr/local/include +ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + +# Copy requirements into the container +COPY requirements.txt /tmp/ +COPY text-requirements.txt /tmp/ + +# Install Python dependencies from requirements.txt +RUN pip3 install --no-cache-dir -r /tmp/requirements.txt +RUN pip3 install --no-cache-dir -r /tmp/text-requirements.txt + +# Install MapReader from GitHub without dependencies to prevent altering existing packages +RUN pip3 install git+https://github.com/Living-with-machines/MapReader.git --no-deps + +# Set the working directory +WORKDIR /workspace + +# Define the default command to run when starting the container +CMD ["/bin/bash"] diff --git a/container/README.md b/container/README.md new file mode 100644 index 00000000..a84a05e6 --- /dev/null +++ b/container/README.md @@ -0,0 +1,83 @@ +# Run MapReader on Isambard-AI + +This document helps you to run MapReader on [Isambard-AI](https://docs.isambard.ac.uk/specs/#system-specifications-isambard-ai-phase-1). + +## Set-up + +1. Get familiar with [Podman-HPC at Isambard](https://docs.isambard.ac.uk/user-documentation/guides/containers/podman-hpc/). + +2. Clone MapReader and navigate to the container directory. 
+ +```bash +git clone https://github.com/maps-as-data/MapReader.git +cd MapReader/container +``` +## Build and run the image + +### Build the image + +After navigating to the `container` directory in the MapReader repo, run the following command to build the image: + +```bash +podman-hpc build -t mapreader . +``` + +If you run into any issues with this, see the ```Pull the image``` section at the bottom of this document. + +### Migrate the image + +Migration is the process of moving the image to the shared filesystem. +This is needed to run the MapReader image on the compute nodes. + +To migrate the image, run the following command: + +```bash +podman-hpc migrate mapreader:latest +``` + +### Run as a batch job + +To run the MapReader image as part of a batch job, you will need a Slurm script. +Ours looks like this: + + +To run the script, run the following command: + +```bash +sbatch pytorch_run_podmanhpc.sh +``` + +### Run in an interactive shell + +If instead you'd like to run in an interactive shell, you can use: + +```bash +podman-hpc run -it --gpu --rm --name mapreader \ + localhost/mapreader:latest /bin/bash +``` + +If you'd like to do this using GPU, you will need to launch an interactive job first using srun: + +```bash +srun --gres=gpu:1 -A YOUR_PROJECT --time 1:00:00 --pty /bin/bash +``` + +Then you can run the podman-hpc command above; the `--gpu` flag will ensure that your requested GPUs are available to the container. + +### Run the dependency test + +To check everything is working as expected, run the dependency test using the following command: + +```bash +python test.py +``` + +## Pull the image + +If you have issues building the image, you can pull it from [Docker Hub](https://hub.docker.com/repository/docker/oxfordfun/mapreader/tags): + +```bash +podman-hpc pull oxfordfun/mapreader:3.5.3-f +``` + +Then go on to migrate and run the image as described above. 
diff --git a/container/requirements.txt b/container/requirements.txt new file mode 100644 index 00000000..41cf64a2 --- /dev/null +++ b/container/requirements.txt @@ -0,0 +1,42 @@ +aiohttp>=3.8.1,<4.0.0 +azure-storage-blob>=12.9.0,<13.0.0 +black>=23.7.0,<25.0.0 +cartopy>=0.22.0 +Cython>=0.29.24,<0.30.0 +fiona>=1.10.1 +flake8>=6.0.0,<8.0.0 +geopandas<1.0.0 +geopy==2.1.0 +ipyannotate==0.1.0-beta.0 +ipykernel>=6.5.1,<7.0.0 +ipywidgets>=8.0.0,<9.0.0 +isodate>=0.7.2 +joblib>=1.4.0 +jupyter>=1.0.0,<2.0.0 +jupyter-console>=6.6.3 +jupyterlab-widgets>=3.0.13 +mapclassify>=2.0.0,<3.0.0 +matplotlib>=3.5.0,<4.0.0 +nest-asyncio>=1.5.1,<2.0.0 +numpy>=1.21.5,<2.0.0 +opencv-python>=4.8.1.78,<5.0.0 +pandas>=2.0.0 +parhugin>=0.0.3,<0.0.4 +pyogrio>=0.7.2 +pyproj>=3.2.0,<4.0.0 +pytest<9.0.0 +pytest-cov>=4.1.0,<6.0.0 +PyYAML>=6.0,<7.0 +rasterio>=1.2.10,<2.0.0 +scikit-image>=0.18.3 +scikit-learn>=1.0.1,<2.0.0 +Shapely>=2.0.0,<3.0.0 +simplekml>=1.3.6,<2.0.0 +tensorboard>=2.7.0,<3.0.0 +timm<1.0.0 +torchinfo<2.0.0 +tqdm<5.0.0 +transformers<5.0.0 +versioneer>=0.28 +widgetsnbextension>=4.0.13 +xyzservices==2024.9.0 diff --git a/container/run.sh b/container/run.sh new file mode 100644 index 00000000..9935251f --- /dev/null +++ b/container/run.sh @@ -0,0 +1,7 @@ + +#! /bin/bash + +# podman-hpc build -t mapreader . 
+ +podman-hpc run -it --gpu --rm --name mapreader \ + localhost/mapreader:latest /bin/bash diff --git a/container/test.output b/container/test.output new file mode 100644 index 00000000..a3c7c171 --- /dev/null +++ b/container/test.output @@ -0,0 +1,140 @@ +=== Running Container: mapreader === +✅ Container 'mapreader' is running with ID: 85c4da8d8d7b131499ec37d83822efa0cb8f21f8a5450add23ab0e9a4529d0f3 + +=== Executing Dependency Tests Inside Container 'mapreader' === + +🔍 Running Test: PyTorch +📦 Library: PyTorch +✅ PyTorch Output: 2.4.0a0+f70bd71a48.nv24.06 + +🔍 Running Test: PyTorch GPU +📦 Library: PyTorch GPU +✅ PyTorch GPU Output: True + +🔍 Running Test: Torchvision +📦 Library: Torchvision +✅ Torchvision Output: 0.19.0a0 + +🔍 Running Test: Geopandas +📦 Library: Geopandas +✅ Geopandas Output: 0.14.4 + +🔍 Running Test: Geopy +📦 Library: Geopy +✅ Geopy Output: 2.1.0 + +🔍 Running Test: Cython Python +📦 Library: Cython Python +✅ Cython Python Output: 0.29.37 + +🔍 Running Test: Torchinfo +📦 Library: Torchinfo +✅ Torchinfo Output: 1.8.0 + +🔍 Running Test: Parhugin MultiFun +📦 Library: Parhugin MultiFun +✅ Parhugin MultiFun Output: [INFO] #requested processes: 10 + +🔍 Running Test: MapReader +📦 Library: MapReader +✅ MapReader Output: 1.6.2.post0.dev4 + +🔍 Running Test: GDAL +📦 Library: GDAL +✅ GDAL Output: GDAL 3.5.3, released 2022/10/21 + +🔍 Running Test: Fiona +📦 Library: Fiona +✅ Fiona Output: 1.10.1 + +🔍 Running Test: Shapely +📦 Library: Shapely +✅ Shapely Output: 2.0.6 + +🔍 Running Test: Scikit-learn +📦 Library: Scikit-learn +✅ Scikit-learn Output: 1.5.0 + +🔍 Running Test: Scikit-image +📦 Library: Scikit-image +✅ Scikit-image Output: 0.25.0 + +🔍 Running Test: Tensorboard +📦 Library: Tensorboard +✅ Tensorboard Output: 2.9.0 + +🔍 Running Test: Jupyter +📦 Library: Jupyter +✅ Jupyter Output: Selected Jupyter core packages... 
+IPython : 8.21.0 +ipykernel : 6.29.4 +ipywidgets : 8.1.5 +jupyter_client : 8.6.2 +jupyter_core : 5.7.2 +jupyter_server : not installed +jupyterlab : 2.3.2 +nbclient : 0.10.0 +nbconvert : 7.16.4 +nbformat : 5.10.4 +notebook : 6.4.10 +qtconsole : not installed +traitlets : 5.9.0 + +🔍 Running Test: IPython Kernel +📦 Library: IPython Kernel +✅ IPython Kernel Output: 6.29.4 + +🔍 Running Test: IPyWidgets +📦 Library: IPyWidgets +✅ IPyWidgets Output: 8.1.5 + +🔍 Running Test: OpenCV +📦 Library: OpenCV +✅ OpenCV Output: 4.5.5 + +🔍 Running Test: Rasterio +📦 Library: Rasterio +✅ Rasterio Output: 1.4.3 + +=== Analyzing Test Results === + +✅ PyTorch: 2.4.0a0+f70bd71a48.nv24.06 +✅ PyTorch GPU: True +✅ Torchvision: 0.19.0a0 +✅ Geopandas: 0.14.4 +✅ Geopy: 2.1.0 +✅ Cython Python: 0.29.37 +✅ Torchinfo: 1.8.0 +✅ Parhugin MultiFun: [INFO] #requested processes: 10 +✅ MapReader: 1.6.2.post0.dev4 +✅ GDAL: GDAL 3.5.3, released 2022/10/21 +✅ Fiona: 1.10.1 +✅ Shapely: 2.0.6 +✅ Scikit-learn: 1.5.0 +✅ Scikit-image: 0.25.0 +✅ Tensorboard: 2.9.0 +✅ Jupyter: Selected Jupyter core packages... +IPython : 8.21.0 +ipykernel : 6.29.4 +ipywidgets : 8.1.5 +jupyter_client : 8.6.2 +jupyter_core : 5.7.2 +jupyter_server : not installed +jupyterlab : 2.3.2 +nbclient : 0.10.0 +nbconvert : 7.16.4 +nbformat : 5.10.4 +notebook : 6.4.10 +qtconsole : not installed +traitlets : 5.9.0 +✅ IPython Kernel: 6.29.4 +✅ IPyWidgets: 8.1.5 +✅ OpenCV: 4.5.5 +✅ Rasterio: 1.4.3 + +=== Test Analysis Complete === + +🎉 All dependency tests passed successfully! +=== Cleaning Up: Removing Container 'mapreader' === +✅ Container 'mapreader' has been removed. + diff --git a/container/test.py b/container/test.py new file mode 100644 index 00000000..7a396f80 --- /dev/null +++ b/container/test.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 + +""" +run_tests.py + +A Python script to automate the building of the Docker image, running the container, +executing dependency tests for MapReader, and reporting the results. 
+ +Usage: + python3 run_tests.py + +Requirements: + - Python 3.6+ + - podman-hpc installed and configured + - Dockerfile and requirements.txt present in the current directory +""" + +import subprocess +import sys +import os +from typing import List, Tuple + +# Configuration Variables +IMAGE_NAME = "mapreader:latest" +CONTAINER_NAME = "mapreader" +WORKDIR = "/workspace" + +# Define the dependency tests as a list of tuples (Test Description, Command) +# Each dependency has two commands: one to print the library name and another to print its version +DEPENDENCY_TESTS: List[Tuple[str, str]] = [ + ("PyTorch", "python3 -c 'import torch; print(torch.__version__)'"), + ("PyTorch GPU", "python3 -c 'import torch; print(torch.cuda.is_available())'"), + ("Torchvision", "python3 -c 'import torchvision; print(torchvision.__version__)'"), + ("Geopandas", "python3 -c 'import geopandas; print(geopandas.__version__)'"), + ("Geopy", "python3 -c 'import geopy; print(geopy.__version__)'"), + ("Cython Python", "python3 -c 'import Cython; print(Cython.__version__)'"), + ("Torchinfo", "python3 -c 'import torchinfo; print(torchinfo.__version__)'"), + ("Parhugin MultiFun", "python3 -c 'from parhugin import multiFunc; myproc = multiFunc(num_req_p=10)'"), + ("MapReader", "python3 -c 'import mapreader; print(mapreader.__version__)'"), + ("GDAL", "gdalinfo --version"), + ("Fiona", "python3 -c 'import fiona; print(fiona.__version__)'"), + ("Shapely", "python3 -c 'import shapely; print(shapely.__version__)'"), + ("Scikit-learn", "python3 -c 'import sklearn; print(sklearn.__version__)'"), + ("Scikit-image", "python3 -c 'import skimage; print(skimage.__version__)'"), + ("Tensorboard", "python3 -c 'import tensorboard; print(tensorboard.__version__)'"), + ("Jupyter", "jupyter --version"), + ("IPython Kernel", "python3 -c 'import ipykernel; print(ipykernel.__version__)'"), + ("IPyWidgets", "python3 -c 'import ipywidgets; print(ipywidgets.__version__)'"), + ("OpenCV", "python3 -c 'import cv2; 
print(cv2.__version__)'"), + ("Rasterio", "python3 -c 'import rasterio; print(rasterio.__version__)'"), +] + +def check_file_exists(filename: str) -> bool: + """ + Check if a file exists in the current directory. + """ + exists = os.path.isfile(filename) + if not exists: + print(f"❌ {filename} not found in the current directory.") + return exists + +def run_command(command: str, capture_output: bool = True) -> subprocess.CompletedProcess: + """ + Run a shell command and return the CompletedProcess object. + """ + try: + result = subprocess.run( + command, + shell=True, + check=True, + stdout=subprocess.PIPE if capture_output else None, + stderr=subprocess.PIPE if capture_output else None, + text=True + ) + return result + except subprocess.CalledProcessError as e: + print(f"❌ Command failed: {command}") + if capture_output: + print(f"--- STDOUT ---\n{e.stdout}") + print(f"--- STDERR ---\n{e.stderr}") + raise + +def build_image(): + """ + Build the Docker image using podman-hpc. + """ + print(f"=== Building Docker Image: {IMAGE_NAME} ===") + build_command = f"podman-hpc build -t {IMAGE_NAME} ." + run_command(build_command) + print(f"✅ Docker Image '{IMAGE_NAME}' built successfully.\n") + +def run_container() -> str: + """ + Run the container and return the container ID. + """ + print(f"=== Running Container: {CONTAINER_NAME} ===") + run_command( + f"podman-hpc run -d --gpu --name {CONTAINER_NAME} " + f"{IMAGE_NAME} sleep infinity" + ) + # Get container ID + container_id = run_command(f"podman-hpc inspect -f '{{{{.Id}}}}' {CONTAINER_NAME}").stdout.strip() + print(f"✅ Container '{CONTAINER_NAME}' is running with ID: {container_id}\n") + return container_id + +def execute_tests(container_id: str) -> List[Tuple[str, bool, str]]: + """ + Execute dependency tests inside the container and return the results. 
+ + Returns: + List of tuples containing (Test Description, Success (True/False), Output) + """ + print(f"=== Executing Dependency Tests Inside Container '{CONTAINER_NAME}' ===\n") + results = [] + + for test_desc, test_cmd in DEPENDENCY_TESTS: + print(f"🔍 Running Test: {test_desc}") + + # Execute the library name command + name_cmd = f"echo '{test_desc}'" + try: + name_result = run_command(f"podman-hpc exec {container_id} bash -c \"{name_cmd}\"") + library_name = name_result.stdout.strip() + print(f"📦 Library: {library_name}") + except subprocess.CalledProcessError: + print(f"❌ Failed to retrieve library name for {test_desc}.") + results.append((test_desc, False, "Failed to retrieve library name.")) + continue # Skip to the next test + + # Execute the version command + version_cmd = test_cmd + try: + version_result = run_command(f"podman-hpc exec {container_id} bash -c \"{version_cmd}\"") + version_output = version_result.stdout.strip() + print(f"✅ {library_name} Output: {version_output}\n") + results.append((library_name, True, version_output)) + except subprocess.CalledProcessError: + print(f"❌ {library_name} Output: Failed to execute.\n") + results.append((library_name, False, "Failed to execute.")) + + return results + +def analyze_results(results: List[Tuple[str, bool, str]]) -> bool: + """ + Analyze the test results and return True if all tests passed, False otherwise. + """ + all_passed = True + print(f"=== Analyzing Test Results ===\n") + for test_desc, success, output in results: + if success: + print(f"✅ {test_desc}: {output}") + else: + print(f"❌ {test_desc}: {output}") + all_passed = False + print("\n=== Test Analysis Complete ===\n") + return all_passed + +def cleanup(container_id: str): + """ + Stop and remove the container. 
+ """ + print(f"=== Cleaning Up: Removing Container '{CONTAINER_NAME}' ===") + try: + run_command(f"podman-hpc stop {container_id}") + run_command(f"podman-hpc rm {container_id}") + print(f"✅ Container '{CONTAINER_NAME}' has been removed.\n") + except subprocess.CalledProcessError: + print(f"⚠️ Failed to remove container '{CONTAINER_NAME}'. Please remove it manually.\n") + +def main(): + """ + Main function to orchestrate the build, run, test, and cleanup processes. + """ + # Check for necessary files + if not check_file_exists("Dockerfile") or not check_file_exists("requirements.txt"): + sys.exit(1) + + try: + # Build the Docker image + # build_image() + + # Run the container and get container ID + container_id = run_container() + + # Execute dependency tests + test_results = execute_tests(container_id) + + # Analyze results + all_passed = analyze_results(test_results) + + if all_passed: + print("🎉 All dependency tests passed successfully!") + else: + print("⚠️ Some dependency tests failed. Please review the above output for details.") + sys.exit(1) + + except Exception as e: + print(f"❌ An error occurred: {e}") + sys.exit(1) + + finally: + # Cleanup: Stop and remove the container + if 'container_id' in locals(): + cleanup(container_id) + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index 2a79ba1e..22ee6d62 100644 --- a/setup.py +++ b/setup.py @@ -59,7 +59,7 @@ "pyogrio>=0.7.2", "cartopy>=0.22.0", "joblib>=1.4.0", - "opencv-python<5.0.0.0", + "opencv-python>=4.8.1.78,<5.0.0.0", "folium>=0.12,<1.0.0", "mapclassify>=2.0.0,<3.0.0", "xyzservices==2024.9.0",