# Workflow file for this run:
# "distcheck: Use the EFA installer like the other steps" #87

# Workflow identity, triggers, and environment shared by all jobs.
name: PR CI

# Run for every push and for every pull request event.
on:
  - push
  - pull_request

env:
  # Space-separated apt package list, referenced by steps as
  # env.APT_PACKAGES.
  APT_PACKAGES: build-essential clang gcc git libhwloc-dev make
jobs:
  # al2build: for each EFA installer release in the matrix, install it
  # (without the kernel module), install hwloc and CUDA via yum, then
  # configure the plugin and run `make distcheck` and `make install`.
  # Runs on the `codebuild-ghactions-al2-*` runner label.
  al2build:
    runs-on: codebuild-ghactions-al2-${{ github.run_id }}-${{ github.run_attempt }}
    strategy:
      matrix:
        sdk:
          - cuda
        # Quoted so the releases can never be retyped as numbers.
        efainstaller:
          - latest
          - "1.32.0"
          - "1.31.0"
          - "1.30.0"
    name: al2/${{ matrix.sdk }}/efa@${{ matrix.efainstaller }}/distcheck
    steps:
      # note, do not bump to v4: https://github.com/actions/checkout/issues/1590
      - uses: actions/checkout@v3
      - name: Fetch and Install EFA Installer Dependencies
        run: |
          # --fail: abort on an HTTP error instead of saving the error
          # page under the tarball's name and failing later in tar.
          curl --fail -O https://efa-installer.amazonaws.com/aws-efa-installer-${{ matrix.efainstaller }}.tar.gz
          # Extract exactly the archive that was just downloaded.
          tar -xvf aws-efa-installer-${{ matrix.efainstaller }}.tar.gz
          pushd aws-efa-installer/
          sudo ./efa_installer.sh -y --skip-kmod
          popd
      - name: Install hwloc, utilities.
        run: |
          sudo yum -y install hwloc-devel yum-utils
      - name: Configure EPEL and Install CUDA
        run: |
          sudo yum -y install \
            https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
          # Fetch the repo definition over HTTPS so it cannot be altered
          # in transit before yum trusts it.
          sudo yum-config-manager --add-repo \
            https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \
            --save
          sudo yum -y clean expire-cache
          sudo yum -y install cuda libcudnn8-devel
      - name: Call `autoreconf -ivf`
        run: ./autogen.sh
      - name: Call `./configure`
        run: |
          ./configure --prefix=/opt/aws-ofi-nccl --with-mpi=/opt/amazon/openmpi \
            --with-libfabric=/opt/amazon/efa \
            --with-cuda=/usr/local/cuda \
            --enable-platform-aws
      - name: Call `make distcheck`
        # A bare `-j` puts no limit on concurrent jobs; cap it at the
        # number of available CPUs.
        run: make distcheck -j "$(nproc)"
      - name: Call `make install`
        run: sudo make install
# distcheck job (nested under `jobs:`): Ubuntu matrix of compiler x SDK.
  # Installs the selected SDK and the EFA installer, builds the plugin,
  # runs `make distcheck`, and on the clang legs also runs CodeChecker.
  distcheck:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        cc:
          - gcc
          - clang
        sdk:
          - cuda
          - neuron
      fail-fast: false
    steps:
      - uses: actions/checkout@v4
      - name: Install Dependencies
        run: |
          sudo apt-get update -y
          sudo apt-get install -y ${{ env.APT_PACKAGES }}
      - name: Install CUDA SDK
        if: matrix.sdk == 'cuda'
        run: |
          sudo apt-get install -y nvidia-cuda-toolkit
      - name: Install Neuron SDK
        if: matrix.sdk == 'neuron'
        run: |
          # Configure Linux for Neuron repository updates
          sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null << EOF
          deb https://apt.repos.neuron.amazonaws.com jammy main
          EOF
          wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
          # apt-get, like the other steps in this job.
          sudo apt-get update -y
          # Install Neuron Runtime
          sudo apt-get install aws-neuronx-runtime-lib -y
      - name: Fetch and Install EFA Installer Dependencies
        run: |
          # --fail: abort on an HTTP error instead of saving the error
          # page under the tarball's name and failing later in tar.
          curl --fail -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz
          tar -xvf aws-efa-installer-latest.tar.gz
          pushd aws-efa-installer/
          sudo ./efa_installer.sh -y --skip-kmod
          popd
      - name: Build Plugin
        run: |
          set -x
          # actions/checkout@v4 would drop the plugin source in $PWD,
          # so go ahead and build it.
          ./autogen.sh
          # Quoted operand and POSIX `=` so the test stays well-formed
          # whatever the matrix value expands to.
          if [ "${{ matrix.sdk }}" = "cuda" ]
          then
            ./configure --with-libfabric=/opt/amazon/efa/ \
                        --with-mpi=/opt/amazon/openmpi/ \
                        --with-cuda=/usr/local/cuda/ \
                        --enable-platform-aws \
                        CC=${{ matrix.cc }}
          else
            ./configure --with-libfabric=/opt/amazon/efa/ \
                        --with-mpi=/opt/amazon/openmpi/ \
                        --enable-neuron \
                        --enable-platform-aws \
                        CC=${{ matrix.cc }}
          fi
          make -j "$(nproc)"
      - name: Run Dist Check
        run: make distcheck
      - name: Upload build logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          # Artifact names must be unique within a run for
          # upload-artifact@v4, so include both matrix dimensions;
          # otherwise the cuda and neuron legs of one compiler collide.
          name: ${{ matrix.cc }}-${{ matrix.sdk }}-config.log
          path: config.log
      - uses: actions/setup-python@v5
        if: matrix.cc == 'clang'
        with:
          python-version: '3.9'
      - name: Run CodeChecker
        if: matrix.cc == 'clang'
        uses: whisperity/codechecker-analysis-action@v1
        id: codechecker
        with:
          # clean and rebuild so that compile_commands.json can be detected
          build-command: "make clean && make"
          ctu: true
      - name: Save CodeChecker HTML output.
        if: matrix.cc == 'clang'
        uses: actions/upload-artifact@v4
        with:
          name: "CodeChecker Bug Reports for ${{ matrix.sdk }}"
          path: ${{ steps.codechecker.outputs.result-html-dir }}/*.html
      - name: CodeChecker Pass Or Fail?
        # Keep the whole condition as ONE expression. Text outside an
        # embedded ${{ }} turns the condition into a non-empty string,
        # which is always truthy, so the step would run on every leg.
        if: matrix.cc == 'clang' && steps.codechecker.outputs.warnings-in-diff == 'true'
        shell: bash
        run: |
          echo "::error title=Static Analyzers Failed::Analysed commit(s) caused static analysis warnings"
          # exit 0: the annotation is emitted but the step itself succeeds.
          exit 0