forked from aws/aws-ofi-nccl
-
Notifications
You must be signed in to change notification settings - Fork 0
167 lines (149 loc) · 5.19 KB
/
distcheck.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# Workflow identity and triggers: runs on every push and on every pull request.
name: PR CI

on:
  - push
  - pull_request

env:
  # Space-separated list of apt packages. The folded scalar (>-) joins the
  # lines below with single spaces and strips the trailing newline, so the
  # value can be passed straight to `apt-get install`.
  APT_PACKAGES: >-
    build-essential
    clang
    gcc
    git
    libhwloc-dev
    make
jobs:
  # Builds, distchecks and installs the plugin on the
  # `codebuild-ghactions-al2-*` runner label, once per EFA installer version
  # listed in the matrix.
  al2build:
    runs-on: codebuild-ghactions-al2-${{ github.run_id }}-${{ github.run_attempt }}
    strategy:
      matrix:
        sdk:
          - cuda
        efainstaller:
          - latest
          # Quoted so the versions are always strings, never numbers.
          - "1.32.0"
          - "1.31.0"
          - "1.30.0"
    name: al2/${{ matrix.sdk }}/efa@${{ matrix.efainstaller }}/distcheck
    steps:
      # note, do not bump to v4: https://github.com/actions/checkout/issues/1590
      - uses: actions/checkout@v3
      - name: Fetch and Install EFA Installer Dependencies
        run: |
          curl -O https://efa-installer.amazonaws.com/aws-efa-installer-${{ matrix.efainstaller }}.tar.gz
          tar -xvf aws-efa-installer-*.tar.gz
          pushd aws-efa-installer/
          sudo ./efa_installer.sh -y --skip-kmod
          popd
      - name: Install hwloc, utilities.
        run: |
          sudo yum -y install hwloc-devel yum-utils
      - name: Configure EPEL and Install CUDA
        run: |
          sudo yum -y install \
            https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
          # Fetch the repo definition over HTTPS so it cannot be tampered with
          # in transit.
          sudo yum-config-manager --add-repo \
            https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \
            --save
          sudo yum -y clean expire-cache
          sudo yum -y install cuda libcudnn8-devel
      - name: Call `autoreconf -ivf`
        run: ./autogen.sh
      - name: Call `./configure`
        run: |
          ./configure --prefix=/opt/aws-ofi-nccl --with-mpi=/opt/amazon/openmpi \
            --with-libfabric=/opt/amazon/efa \
            --with-cuda=/usr/local/cuda \
            --enable-platform-aws
      - name: Call `make distcheck`
        # Bound parallelism to the CPU count; a bare `-j` places no limit on
        # the number of concurrent jobs.
        run: make distcheck -j "$(nproc)"
      - name: Call `make install`
        run: sudo make install

  # Builds and distchecks the plugin on ubuntu-22.04 for every
  # compiler x SDK combination; the clang legs additionally run CodeChecker.
  distcheck:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        cc:
          - gcc
          - clang
        sdk:
          - cuda
          - neuron
      fail-fast: false
    steps:
      - uses: actions/checkout@v4
      - name: Install Dependencies
        run: |
          sudo apt-get update -y
          sudo apt-get install -y ${{ env.APT_PACKAGES }}
      - name: Install CUDA SDK
        if: matrix.sdk == 'cuda'
        run: |
          sudo apt-get install -y nvidia-cuda-toolkit
      - name: Install Neuron SDK
        if: matrix.sdk == 'neuron'
        run: |
          # Configure Linux for Neuron repository updates
          sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null << EOF
          deb https://apt.repos.neuron.amazonaws.com jammy main
          EOF
          wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
          # apt-get (not apt) for consistency with the other steps in this job.
          sudo apt-get update -y
          # Install Neuron Runtime
          sudo apt-get install aws-neuronx-runtime-lib -y
      - name: Fetch and Install EFA Installer Dependencies
        run: |
          curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz
          tar -xvf aws-efa-installer-*.tar.gz
          pushd aws-efa-installer/
          sudo ./efa_installer.sh -y --skip-kmod
          popd
      - name: Build Plugin
        run: |
          set -x
          # actions/checkout@v4 would drop the plugin source in $PWD,
          # so go ahead and build it.
          ./autogen.sh
          # Only the SDK flag differs between the two configure invocations.
          if [ "${{ matrix.sdk }}" = "cuda" ]
          then
            ./configure --with-libfabric=/opt/amazon/efa/ \
                        --with-mpi=/opt/amazon/openmpi/ \
                        --with-cuda=/usr/local/cuda/ \
                        --enable-platform-aws \
                        CC=${{ matrix.cc }}
          else
            ./configure --with-libfabric=/opt/amazon/efa/ \
                        --with-mpi=/opt/amazon/openmpi/ \
                        --enable-neuron \
                        --enable-platform-aws \
                        CC=${{ matrix.cc }}
          fi
          make -j "$(nproc)"
      - name: Run Dist Check
        run: make distcheck
      - name: Upload build logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          # upload-artifact@v4 requires artifact names to be unique within a
          # run, so include the SDK: both SDK legs share the same compiler.
          name: ${{ matrix.cc }}-${{ matrix.sdk }}-config.log
          path: config.log
      - uses: actions/setup-python@v5
        if: matrix.cc == 'clang'
        with:
          python-version: '3.9'
      - name: Run CodeChecker
        if: matrix.cc == 'clang'
        uses: whisperity/codechecker-analysis-action@v1
        id: codechecker
        with:
          # clean and rebuild so that compile_commands.json can be detected
          build-command: "make clean && make"
          ctu: true
      - name: Save CodeChecker HTML output.
        if: matrix.cc == 'clang'
        uses: actions/upload-artifact@v4
        with:
          name: "CodeChecker Bug Reports for ${{ matrix.sdk }}"
          path: ${{ steps.codechecker.outputs.result-html-dir }}/*.html
      - name: CodeChecker Pass Or Fail?
        # Keep the whole condition as ONE expression. Embedding `${{ }}` in
        # only part of an `if:` makes GitHub treat the result as a non-empty
        # string, which is always truthy, so the step would run on every job.
        # NOTE(review): confirm the action populates `warnings-in-diff` in
        # this configuration (no diff options are set on the step above).
        if: matrix.cc == 'clang' && steps.codechecker.outputs.warnings-in-diff == 'true'
        shell: bash
        run: |
          echo "::error title=Static Analyzers Failed::Analysed commit(s) caused static analysis warnings"
          # NOTE(review): exits 0, so findings are reported as an annotation
          # only and never fail the job; change to `exit 1` if they should
          # gate the PR.
          exit 0