-
Notifications
You must be signed in to change notification settings - Fork 324
/
config.yml
474 lines (435 loc) · 16.4 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
version: 2.1
# -------------------------------------------------------------------------------------
# Commands
# -------------------------------------------------------------------------------------
commands:
  py_3_7_setup:
    description: "Install and switch to Python 3.7.5; also install pip and pytest."
    steps:
      - run:
          name: "Setup Python v3.7.5 environment"
          command: |
            # Refresh pyenv's definitions so 3.7.5 is available, then pin it
            # globally and locally for the rest of the job.
            cd /opt/circleci/.pyenv && git pull && cd -
            pyenv install -s 3.7.5
            pyenv global 3.7.5
            pyenv local 3.7.5
            pyenv versions
            echo "In venv: $(pyenv local) - $(python -V), $(pip -V)"
            # sudo resolves "python" via $(which python) so the pyenv
            # interpreter (not the system one) gets pip/pytest installed.
            sudo "$(which python)" -m pip install --upgrade pip
            sudo "$(which python)" -m pip install pytest
run_nvidia_smi:
description: "Prints GPU capabilities from nvidia-smi"
steps:
- run:
name: "Run Nvidia-SMI"
command: |
nvidia-smi
pip_dev_install:
description: "Install dependencies via pip, including extra deps. Also supports more options, such as building on top of PyTorch nightly."
parameters:
args:
type: string
default: ""
steps:
- run:
name: "Install dependencies via pip"
command: ./scripts/install_via_pip.sh << parameters.args >>
lint_flake8:
description: "Lint with flake8"
steps:
- run:
name: "Lint with flake8"
command: flake8 --config ./.circleci/flake8_config.ini
lint_black:
description: "Lint with black"
steps:
- run:
name: "Lint with black"
command: black --check --diff --color .
isort:
description: "Check import order with isort"
steps:
- run:
name: "Check import order with isort"
command: isort -v -l 88 -o opacus --lines-after-imports 2 -m 3 --trailing-comma --check-only .
configure_docusaurus_bot:
description: "Configure Docusaurus GitHub bot"
steps:
- run:
name: "Configure Docusaurus GitHub bot"
command: |
git config --global user.email "docusaurus-bot@users.noreply.github.com"
git config --global user.name "Opacus website deployment script"
echo "machine github.com login docusaurus-bot password $DOCUSAURUS_GITHUB_TOKEN" > ~/.netrc
deploy_site:
description: "Deploy website to GitHub Pages"
steps:
- run:
name: "Deploy website to GitHub Pages"
# TODO: make the installation above conditional on there being relevant changes (no need to install if there are none)
command: |
mkdir -p website/static/.circleci && cp -a .circleci/. website/static/.circleci/.
cd website
./scripts/build_website.sh -b
GIT_USER=docusaurus-bot yarn run publish-gh-pages
unit_tests:
description: "Run unit tests"
steps:
- run:
name: "Unit tests & doctests"
no_output_timeout: 1h
command: |
mkdir unittest-reports
python -m pytest --doctest-modules -p conftest --junitxml=unittest-reports/junit.xml opacus
- store_test_results:
path: unittest-reports
- store_artifacts:
path: unittest-reports
mnist_integration_test:
description: "Runs MNIST example end to end"
parameters:
device:
default: "cpu"
type: string
steps:
- run:
name: MNIST example
command: |
mkdir -p runs/mnist/data
mkdir -p runs/mnist/test-reports
echo "Using $(python -V) ($(which python))"
echo "Using $(pip -V) ($(which pip))"
python examples/mnist.py --lr 0.25 --sigma 0.7 -c 1.5 --batch-size 64 --epochs 1 --data-root runs/mnist/data --n-runs 1 --device <<parameters.device>>
python -c "import torch; accuracy = torch.load('run_results_mnist_0.25_0.7_1.5_64_1.pt'); exit(0) if (accuracy[0]>0.78 and accuracy[0]<0.95) else exit(1)"
when: always
- store_test_results:
path: runs/mnist/test-reports
- store_artifacts:
path: runs/mnist/test-reports
mnist_lightning_integration_test:
description: "Runs MNIST-Lightning example end to end"
parameters:
device:
default: "cpu"
type: string
steps:
- run:
name: MNIST-Lightning example
command: |
mkdir -p runs/mnist/data
mkdir -p runs/mnist/test-reports
echo "Using $(python -V) ($(which python))"
echo "Using $(pip -V) ($(which pip))"
python examples/mnist_lightning.py fit --trainer.accelerator <<parameters.device>> --model.lr 0.25 --model.sigma 0.7 --model.max_per_sample_grad_norm 1.5 --model.sample_rate 0.004 --trainer.max_epochs 1 --data.data_dir runs/mnist/data --data.sample_rate 0.004
python -c "import torch; exit(0)"
when: always
- store_test_results:
path: runs/mnist-lightning/test-reports
- store_artifacts:
path: runs/mnist-lightning/test-reports
cifar10_integration_test:
description: "Runs CIFAR10 example end to end"
parameters:
device:
default: "cpu"
type: string
steps:
- run:
name: CIFAR10 example
command: |
mkdir -p runs/cifar10/data
mkdir -p runs/cifar10/logs
mkdir -p runs/cifar10/test-reports
echo "Using $(python -V) ($(which python))"
echo "Using $(pip -V) ($(which pip))"
pip install tensorboard
python examples/cifar10.py --lr 0.1 --sigma 1.5 -c 10 --sample-rate 0.04 --epochs 10 --data-root runs/cifar10/data --log-dir runs/cifar10/logs --device <<parameters.device>>
python -c "import torch; model = torch.load('model_best.pth.tar'); exit(0) if (model['best_acc1']>0.4 and model['best_acc1']<0.49) else exit(1)"
when: always
- store_test_results:
path: runs/cifar10/test-reports
- store_artifacts:
path: runs/cifar10/test-reports
dcgan_integration_test:
description: "Runs dcgan example end to end"
parameters:
device:
default: "cpu"
type: string
steps:
- run:
name: dcgan example
command: |
mkdir -p runs/dcgan/data
mkdir -p runs/dcgan/test-reports
echo "Using $(python -V) ($(which python))"
echo "Using $(pip -V) ($(which pip))"
python examples/dcgan.py --lr 2e-4 --sigma 0.7 -c 1.5 --batch-size 32 --epochs 1 --data-root runs/dcgan/data --device <<parameters.device>>
when: always
- store_test_results:
path: runs/dcgan/test-reports
- store_artifacts:
path: runs/dcgan/test-reports
imdb_integration_test:
description: "Runs imdb example end to end"
parameters:
device:
default: "cpu"
type: string
steps:
- run:
name: imdb example
command: |
mkdir -p runs/imdb/data
mkdir -p runs/imdb/test-reports
echo "Using $(python -V) ($(which python))"
echo "Using $(pip -V) ($(which pip))"
pip install --user datasets transformers
python examples/imdb.py --lr 0.02 --sigma 0.56 -c 1.0 --batch-size 32 --max-sequence-length 256 --epochs 1 --data-root runs/imdb/data --device <<parameters.device>>
python -c "import torch; accuracy = torch.load('run_results_imdb_classification.pt'); exit(0) if (accuracy>0.54 and accuracy<0.66) else exit(1)"
when: always
- store_test_results:
path: runs/imdb/test-reports
- store_artifacts:
path: runs/imdb/test-reports
charlstm_integration_test:
description: "Runs charlstm example end to end"
parameters:
device:
default: "cpu"
type: string
steps:
- run:
name: charlstm example
command: |
mkdir -p runs/charlstm/data
wget https://download.pytorch.org/tutorial/data.zip -O runs/charlstm/data/data.zip
unzip runs/charlstm/data/data.zip -d runs/charlstm/data
rm runs/charlstm/data/data.zip
mkdir -p runs/charlstm/test-reports
echo "Using $(python -V) ($(which python))"
echo "Using $(pip -V) ($(which pip))"
pip install scikit-learn
python examples/char-lstm-classification.py --epochs=20 --learning-rate=2.0 --hidden-size=128 --delta=8e-5 --batch-size 400 --n-layers=1 --sigma=1.0 --max-per-sample-grad-norm=1.5 --data-root="runs/charlstm/data/data/names/" --device=<<parameters.device>> --test-every 5
python -c "import torch; accuracy = torch.load('run_results_chr_lstm_classification.pt'); exit(0) if (accuracy>0.60 and accuracy<0.80) else exit(1)"
when: always
- store_test_results:
path: runs/charlstm/test-reports
- store_artifacts:
path: runs/charlstm/test-reports
benchmark_layers_integration_test:
description: "Runs benchmark end to end"
parameters:
device:
default: "cpu"
type: string
layers:
default: "mha dpmha gsm_dpmha embedding gsm_embedding instancenorm gsm_instancenorm groupnorm gsm_groupnorm layernorm gsm_layernorm lstm dplstm gsm_dplstm rnn dprnn gsm_dprnn linear gsm_linear gru dpgru gsm_dpgru"
type: string
runtime_ratio_threshold:
default: "7.0"
type: string
memory_ratio_threshold:
default: "2.0"
type: string
steps:
- run:
name: benchmarks
command: |
mkdir -p benchmarks/results/raw
echo "Using $(python -V) ($(which python))"
echo "Using $(pip -V) ($(which pip))"
python benchmarks/run_benchmarks.py --batch_size 16 --layers <<parameters.layers>> --config_file ./benchmarks/config.json --root ./benchmarks/results/raw/ --cont
IFS=$' ';layers=(<<parameters.layers>>); rm -rf /tmp/report_layers; mkdir -p /tmp/report_layers; IFS=$'\n'; files=`( echo "${layers[*]}" ) | sed 's/.*/.\/benchmarks\/results\/raw\/&*/'`
cp -v ${files[@]} /tmp/report_layers
report_id=`IFS=$'-'; echo "${layers[*]}"`
python benchmarks/generate_report.py --path-to-results /tmp/report_layers --save-path benchmarks/results/report-${report_id}.csv --format csv
python benchmarks/generate_report.py --path-to-results /tmp/report_layers --save-path benchmarks/results/report-${report_id}.pkl --format pkl
python -c "import pandas as pd; r = pd.read_pickle('./benchmarks/results/report-"$report_id".pkl').fillna(0); th="<<parameters.runtime_ratio_threshold>>"; exit(0) if (r.loc[:, ('runtime', 'dp/control')] < th).all() and (r.loc[:, ('runtime', 'gsm/control')] < th).all() else exit(1)"
python -c "import pandas as pd; r = pd.read_pickle('./benchmarks/results/report-"$report_id".pkl').fillna(0); th="<<parameters.memory_ratio_threshold>>"; exit(0) if (r.loc[:, ('memory', 'dp/control')] < th).all() and (r.loc[:, ('memory', 'gsm/control')] < th).all() else exit(1)"
when: always
- store_artifacts:
path: benchmarks/results/
# -------------------------------------------------------------------------------------
# Jobs
# -------------------------------------------------------------------------------------
jobs:
  lint_py37_torch_release:
    docker:
      - image: cimg/python:3.7.5
    steps:
      - checkout
      - pip_dev_install
      - lint_flake8
      - lint_black
      - isort
unittest_py37_torch_release:
docker:
- image: cimg/python:3.7.5
steps:
- checkout
- pip_dev_install
- unit_tests
unittest_py38_torch_release:
docker:
- image: cimg/python:3.8
steps:
- checkout
- pip_dev_install
- unit_tests
unittest_py39_torch_release:
docker:
- image: cimg/python:3.9
steps:
- checkout
- pip_dev_install
- unit_tests
unittest_py39_torch_nightly:
docker:
- image: cimg/python:3.9
steps:
- checkout
- pip_dev_install:
args: "-n"
- unit_tests
integrationtest_py37_torch_release_cpu:
docker:
- image: cimg/python:3.7.5
steps:
- checkout
- py_3_7_setup
- pip_dev_install
- mnist_integration_test:
device: "cpu"
integrationtest_py37_torch_release_cuda:
machine:
resource_class: gpu.nvidia.small.multi
image: ubuntu-2004-cuda-11.4:202110-01
steps:
- checkout
- py_3_7_setup
- pip_dev_install
- run_nvidia_smi
- benchmark_layers_integration_test:
device: "cuda"
layers: "groupnorm gsm_groupnorm instancenorm gsm_instancenorm layernorm gsm_layernorm dpmha"
runtime_ratio_threshold: "2.5"
memory_ratio_threshold: "1.6"
- benchmark_layers_integration_test:
device: "cuda"
layers: "linear gsm_linear"
runtime_ratio_threshold: "3.6"
memory_ratio_threshold: "13.0"
- benchmark_layers_integration_test:
device: "cuda"
layers: "mha gsm_dpmha"
runtime_ratio_threshold: "3.5"
memory_ratio_threshold: "2.0"
- benchmark_layers_integration_test:
device: "cuda"
layers: "gru dpgru gsm_dpgru"
runtime_ratio_threshold: "18.5"
memory_ratio_threshold: "1.5"
- benchmark_layers_integration_test:
device: "cuda"
layers: "lstm dplstm gsm_dplstm"
runtime_ratio_threshold: "16.5"
memory_ratio_threshold: "1.5"
- benchmark_layers_integration_test:
device: "cuda"
layers: "rnn dprnn gsm_dprnn"
runtime_ratio_threshold: "16.5"
memory_ratio_threshold: "1.2"
- benchmark_layers_integration_test:
device: "cuda"
layers: "embedding gsm_embedding"
runtime_ratio_threshold: "6.0"
memory_ratio_threshold: "15.0"
- mnist_integration_test:
device: "cuda"
- cifar10_integration_test:
device: "cuda"
- imdb_integration_test:
device: "cuda"
- charlstm_integration_test:
device: "cuda"
- dcgan_integration_test:
device: "cuda"
unittest_multi_gpu:
machine:
resource_class: gpu.nvidia.medium.multi
image: ubuntu-2004-cuda-11.4:202110-01
steps:
- checkout
- py_3_7_setup
- pip_dev_install
- run_nvidia_smi
- run:
name: "Unit test multi_gpu"
no_output_timeout: 1h
command: |
mkdir unittest-multigpu-reports
python -m unittest opacus.tests.multigpu_gradcheck.GradientComputationTest.test_gradient_correct
auto_deploy_site:
docker:
- image: cimg/python:3.9-node
steps:
- run: node --version
- run: yarn --version
- checkout
- pip_dev_install:
args: "-n -d"
- configure_docusaurus_bot
- deploy_site
# Shared filter: skip CI on the gh-pages (website output) branch.
aliases:
  - &exclude_ghpages
    branches:
      ignore:
        - gh-pages
# -------------------------------------------------------------------------------------
# Workflows
# -------------------------------------------------------------------------------------
workflows:
  # Runs on every push (any pipeline NOT triggered by a schedule).
  commit:
    when:
      not:
        equal: [ scheduled_pipeline, << pipeline.trigger_source >> ]
    jobs:
      - lint_py37_torch_release:
          filters: *exclude_ghpages
      - unittest_py37_torch_release:
          filters: *exclude_ghpages
      - unittest_py38_torch_release:
          filters: *exclude_ghpages
      - unittest_py39_torch_release:
          filters: *exclude_ghpages
      - unittest_py39_torch_nightly:
          filters: *exclude_ghpages
      - unittest_multi_gpu:
          filters: *exclude_ghpages
      - integrationtest_py37_torch_release_cpu:
          filters: *exclude_ghpages
      - integrationtest_py37_torch_release_cuda:
          filters: *exclude_ghpages
  # Runs only on scheduled (nightly) pipelines.
  nightly:
    when:
      equal: [ scheduled_pipeline, << pipeline.trigger_source >> ]
    jobs:
      - unittest_py39_torch_nightly:
          filters: *exclude_ghpages
      - integrationtest_py37_torch_release_cpu:
          filters: *exclude_ghpages
      - integrationtest_py37_torch_release_cuda:
          filters: *exclude_ghpages
      - lint_py37_torch_release:
          filters: *exclude_ghpages
  # Deploys the website; restricted to pushes on main.
  website_deployment:
    when:
      not:
        equal: [ scheduled_pipeline, << pipeline.trigger_source >> ]
    jobs:
      - auto_deploy_site:
          filters:
            branches:
              only:
                - main