
Commit 4354690

add apex test (Lightning-AI#2921)

Borda and williamFalcon authored

* add apex test
* rename
* level
* events
* wrap
* evt
* miss
* apex
* apex
* apex
* apex
* apex
* apex
* Update tests/models/test_amp.py
  Co-authored-by: William Falcon <waf2107@columbia.edu>
* notes
* notes

Co-authored-by: William Falcon <waf2107@columbia.edu>

1 parent 6c5a0a1 · commit 4354690

22 files changed, +191 -101 lines

.github/workflows/tpu-testing.yml
Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@ env:
   GKE_CLUSTER: lightning-cluster
   GKE_ZONE: us-central1-a
   IMAGE: gcr.io/${{ secrets.GKE_PROJECT }}/tpu-testing-image
-  MAX_CHECKS: 240
+  MAX_CHECKS: 360
   CHECK_SPEEP: 5
 
 jobs:

CHANGELOG.md
Lines changed: 2 additions & 1 deletion

@@ -46,6 +46,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Tracks all outputs including TBPTT and multiple optimizers ([#2890](https://github.com/PyTorchLightning/pytorch-lightning/pull/2890))
 
+- Added GPU Usage Logger ([#2932](https://github.com/PyTorchLightning/pytorch-lightning/pull/2932))
+
 ### Changed
 
 - Truncated long version numbers in progress bar ([#2594](https://github.com/PyTorchLightning/pytorch-lightning/pull/2594))
@@ -351,7 +353,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 ### Deprecated
 
 - Deprecated `tags_csv` in favor of `hparams_file` ([#1271](https://github.com/PyTorchLightning/pytorch-lightning/pull/1271))
-- Deprecated `amp_level` in favor of native AMP ([#1561](https://github.com/PyTorchLightning/pytorch-lightning/pull/1561))
 
 ### Fixed
 
environment.yml
Lines changed: 1 addition & 0 deletions

@@ -17,6 +17,7 @@ dependencies:
   - future>=0.17.1
   - PyYAML>=5.1
   - tqdm>=4.41.0
+  - nvidia-apex
 
   # For dev and testing
   - black==19.10b0
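Note: apex remains an optional dependency at runtime, so Lightning imports it behind a guard. A minimal sketch of that guard pattern, assuming the conventional flag name (illustrative only, not code from this commit):

    try:
        from apex import amp  # NVIDIA apex, now listed in environment.yml
        APEX_AVAILABLE = True
    except ImportError:
        amp = None
        APEX_AVAILABLE = False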

pytorch_lightning/accelerators/cpu_backend.py
Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ def __init__(self, trainer):
 
     def setup(self, model):
         # run through amp wrapper
-        if self.trainer.amp_type:
+        if self.trainer.amp_backend:
             raise MisconfigurationException('amp + cpu is not supported. Please use a GPU option')
 
         # call setup after the ddp process has connected
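The truthiness check above works because `amp_backend` holds either `None` or a member of the `AMPType` enum. For reference, a sketch of that enum as defined in `pytorch_lightning.utilities` around this release (shown for context, not part of the diff):

    from enum import Enum

    class AMPType(Enum):
        # which mixed-precision backend the Trainer selected
        APEX = 'apex'
        NATIVE = 'native'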

pytorch_lightning/accelerators/ddp2_backend.py
Lines changed: 1 addition & 1 deletion

@@ -134,7 +134,7 @@ def ddp_train(self, process_idx, mp_queue, model, is_master=False, proc_offset=0
         self.trainer.copy_trainer_model_properties(model)
 
         # AMP - run through amp wrapper before going to distributed DP
-        if self.trainer.amp_type == AMPType.APEX:
+        if self.trainer.amp_backend == AMPType.APEX:
             model, optimizers = model.configure_apex(amp, model, self.trainer.optimizers, self.trainer.amp_level)
             self.trainer.optimizers = optimizers
             self.trainer.reinit_scheduler_properties(self.trainer.optimizers, self.trainer.lr_schedulers)
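`configure_apex` is an overridable LightningModule hook; its default implementation is roughly a thin wrapper around apex's `amp.initialize` (a hedged sketch based on the docs of this era, not this commit's code):

    def configure_apex(self, amp, model, optimizers, amp_level):
        # default hook: let apex patch the model and optimizers in place
        model, optimizers = amp.initialize(model, optimizers, opt_level=amp_level)
        return model, optimizers

The same renamed check appears in ddp_backend.py and ddp_spawn_backend.py below.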

pytorch_lightning/accelerators/ddp_backend.py
Lines changed: 1 addition & 1 deletion

@@ -201,7 +201,7 @@ def ddp_train(self, process_idx, mp_queue, model, is_master=False, proc_offset=0
         self.trainer.copy_trainer_model_properties(model)
 
         # AMP - run through amp wrapper before going to distributed DP
-        if self.trainer.amp_type == AMPType.APEX:
+        if self.trainer.amp_backend == AMPType.APEX:
             model, optimizers = model.configure_apex(amp, model, self.trainer.optimizers, self.trainer.amp_level)
             self.trainer.optimizers = optimizers
             self.trainer.reinit_scheduler_properties(self.trainer.optimizers, self.trainer.lr_schedulers)

pytorch_lightning/accelerators/ddp_spawn_backend.py
Lines changed: 1 addition & 1 deletion

@@ -134,7 +134,7 @@ def ddp_train(self, process_idx, mp_queue, model):
 
         # AMP -
         # run through amp wrapper before going to distributed DP
-        if self.trainer.amp_type == AMPType.APEX:
+        if self.trainer.amp_backend == AMPType.APEX:
             model, optimizers = model.configure_apex(amp, model, self.trainer.optimizers, self.trainer.amp_level)
             self.trainer.optimizers = optimizers
             self.trainer.reinit_scheduler_properties(self.trainer.optimizers, self.trainer.lr_schedulers)

pytorch_lightning/accelerators/dp_backend.py
Lines changed: 2 additions & 2 deletions

@@ -49,7 +49,7 @@ def setup(self, model):
         self.model_autocast_original_forward = model.forward
 
         # init half precision
-        if self.trainer.amp_type:
+        if self.trainer.amp_backend:
             model = self.__init_half_precision(model)
 
         # init torch data parallel
@@ -69,7 +69,7 @@ def __init_torch_data_parallel(self, model):
         return model
 
     def __init_half_precision(self, model):
-        if self.trainer.amp_type == AMPType.NATIVE:
+        if self.trainer.amp_backend == AMPType.NATIVE:
             self.__init_native_amp(model)
         else:
             model = self.__init_nvidia_apex(model)
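A hedged sketch of what the two branches dispatch to, using the `AMPType` sketch above (the helper's name, signature, and body are assumptions based on the common pattern, not this file's exact private methods):

    import torch

    def init_half_precision(model, optimizers, amp_backend, amp_level='O2'):
        # illustrative only: dispatch between the two AMP backends
        if amp_backend == AMPType.NATIVE:
            # native AMP: run forward passes under autocast
            model.forward = torch.cuda.amp.autocast()(model.forward)
        else:
            from apex import amp
            # apex: rewrite model and optimizers for the requested opt level
            model, optimizers = amp.initialize(model, optimizers, opt_level=amp_level)
        return model, optimizers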

pytorch_lightning/accelerators/gpu_backend.py
Lines changed: 2 additions & 2 deletions

@@ -22,7 +22,7 @@
 
 
 class GPUBackend(object):
-    amp_type: AMPType
+    amp_backend: AMPType
 
     def __init__(self, trainer):
         self.trainer = trainer
@@ -41,7 +41,7 @@ def setup(self, model):
         self.trainer.lr_schedulers = lr_schedulers
         self.trainer.optimizer_frequencies = optimizer_frequencies
 
-        if self.trainer.amp_type == AMPType.APEX:
+        if self.trainer.amp_backend == AMPType.APEX:
            model = self._setup_nvidia_apex(model)
         return model
 
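From the user side, the apex branch above is reached through public Trainer flags; a hedged example using flags that existed in this era (`precision` and `amp_level`):

    from pytorch_lightning import Trainer

    # 16-bit training; with apex installed, amp_level picks the apex opt level
    trainer = Trainer(gpus=1, precision=16, amp_level='O2')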

pytorch_lightning/core/hooks.py
Lines changed: 2 additions & 2 deletions

@@ -309,8 +309,8 @@ def backward(self, trainer, loss, optimizer, optimizer_idx):
         """
         loss.backward()
 
-    def amp_scale_loss(self, unscaled_loss, optimizer, optimizer_idx, amp_type: AMPType):
-        if amp_type == AMPType.NATIVE:
+    def amp_scale_loss(self, unscaled_loss, optimizer, optimizer_idx, amp_backend: AMPType):
+        if amp_backend == AMPType.NATIVE:
             scaled_loss = self.trainer.scaler.scale(unscaled_loss)
         else:
             scaled_loss = amp.scale_loss(unscaled_loss, optimizer)