Merge branch 'main' into benchmarks-ci
moaradwan committed Aug 31, 2022
2 parents 82474f1 + 12cf9ed commit 8fb03aa
Showing 26 changed files with 798 additions and 672 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -204,7 +204,7 @@ commands:
echo "Using $(python -V) ($(which python))"
echo "Using $(pip -V) ($(which pip))"
pip install --user datasets transformers
python examples/imdb.py --lr 0.02 --sigma 0.56 -c 1.0 --batch-size 32 --max-sequence-length 256 --epochs 1 --data-root runs/imdb/data --device <<parameters.device>>
python examples/imdb.py --lr 0.02 --sigma 1.0 -c 1.0 --batch-size 64 --max-sequence-length 256 --epochs 2 --data-root runs/imdb/data --device <<parameters.device>>
python -c "import torch; accuracy = torch.load('run_results_imdb_classification.pt'); exit(0) if (accuracy>0.54 and accuracy<0.66) else exit(1)"
when: always
- store_test_results:
26 changes: 26 additions & 0 deletions CONTRIBUTING.md
@@ -95,6 +95,32 @@ Run following command from `website` folder. It will build the docs and serve th
./scripts/build_website.sh
```

In addition to your IDE's spell checker, you can spell-check the documentation automatically using [```sphinxcontrib-spelling```](https://sphinxcontrib-spelling.readthedocs.io/en/latest/install.html).
Note that you will also need [```PyEnchant```](https://pyenchant.github.io/pyenchant/) (and therefore the Enchant C library) to run ```sphinxcontrib-spelling```; its installation is covered in step 3 below.

Steps:
1. Install the extension with pip: ```pip install sphinxcontrib-spelling```
2. Add ```sphinxcontrib.spelling``` to the extensions list in ```conf.py``` (a minimal ```conf.py``` sketch is shown after this list).
3. Install ```PyEnchant``` by following the [installation guide](https://pyenchant.github.io/pyenchant/install.html). Note that Apple Silicon machines may require a workaround; see the "Apple Silicon related errors" section of that guide.
4. Make sure you have ```source``` and ```build``` folders, then pass "spelling" as the builder argument to ```sphinx-build```:
```
cd website/sphinx
mkdir build # if you do not already have one
sphinx-build -b spelling source build
```
5. Find files with spelling errors in ```build``` (remember to check each folder). A file will be generated for each source file that contains spelling errors. Example:
* File name: ```batch_memory_manager.spelling```
* File content:
```
../../opacus/utils/batch_memory_manager.py:docstring of opacus.utils.batch_memory_manager.BatchMemoryManager:5: (occasinal) safeguarding against occasinal large batches produced by
../../opacus/utils/batch_memory_manager.py:docstring of opacus.utils.batch_memory_manager.BatchMemoryManager:13: (optimzer) On every step optimzer will check if the batch was the last physical batch comprising
../../opacus/utils/batch_memory_manager.py:docstring of opacus.utils.batch_memory_manager.BatchMemoryManager:14: (behaviour) a logical one, and will change behaviour accordignly.
../../opacus/utils/batch_memory_manager.py:docstring of opacus.utils.batch_memory_manager.BatchMemoryManager:14: (accordignly) a logical one, and will change behaviour accordignly.
../../opacus/utils/batch_memory_manager.py:docstring of opacus.utils.batch_memory_manager.BatchSplittingSampler:4: (physocal) Used to split large logical batches into physocal batches of a smaller size,
```
6. Manually review the generated spelling files and fix the source files accordingly. Note that some detections are false positives; for example, "nn" (from ```torch.nn```) can be flagged as a spelling error.
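
A minimal sketch of the ```conf.py``` change from step 2 is shown below, assuming ```conf.py``` lives under ```website/sphinx/source```. Only the spelling-related settings are shown, and the word-list file name is a hypothetical example rather than a file that already exists in the repository:
```
# website/sphinx/source/conf.py -- minimal sketch; only the spelling-related
# settings are shown.
extensions = [
    # ... existing Sphinx extensions ...
    "sphinxcontrib.spelling",
]

# Optional: keep project-specific terms (e.g. "nn") in a word list so they are
# not reported as spelling errors (the file name here is a hypothetical example).
spelling_word_list_filename = "spelling_wordlist.txt"
spelling_show_suggestions = True
```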


## Pull Requests
We actively welcome your pull requests.

2 changes: 1 addition & 1 deletion dev_requirements.txt
@@ -1,4 +1,4 @@
torch==1.8.1
torch
torchvision>=0.9.1
tqdm>=4.40
requests>=2.25.1
4 changes: 2 additions & 2 deletions opacus/accountants/accountant.py
@@ -72,7 +72,7 @@ def get_optimizer_hook_fn(
"""
Returns a callback function which can be used to attach to DPOptimizer
Args:
sample_rate: Expected samping rate used for accounting
sample_rate: Expected sampling rate used for accounting
"""

def hook_fn(optim: DPOptimizer):
@@ -88,7 +88,7 @@ def hook_fn(optim: DPOptimizer):

def state_dict(self, destination: T_state_dict = None) -> T_state_dict:
"""
Retruns a dictionary containing the state of the accountant.
Returns a dictionary containing the state of the accountant.
Args:
destination: a mappable object to populate the current state_dict into.
If this arg is None, an OrderedDict is created and populated.
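
As a rough illustration of how `get_optimizer_hook_fn` is meant to be used, the sketch below attaches an `RDPAccountant` hook to a `DPOptimizer` so that it is notified on every optimizer step. It assumes the usual Opacus APIs (`RDPAccountant`, `DPOptimizer.attach_step_hook`); the toy model, batch size, and dataset size are placeholder values, and `PrivacyEngine` normally performs this wiring for you:
```
# Illustrative sketch, not part of this commit.
import torch
import torch.nn as nn
from opacus.accountants import RDPAccountant
from opacus.grad_sample import GradSampleModule
from opacus.optimizers import DPOptimizer

model = GradSampleModule(nn.Linear(4, 2))
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
dp_optimizer = DPOptimizer(
    optimizer=optimizer,
    noise_multiplier=1.0,
    max_grad_norm=1.0,
    expected_batch_size=64,
)

accountant = RDPAccountant()
sample_rate = 64 / 50_000  # batch_size / dataset_size (placeholder values)

# The hook runs on every optimizer step and records the noise multiplier
# and sample rate with the accountant.
dp_optimizer.attach_step_hook(accountant.get_optimizer_hook_fn(sample_rate=sample_rate))
```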
61 changes: 61 additions & 0 deletions opacus/grad_sample/functorch.py
@@ -0,0 +1,61 @@
from opacus.layers.dp_rnn import RNNLinear


def prepare_layer(layer, batch_first=True):
"""
Prepare a layer to compute grad samples using functorch.
The grad samples are computed by redoing the forward and
backward passes on the functional version of the module.
Args:
layer: the layer to prepare
batch_first: whether the input is batch_first or not
"""
from functorch import grad, make_functional, vmap

if len(list(layer.buffers())) > 0:
raise NotImplementedError(
"This layer has buffers and is not supported by Opacus"
)
flayer, _ = make_functional(layer)

def compute_loss_stateless_model(params, activations, backprops):
if batch_first or type(layer) is RNNLinear:
batched_activations = activations.unsqueeze(0)
batched_backprops = backprops.unsqueeze(0)
else:
# If batch_first is False, the batch dimension is the second dimension
batched_activations = activations.unsqueeze(1)
batched_backprops = backprops.unsqueeze(1)

output = flayer(params, batched_activations)
loss = (output * batched_backprops).sum()

return loss

ft_compute_grad = grad(compute_loss_stateless_model)
# Note that the vmap is done on the first dimension, regardless of batch_first
# This is because the activations and backprops given by the GradSampleModule
# are always batch_first=True
layer.ft_compute_sample_grad = vmap(ft_compute_grad, in_dims=(None, 0, 0))


def ft_compute_per_sample_gradient(layer, activations, backprops):
"""
Compute the per-sample gradient of the layer.
Args:
layer: the layer on which to compute the gradient
activations: the input to the layer
backprops: the gradient of the loss w.r.t. outputs of the layer
"""
parameters = list(layer.parameters())
if not hasattr(layer, "ft_compute_sample_grad"):
prepare_layer(layer)

per_sample_grads = layer.ft_compute_sample_grad(parameters, activations, backprops)

ret = {}
for i_p, p in enumerate(parameters):
ret[p] = per_sample_grads[i_p]

return ret
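
To give a sense of what the helpers above compute, here is a hedged sketch. It assumes functorch is installed, and the random activations and backprops simply stand in for the tensors that GradSampleModule's hooks would normally capture:
```
# Illustrative sketch, not part of this commit.
import torch
import torch.nn as nn
from opacus.grad_sample.functorch import ft_compute_per_sample_gradient

layer = nn.Linear(4, 2)
activations = torch.randn(8, 4)  # per-sample inputs to the layer (batch of 8)
backprops = torch.randn(8, 2)    # per-sample gradients of the loss w.r.t. the layer's outputs

# prepare_layer() is invoked lazily on the first call
per_sample_grads = ft_compute_per_sample_gradient(layer, activations, backprops)

for param, grad_sample in per_sample_grads.items():
    # grad_sample carries a leading batch dimension, e.g. (8, 2, 4) for the weight
    print(param.shape, grad_sample.shape)
```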
77 changes: 59 additions & 18 deletions opacus/grad_sample/grad_sample_module.py
@@ -16,13 +16,15 @@
from __future__ import annotations

import logging
import warnings
from functools import partial
from typing import List, Tuple

import torch
import torch.nn as nn
from opacus.grad_sample.functorch import ft_compute_per_sample_gradient, prepare_layer
from opacus.grad_sample.gsm_base import AbstractGradSampleModule
from opacus.layers.dp_rnn import DPRNNBase, DPRNNCellBase, RNNLinear
from opacus.layers.dp_rnn import DPGRU, DPLSTM, DPRNN, RNNLinear
from opacus.utils.module_utils import (
requires_grad,
trainable_modules,
@@ -89,6 +91,7 @@ def __init__(
batch_first=True,
loss_reduction="mean",
strict: bool = True,
force_functorch=False,
):
"""
@@ -108,6 +111,9 @@ def __init__(
possible and set to None otherwise. This is not recommended, because
some unsupported modules (e.g. BatchNorm) affect other parameters and
invalidate the concept of per sample gradients for the entire model.
force_functorch: If set to ``True``, will use functorch to compute
all per sample gradients. Otherwise, functorch will be used only
for layers without registered grad sampler methods.
Raises:
NotImplementedError
@@ -128,13 +134,24 @@
)

self.hooks_enabled = False
self.add_hooks(loss_reduction=loss_reduction, batch_first=batch_first)
self.batch_first = batch_first
self.loss_reduction = loss_reduction
self.force_functorch = force_functorch
self.add_hooks(
loss_reduction=loss_reduction,
batch_first=batch_first,
force_functorch=force_functorch,
)

def forward(self, *args, **kwargs):
return self._module(*args, **kwargs)

def add_hooks(
self, *, loss_reduction: str = "mean", batch_first: bool = True
self,
*,
loss_reduction: str = "mean",
batch_first: bool = True,
force_functorch: bool = False,
) -> None:
"""
Adds hooks to model to save activations and backprop values.
@@ -151,6 +168,8 @@ def add_hooks(
``[K, batch_size, ...]``
loss_reduction: Indicates if the loss reduction (for aggregating the gradients)
is a sum or a mean operation. Can take values "sum" or "mean"
force_functorch: If set to ``True``, will use functorch to compute all per sample gradients.
Otherwise, functorch will be used only for layers without registered grad sampler methods.
"""
if hasattr(self._module, "autograd_grad_sample_hooks"):
raise ValueError("Trying to add hooks twice to the same model")
@@ -159,20 +178,27 @@
self.autograd_grad_sample_hooks = self._module.autograd_grad_sample_hooks

for _module_name, module in trainable_modules(self._module):
if type(module) in self.GRAD_SAMPLERS:
self.autograd_grad_sample_hooks.append(
module.register_forward_hook(self.capture_activations_hook)
)
# Do not add hooks to DPRNN, DPLSTM or DPGRU as the hooks are handled by the `RNNLinear`
if type(module) in [DPRNN, DPLSTM, DPGRU]:
continue

if force_functorch or not type(module) in self.GRAD_SAMPLERS:
prepare_layer(module, batch_first=batch_first)

self.autograd_grad_sample_hooks.append(
module.register_forward_hook(self.capture_activations_hook)
)

self.autograd_grad_sample_hooks.append(
module.register_backward_hook(
partial(
self.capture_backprops_hook,
loss_reduction=loss_reduction,
batch_first=batch_first,
)
self.autograd_grad_sample_hooks.append(
module.register_backward_hook(
partial(
self.capture_backprops_hook,
loss_reduction=loss_reduction,
batch_first=batch_first,
)
)
)

self.enable_hooks()

def remove_hooks(self) -> None:
@@ -197,6 +223,11 @@ def remove_hooks(self) -> None:
delattr(self, "autograd_grad_sample_hooks")
delattr(self._module, "autograd_grad_sample_hooks")

# Remove functorch hooks
for _module_name, module in trainable_modules(self._module):
if hasattr(module, "ft_compute_sample_grad"):
delattr(module, "ft_compute_sample_grad")

def disable_hooks(self) -> None:
r"""
Globally disable all hooks installed by this library.
@@ -282,7 +313,11 @@ def capture_backprops_hook(
loss_reduction=loss_reduction,
batch_first=batch_first,
)
grad_sampler_fn = self.GRAD_SAMPLERS[type(module)]
if not self.force_functorch and type(module) in self.GRAD_SAMPLERS:
grad_sampler_fn = self.GRAD_SAMPLERS[type(module)]
else:
grad_sampler_fn = ft_compute_per_sample_gradient

grad_samples = grad_sampler_fn(module, activations, backprops)
for param, gs in grad_samples.items():
create_or_accumulate_grad_sample(
@@ -374,10 +409,13 @@ def is_supported(cls, module: nn.Module) -> bool:
Returns:
``True`` if grad sampler is found, ``False`` otherwise
"""
return type(module) in cls.GRAD_SAMPLERS or isinstance(
module, (DPRNNBase, DPRNNCellBase)
warnings.warn(
"GradSampleModule.is_supported is deprecated, as all layers can now be used with functorch.",
DeprecationWarning,
)

return True

@classmethod
def validate(
cls, module: nn.Module, *, strict: bool = False
@@ -409,7 +447,10 @@ def validate(
f"(See opacus.grad_sample.utils.register_grad_sampler)"
)
for m_name, m in trainable_modules(module)
if not cls.is_supported(m)
# With functorch, all modules are trainable
# We still want to avoid module that have buffers (e.g. BatchNorm)
# as the buffers are not private
if len(list(m.buffers())) > 0
]
)
# raise or return errors as needed
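
A short sketch of how the new `force_functorch` flag might be exercised, assuming functorch is installed; the toy model and batch are arbitrary:
```
# Illustrative sketch, not part of this commit.
import torch
import torch.nn as nn
import torch.nn.functional as F
from opacus.grad_sample import GradSampleModule

model = nn.Sequential(nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 2))
# Compute per-sample gradients via functorch even for layers that have a
# registered grad sampler (e.g. nn.Linear).
gsm = GradSampleModule(model, batch_first=True, force_functorch=True)

x = torch.randn(32, 16)
y = torch.randint(0, 2, (32,))
loss = F.cross_entropy(gsm(x), y)  # mean reduction matches the default loss_reduction
loss.backward()

for p in gsm.parameters():
    if p.requires_grad:
        print(p.grad_sample.shape)  # leading dimension is the batch size (32)
```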
4 changes: 3 additions & 1 deletion opacus/grad_sample/utils.py
@@ -52,6 +52,8 @@ def decorator(f):

def wrap_model(model: nn.Module, grad_sample_mode: str, *args, **kwargs):
cls = get_gsm_class(grad_sample_mode)
if grad_sample_mode == "functorch":
kwargs["force_functorch"] = True
return cls(model, *args, **kwargs)


@@ -63,7 +65,7 @@ def get_gsm_class(grad_sample_mode: str) -> Type[AbstractGradSampleModule]:
:param grad_sample_mode:
:return:
"""
if grad_sample_mode == "hooks":
if grad_sample_mode in ["hooks", "functorch"]:
return GradSampleModule
elif grad_sample_mode == "ew":
return GradSampleModuleExpandedWeights
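
For context, the accepted `grad_sample_mode` strings map to wrapper classes as sketched below (hypothetical usage; "ew" additionally requires a PyTorch version with ExpandedWeights support):
```
# Illustrative sketch, not part of this commit.
import torch.nn as nn
from opacus.grad_sample.utils import wrap_model

# "hooks" and "functorch" both return a GradSampleModule; "functorch" sets
# force_functorch=True, and "ew" returns GradSampleModuleExpandedWeights.
gsm_hooks = wrap_model(nn.Linear(8, 2), grad_sample_mode="hooks")
gsm_functorch = wrap_model(nn.Linear(8, 2), grad_sample_mode="functorch")
gsm_ew = wrap_model(nn.Linear(8, 2), grad_sample_mode="ew")
```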
2 changes: 1 addition & 1 deletion opacus/optimizers/ddp_perlayeroptimizer.py
@@ -67,7 +67,7 @@ def __init__(
class DistributedPerLayerOptimizer(DPOptimizer):
"""
:class:`~opacus.optimizers.optimizer.DPOptimizer` that implements
per layer clipping strategy and is compatible with distibured data parallel
per layer clipping strategy and is compatible with distributed data parallel
"""

def __init__(
4 changes: 2 additions & 2 deletions opacus/optimizers/optimizer.py
@@ -113,7 +113,7 @@ def _generate_noise(
reference: The reference Tensor to get the appropriate shape and device
for generating the noise
generator: The PyTorch noise generator
secure_mode: boolean showing if "secure" noise need to be generate
secure_mode: boolean showing if "secure" noise need to be generated
(see the notes)
Notes:
@@ -186,7 +186,7 @@ class DPOptimizer(Optimizer):
Examples:
>>> module = MyCustomModel()
>>> optimizer = torch.optim.SGD(module.parameters(), lr=0.1)
>>> dp_optimzer = DPOptimizer(
>>> dp_optimizer = DPOptimizer(
... optimizer=optimizer,
... noise_multiplier=1.0,
... max_grad_norm=1.0,
16 changes: 10 additions & 6 deletions opacus/tests/grad_sample_module_test.py
@@ -212,11 +212,18 @@ def __init__(self, in_f, out_f):
def forward(self, x: torch.Tensor):
return F.linear(x, self.p)

with self.assertRaises(NotImplementedError):
GradSampleModule(SimpleLinear(4, 2))
# Should be handled by functorch
try:
gsm = GradSampleModule(SimpleLinear(4, 2))
self.assertTrue(hasattr(gsm._module, "ft_compute_sample_grad"))
except ImportError:
print("Test could not be ran because functorch not available")

# Should not raise exception if strict=False
GradSampleModule(SimpleLinear(4, 2), strict=False)
try:
GradSampleModule(SimpleLinear(4, 2), strict=False)
except ImportError:
print("Test could not be ran because functorch not available")

# Should not fail after relevant grad sampler has been registered
register_grad_sampler(SimpleLinear)(compute_linear_grad_sample)
@@ -226,9 +233,6 @@ def test_custom_module_validation(self):
with self.assertRaises(NotImplementedError):
GradSampleModule(mobilenet_v3_small())

# Should not raise exception if strict=False
GradSampleModule(mobilenet_v3_small(), strict=False)

def test_submodule_access(self):
_ = self.grad_sample_module.fc1
_ = self.grad_sample_module.fc2