Skip to content
Browse files

Release 0.7.0 preparation (#567)

* Version bump

* Remove deprecated skorch.callbacks.CyclicLR

Use torch.optim.lr_scheduler.CyclicLR instead.

* Prepare for new release

* Update links in

* Bump min. torch version to 1.1.0
  • Loading branch information
BenjaminBossan authored and ottonemo committed Nov 29, 2019
1 parent 2a96147 commit 4097e90b96d85b1d8d3d558da3f1e034ea4bab42
@@ -9,30 +9,40 @@ and this project adheres to [Semantic Versioning](

### Added

- More careful check for wrong parameter names being passed to NeuralNet (#500)
### Changed

### Fixed

## [0.7.0] - 2019-11-29

### Added

- More careful check for wrong parameter names being passed to `NeuralNet` (#500)
- More helpful error messages when trying to predict using an uninitialized model
- Add TensorBoard callback for automatic logging to tensorboard
- Make NeuralNetBinaryClassifier work with sklearn.calibration.CalibratedClassifierCV
- Improve NeuralNetBinaryClassifier compatibility with certain sklearn metrics (#515)
- NeuralNetBinaryClassifier automatically squeezes module output if necessary (#515)
- NeuralNetClassifier now has a classes_ attribute after fit is called, which is inferred from y by default (#465, #486)
- NeuralNet.load_params with a checkpoint now initializes when needed (#497)
- Add `TensorBoard` callback for automatic logging to tensorboard
- Make `NeuralNetBinaryClassifier` work with `sklearn.calibration.CalibratedClassifierCV`
- Improve `NeuralNetBinaryClassifier` compatibility with certain sklearn metrics (#515)
- `NeuralNetBinaryClassifier` automatically squeezes module output if necessary (#515)
- `NeuralNetClassifier` now has a `classes_` attribute after fit is called, which is inferred from y by default (#465, #486)
- `NeuralNet.load_params` with a checkpoint now initializes when needed (#497)

### Changed

- Improve numerical stability when using `NLLLoss` in `NeuralNetClassifier` (#491)
- Refactor code to make gradient accumulation easier to implement (#506)
- NeuralNetBinaryClassifier.predict_proba now returns a 2-dim array; to access the "old" y_proba, take y_proba[:, 1] (#515)
- net.history is now a property that accesses net.history_, which stores the History object (#527)
- `NeuralNetBinaryClassifier.predict_proba` now returns a 2-dim array; to access the "old" `y_proba`, take `y_proba[:, 1]` (#515)
- `net.history` is now a property that accesses `net.history_`, which stores the `History` object (#527)
- Remove deprecated `skorch.callbacks.CyclicLR`, use `torch.optim.lr_scheduler.CyclicLR` instead

### Future Changes

- WARNING: In a future release, the behavior of method `net.get_params` will change to make it more consistent with sklearn: it will no longer return "learned" attributes like `module_`. Therefore, functions like `sklearn.base.clone`, when called with a fitted net, will no longer return a fitted net but instead an uninitialized net. If you want a copy of a fitted net, use `copy.deepcopy` instead. Note that `net.get_params` is used under the hood by many sklearn functions and classes, such as `GridSearchCV`, whose behavior may thus be affected by the change. (#521, #527)

### Fixed

- Fixed a bug that caused LoadInitState not to work with TrainEndCheckpoint (#528)
- Fixed NeuralNetBinaryClassifier wrongly squeezing the batch dimension when using batch_size = 1 (#558)
- Fixed a bug that caused `LoadInitState` not to work with `TrainEndCheckpoint` (#528)
- Fixed `NeuralNetBinaryClassifier` wrongly squeezing the batch dimension when using `batch_size = 1` (#558)

## [0.6.0] - 2019-07-19
@@ -152,6 +162,8 @@ and this project adheres to [Semantic Versioning](
the net was configured to use the CPU (#354, #358)

@@ -1 +1 @@
@@ -261,7 +261,7 @@
"outputs": [],
"source": [
"from skorch.callbacks import LRScheduler\n",
"from skorch.callbacks.lr_scheduler import CyclicLR\n",
"from torch.optim.lr_scheduler import CyclicLR\n",
"cyclicLR = LRScheduler(policy=CyclicLR, \n",
" base_lr=0.002, \n",
@@ -11,7 +11,7 @@
from . import callbacks


# pylint: disable=wrong-import-position
@@ -14,7 +14,7 @@
from .lr_scheduler import *

__all__ = ['Callback', 'EpochTimer', 'PrintLog', 'ProgressBar',
'LRScheduler', 'WarmRestartLR', 'CyclicLR', 'GradientNormClipping',
'LRScheduler', 'WarmRestartLR', 'GradientNormClipping',
'BatchScoring', 'EpochScoring', 'Checkpoint', 'EarlyStopping',
'Freezer', 'Unfreezer', 'Initializer', 'ParamMapper',
'LoadInitState', 'TrainEndCheckpoint']
@@ -22,7 +22,7 @@
from skorch.callbacks import Callback

__all__ = ['LRScheduler', 'WarmRestartLR', 'CyclicLR']
__all__ = ['LRScheduler', 'WarmRestartLR']

@@ -165,14 +165,11 @@ def _get_scheduler(self, net, policy, **scheduler_kwargs):
"""Return scheduler, based on indicated policy, with appropriate
if policy not in [CyclicLR, ReduceLROnPlateau] and \
if policy not in [ReduceLROnPlateau] and \
'last_epoch' not in scheduler_kwargs:
last_epoch = len(net.history) - 1
scheduler_kwargs['last_epoch'] = last_epoch

if policy is CyclicLR and \
'last_batch_idx' not in scheduler_kwargs:
scheduler_kwargs['last_batch_idx'] = self.batch_idx_ - 1
return policy(net.optimizer_, **scheduler_kwargs)

@@ -246,223 +243,3 @@ def get_lr(self):
return current_lrs.tolist()

class CyclicLR:
"""Sets the learning rate of each parameter group according to
cyclical learning rate policy (CLR). The policy cycles the learning
rate between two boundaries with a constant frequency, as detailed in
the paper.
The distance between the two boundaries can be scaled on a per-iteration
or per-cycle basis.
Cyclical learning rate policy changes the learning rate after every batch.
``batch_step`` should be called after a batch has been used for training.
To resume training, save `last_batch_idx` and use it to instantiate
This class has three built-in policies, as put forth in the paper:
A basic triangular cycle w/ no amplitude scaling.
A basic triangular cycle that scales initial amplitude by half each
A cycle that scales initial amplitude by gamma**(cycle iterations)
at each cycle iteration.
This implementation was adapted from the github repo:
`bckenstler/CLR <https://github.com/bckenstler/CLR>`_
optimizer : torch.optimizer.Optimizer instance.
Optimizer algorithm.
base_lr : float or list of float (default=1e-3)
Initial learning rate which is the lower boundary in the
cycle for each param groups (float) or each group (list).
max_lr : float or list of float (default=6e-3)
Upper boundaries in the cycle for each parameter group (float)
or each group (list). Functionally, it defines the cycle
amplitude (max_lr - base_lr). The lr at any cycle is the sum
of base_lr and some scaling of the amplitude; therefore max_lr
may not actually be reached depending on scaling function.
step_size_up : int (default=2000)
Number of training iterations in the increasing half of a cycle.
step_size_down : int (default=None)
Number of training iterations in the decreasing half of a cycle.
If step_size_down is None, it is set to step_size_up.
mode : str (default='triangular')
One of {triangular, triangular2, exp_range}. Values correspond
to policies detailed above. If scale_fn is not None, this
argument is ignored.
gamma : float (default=1.0)
Constant in 'exp_range' scaling function:
gamma**(cycle iterations)
scale_fn : function (default=None)
Custom scaling policy defined by a single argument lambda
function, where 0 <= scale_fn(x) <= 1 for all x >= 0.
mode parameter is ignored.
scale_mode : str (default='cycle')
One of {'cycle', 'iterations'}. Defines whether scale_fn
is evaluated on cycle number or cycle iterations (training
iterations since start of cycle).
last_batch_idx : int (default=-1)
The index of the last batch.
>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
>>> scheduler = torch.optim.CyclicLR(optimizer)
>>> data_loader = torch.utils.data.DataLoader(...)
>>> for epoch in range(10):
>>> for batch in data_loader:
>>> scheduler.batch_step()
>>> train_batch(...)
.. [1] Leslie N. Smith, 2017, "Cyclical Learning Rates for
Training Neural Networks,". "ICLR"

def __init__(self, optimizer, base_lr=1e-3, max_lr=6e-3,
step_size_up=2000, step_size_down=None, mode='triangular',
gamma=1., scale_fn=None, scale_mode='cycle',
last_batch_idx=-1, step_size=None):

# TODO: Remove class in 0.7
"skorch.callbacks.CyclicLR is deprecated, please use "
"skorch.callbacks.LRScheduler together with "
"torch.optim.lr_scheduler.CyclicLR instead",

if not isinstance(optimizer, Optimizer):
raise TypeError('{} is not an Optimizer'.format(
self.optimizer = optimizer

# copied from torch.optim._lr_scheduler._LRScheduler
base_lrs = _check_lr('base_lr', optimizer, base_lr)
if last_batch_idx == -1:
for lr, group in zip(base_lrs, optimizer.param_groups):
group['lr'] = lr
for group in optimizer.param_groups:
group.setdefault('initial_lr', group['lr'])
last_batch_idx = 0
for i, group in enumerate(optimizer.param_groups):
if 'initial_lr' not in group:
raise KeyError("param 'initial_lr' is not specified "
"in param_groups[{}] when resuming an optimizer"
self.base_lrs = list(map(
lambda group: group['initial_lr'], optimizer.param_groups))

self.max_lrs = _check_lr('max_lr', optimizer, max_lr)

# TODO: Remove warning in a future release
if step_size is not None:
"step_size is deprecated in CycleLR, please use step_size_up "
"and step_size_down instead",
step_size_up = step_size
step_size_down = step_size

step_size_down = step_size_down or step_size_up
self.total_size = float(step_size_up + step_size_down)
self.step_ratio = float(step_size_up) / self.total_size

if mode not in ['triangular', 'triangular2', 'exp_range'] \
and scale_fn is None:
raise ValueError('mode is invalid and scale_fn is None')

self.mode = mode
self.gamma = gamma

if scale_fn is None:
if self.mode == 'triangular':
self.scale_fn = self._triangular_scale_fn
self.scale_mode = 'cycle'
elif self.mode == 'triangular2':
self.scale_fn = self._triangular2_scale_fn
self.scale_mode = 'cycle'
elif self.mode == 'exp_range':
self.scale_fn = self._exp_range_scale_fn
self.scale_mode = 'iterations'
self.scale_fn = scale_fn
self.scale_mode = scale_mode


def step(self, epoch=None):
"""Not used by ``CyclicLR``, use batch_step instead."""

def batch_step(self, batch_idx=None):
"""Updates the learning rate for the batch index: ``batch_idx``.
If ``batch_idx`` is None, ``CyclicLR`` will use an internal
batch index to keep track of the index.
if batch_idx is None:
batch_idx = self.last_batch_idx + 1
self.last_batch_idx = batch_idx
for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
param_group['lr'] = lr

# pylint: disable=unused-argument
def _triangular_scale_fn(self, x):
"""Cycle amplitude remains constant"""
return 1.

def _triangular2_scale_fn(self, x):
Decreases the cycle amplitude by half after each period,
while keeping the base lr constant.
return 1 / (2. ** (x - 1))

def _exp_range_scale_fn(self, x):
Scales the cycle amplitude by a factor ``gamma**x``,
while keeping the base lr constant.
return self.gamma**(x)

def get_lr(self):
"""Calculates the learning rate at batch index:
cycle = np.floor(1 + self.last_batch_idx / self.total_size)
x = 1 + self.last_batch_idx / self.total_size - cycle
if x <= self.step_ratio:
scale_factor = x / self.step_ratio
scale_factor = (x-1)/(self.step_ratio-1)

lrs = []
for base_lr, max_lr in zip(self.base_lrs, self.max_lrs):
base_height = (max_lr - base_lr) * scale_factor
if self.scale_mode == 'cycle':
lr = base_lr + base_height * self.scale_fn(cycle)
lr = base_lr + base_height * self.scale_fn(self.last_batch_idx)
return lrs

0 comments on commit 4097e90

Please sign in to comment.
You can’t perform that action at this time.