
Set default LR value of SGD to 1e-3 #114467

Closed

wants to merge 2 commits
10 changes: 5 additions & 5 deletions torch/optim/sgd.py
@@ -1,16 +1,17 @@
 import torch
 from torch import Tensor
-from .optimizer import (Optimizer, required, _use_grad_for_differentiable, _default_to_fused_or_foreach,
+from .optimizer import (Optimizer, _use_grad_for_differentiable, _default_to_fused_or_foreach,
                         _differentiable_doc, _foreach_doc, _maximize_doc)
 from typing import List, Optional
 
 __all__ = ['SGD', 'sgd']
 
 
 class SGD(Optimizer):
-    def __init__(self, params, lr=required, momentum=0, dampening=0,
+    def __init__(self, params, lr=1e-3, momentum=0, dampening=0,
                  weight_decay=0, nesterov=False, *, maximize: bool = False, foreach: Optional[bool] = None,
                  differentiable: bool = False):
-        if lr is not required and lr < 0.0:
+        if lr < 0.0:
             raise ValueError(f"Invalid learning rate: {lr}")
         if momentum < 0.0:
             raise ValueError(f"Invalid momentum value: {momentum}")
@@ -51,7 +52,6 @@ def _init_group(self, group, params_with_grad, d_p_list, momentum_buffer_list):
 
         return has_sparse_grad
 
-
     @_use_grad_for_differentiable
     def step(self, closure=None):
         """Performs a single optimization step.
@@ -130,7 +130,7 @@ def step(self, closure=None):
     Args:
         params (iterable): iterable of parameters to optimize or dicts defining
             parameter groups
-        lr (float): learning rate
+        lr (float, optional): learning rate (default: 1e-3)
         momentum (float, optional): momentum factor (default: 0)
         weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
         dampening (float, optional): dampening for momentum (default: 0)
2 changes: 1 addition & 1 deletion torch/optim/sgd.pyi
@@ -4,7 +4,7 @@ class SGD(Optimizer):
     def __init__(
         self,
         params: ParamsT,
-        lr: float,
+        lr: float = ...,
         momentum: float = ...,
         dampening: float = ...,
         weight_decay: float = ...,
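With this change, the learning rate no longer has to be passed explicitly: omitting `lr` falls back to 1e-3. A minimal sketch of the resulting usage, assuming a build that includes this PR; the `Linear` model and the `lr=0.1` value below are purely illustrative:

```python
import torch

# Illustrative model; any iterable of parameters works the same way.
model = torch.nn.Linear(4, 2)

# Previously lr used the `required` sentinel, so it had to be supplied.
# With this PR, omitting it uses the new default of 1e-3.
opt = torch.optim.SGD(model.parameters())
print(opt.defaults["lr"])  # 0.001

# Passing lr explicitly behaves exactly as before.
opt_explicit = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
```

The `torch/optim/sgd.pyi` change mirrors this at the type level by giving `lr` a default in the stub, so type checkers accept calls that omit it.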