diff --git a/docs/source/autograd.rst b/docs/source/autograd.rst
index 7863169222a9c..135cfcf3393a3 100644
--- a/docs/source/autograd.rst
+++ b/docs/source/autograd.rst
@@ -65,7 +65,7 @@ Variable (deprecated)
 Tensor autograd functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. autoclass:: torch.Tensor
-    :members: backward, detach, detach_, register_hook, retain_grad
+    :members: grad, requires_grad, is_leaf, backward, detach, detach_, register_hook, retain_grad
 
 :hidden:`Function`
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/docs/source/tensors.rst b/docs/source/tensors.rst
index c73a36832f0c6..23d67e769f65d 100644
--- a/docs/source/tensors.rst
+++ b/docs/source/tensors.rst
@@ -139,6 +139,9 @@ view of a storage and defines numeric operations on it.
    .. automethod:: new_ones
    .. automethod:: new_zeros
 
+   .. autoattribute:: is_cuda
+   .. autoattribute:: device
+
    .. automethod:: abs
    .. automethod:: abs_
    .. automethod:: acos
@@ -198,8 +201,6 @@ view of a storage and defines numeric operations on it.
    .. automethod:: cumsum
    .. automethod:: data_ptr
    .. automethod:: det
-   .. autoattribute:: device
-      :annotation:
    .. automethod:: diag
    .. automethod:: diag_embed
    .. automethod:: dim
@@ -257,8 +258,6 @@ view of a storage and defines numeric operations on it.
    .. automethod:: int
    .. automethod:: inverse
    .. automethod:: is_contiguous
-   .. autoattribute:: is_cuda
-      :annotation:
    .. automethod:: is_pinned
    .. automethod:: is_set_to
    .. automethod:: is_signed
diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py
index 17deab870372d..999f225c29de3 100644
--- a/torch/_tensor_docs.py
+++ b/torch/_tensor_docs.py
@@ -2918,3 +2918,72 @@ def callable(a, b) -> number
 
 See :func:`torch.pinverse`
 """)
+
+add_docstr_all('grad',
+               r"""
+This attribute is ``None`` by default and becomes a Tensor the first time a call to
+:func:`backward` computes gradients for ``self``.
+The attribute will then contain the computed gradients, and future calls to
+:func:`backward` will accumulate (add) gradients into it.
+""")
+
+add_docstr_all('requires_grad',
+               r"""
+Is ``True`` if gradients need to be computed for this Tensor, ``False`` otherwise.
+
+.. note::
+
+    The fact that gradients need to be computed for a Tensor does not mean that the :attr:`grad`
+    attribute will be populated; see :attr:`is_leaf` for more details.
+
+""")
+
+add_docstr_all('is_leaf',
+               r"""
+All Tensors that have :attr:`requires_grad` set to ``False`` will be leaf Tensors by convention.
+
+Tensors that have :attr:`requires_grad` set to ``True`` will be leaf Tensors only if they were
+created by the user. This means that they are not the result of an operation and so
+:attr:`grad_fn` is ``None``.
+
+Only leaf Tensors will have their :attr:`grad` populated during a call to :func:`backward`.
+To get :attr:`grad` populated for non-leaf Tensors, you can use :func:`retain_grad`.
+
+Example::
+
+    >>> a = torch.rand(10, requires_grad=True)
+    >>> a.is_leaf
+    True
+    >>> b = torch.rand(10, requires_grad=True).cuda()
+    >>> b.is_leaf
+    False
+    # b was created by the operation that casts a CPU Tensor into a CUDA Tensor
+    >>> c = torch.rand(10, requires_grad=True) + 2
+    >>> c.is_leaf
+    False
+    # c was created by the addition operation
+    >>> d = torch.rand(10).cuda()
+    >>> d.is_leaf
+    True
+    # d does not require gradients and so has no operation creating it (that is tracked by the autograd engine)
+    >>> e = torch.rand(10).cuda().requires_grad_()
+    >>> e.is_leaf
+    True
+    # e requires gradients and has no operation creating it
+    >>> f = torch.rand(10, requires_grad=True, device="cuda")
+    >>> f.is_leaf
+    True
+    # f requires grad and has no operation creating it
+
+
+""")
+
+add_docstr_all('is_cuda',
+               r"""
+Is ``True`` if the Tensor is stored on the GPU, ``False`` otherwise.
+""")
+
+add_docstr_all('device',
+               r"""
+Is the :class:`torch.device` where this Tensor resides.
+""")
diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp
index c5d07e416dc5c..5356c1d80f141 100644
--- a/torch/csrc/Module.cpp
+++ b/torch/csrc/Module.cpp
@@ -223,6 +223,18 @@ PyObject *THPModule_addDocStr(PyObject *_unused, PyObject *args)
         "method '%s' already has a docstring", m->d_method->ml_name);
     }
     m->d_method->ml_doc = doc_str;
+  } else if (strcmp(Py_TYPE(obj)->tp_name, "getset_descriptor") == 0) {
+    //NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast)
+    PyGetSetDescrObject* m = (PyGetSetDescrObject *)obj;
+    if (m->d_getset->doc) {
+      //NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
+      return PyErr_Format(PyExc_RuntimeError,
+        "attribute '%s' already has a docstring", m->d_getset->name);
+    }
+    // This field is not const for Python < 3.7, yet the content is
+    // never modified.
+    //NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
+    m->d_getset->doc = const_cast<char *>(doc_str);
   } else {
     return PyErr_Format(PyExc_TypeError,
       "don't know how to add docstring to type '%s'", Py_TYPE(obj)->tp_name);
diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py
index 4f020df7b41ef..2bd6775e7347e 100644
--- a/torch/nn/modules/module.py
+++ b/torch/nn/modules/module.py
@@ -398,6 +398,16 @@ def register_backward_hook(self, hook):
             :class:`torch.utils.hooks.RemovableHandle`:
                 a handle that can be used to remove the added hook by calling
                 ``handle.remove()``
+
+        .. warning::
+
+            The current implementation will not exhibit the presented behavior
+            for a complex :class:`Module` that performs many operations.
+            In some failure cases, :attr:`grad_input` and :attr:`grad_output` will only
+            contain the gradients for a subset of the inputs and outputs.
+            For such a :class:`Module`, you should use :func:`torch.Tensor.register_hook`
+            directly on a specific input or output to get the required gradients.
+
         """
         handle = hooks.RemovableHandle(self._backward_hooks)
         self._backward_hooks[handle.id] = hook
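
Note (illustrative, not part of the patch): the snippet below is a minimal doctest-style
sketch of the accumulation and ``retain_grad`` behavior that the new ``grad`` and
``is_leaf`` docstrings describe, assuming standard ``torch`` semantics::

    >>> x = torch.ones(2, requires_grad=True)  # leaf Tensor created by the user
    >>> x.grad is None                         # .grad starts out unset
    True
    >>> (x * 3).sum().backward()
    >>> x.grad                                 # populated on the leaf by backward()
    tensor([3., 3.])
    >>> (x * 3).sum().backward()
    >>> x.grad                                 # a second call accumulates (adds) into it
    tensor([6., 6.])
    >>> y = x * 3                              # non-leaf: .grad is not kept by default
    >>> y.retain_grad()                        # opt in to keeping .grad on a non-leaf
    >>> y.sum().backward()
    >>> y.grad
    tensor([1., 1.])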