#### Gradient descent:
<br>

$f(x) \longrightarrow  \displaystyle \min_{x} $

$x^{t+1} = x^t-\alpha{f'(x^t)}$, where $\alpha > 0$ is the step size (learning rate).

$f(x^{t+1}) = f(x^t-\alpha{f'(x^t)})$

In [1]:
import torch

In [2]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cpu')

In [3]:
# Create the leaf tensor directly on the target device. Calling `.to(device)` on a
# tensor that requires grad returns a non-leaf copy whenever the device differs
# (e.g. CUDA), and autograd does not populate `.grad` on non-leaf tensors.
x = torch.tensor([1.], requires_grad=True, device=device)

# f(x) = 10 * sum(x^2)  =>  df/dx = 20 * x
function = 10 * (x ** 2).sum()
function.backward()
print(x.grad, '<- gradient')

tensor([20.]) <- gradient


In [4]:
# Build the float64 leaf tensor on the target device up front: a later `.to(device)`
# to a different device would return a non-leaf copy whose `.grad` stays None.
x = torch.tensor(list(range(1, 13)), requires_grad=True,
                 dtype=torch.float64, device=device)

# f(x) = sum(log(x))  =>  df/dx = 1 / x
function = (x.log()).sum()
function.backward()
print(x)
print(x.grad, '<- gradient')

tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.],
       dtype=torch.float64, requires_grad=True)
tensor([1.0000, 0.5000, 0.3333, 0.2500, 0.2000, 0.1667, 0.1429, 0.1250, 0.1111,
        0.1000, 0.0909, 0.0833], dtype=torch.float64) <- gradient


In [5]:
# 3x4 leaf tensor holding 1..12; gradient tracking is switched on right after construction.
x = torch.tensor([
    [1., 2., 3., 4.],
    [5., 6., 7., 8.],
    [9., 10., 11., 12.],
]).requires_grad_(True)

In [6]:
# Scalar objective: the sum of the squared entries of x.
function = torch.sum(x ** 2)
# Back-propagate; d(function)/dx is accumulated into x.grad.
function.backward()

In [7]:
# Display the input tensor together with the scalar objective computed from it.
x, function

(tensor([[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]], requires_grad=True),
 tensor(650., grad_fn=<SumBackward0>))

In [8]:
# Gradient of `function` with respect to x, as stored by function.backward().
x.grad

tensor([[ 2.,  4.,  6.,  8.],
        [10., 12., 14., 16.],
        [18., 20., 22., 24.]])

In [9]:
# Display x again: backward() only fills x.grad, it does not modify the values of x.
x

tensor([[ 1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12.]], requires_grad=True)

In [10]:
# One manual gradient-descent step with step size 0.001. The in-place update runs
# under no_grad() so autograd does not record it; this replaces the legacy `.data`
# attribute, which sidesteps autograd's in-place correctness checks.
with torch.no_grad():
    x -= 0.001 * x.grad

In [11]:
# Reset the stored gradient in place; backward() adds to .grad instead of overwriting it,
# so it has to be cleared before the next backward pass.
x.grad.zero_()

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [12]:
import numpy as np

In [13]:
# View the current values of x as a NumPy array. detach() yields a tensor that shares
# storage with x but is excluded from autograd; it is the supported replacement for
# the legacy `.data` attribute.
x.detach().numpy()

array([[ 0.998,  1.996,  2.994,  3.992],
       [ 4.99 ,  5.988,  6.986,  7.984],
       [ 8.982,  9.98 , 10.978, 11.976]], dtype=float32)

In [14]:
# Extract the objective as a plain Python float. item() does this directly,
# without going through the legacy `.data` attribute and a NumPy round-trip.
function.item()

650.0

In [15]:
# Flatten the values of x into a single-row 2-D NumPy array (1 x number of elements).
np.reshape(np.asarray(x.data), (1, -1))

array([[ 0.998,  1.996,  2.994,  3.992,  4.99 ,  5.988,  6.986,  7.984,
         8.982,  9.98 , 10.978, 11.976]], dtype=float32)

In [16]:
# Minimise f(x) = (x - 2)^2 with hand-written gradient descent, starting from x = 10
# and using a fixed step size of 0.1 for 100 iterations.
x = torch.tensor([10.], requires_grad=True)
for step in range(100):
    fun = ((x - 2) ** 2).sum()
    fun.backward()
    # Update the parameter outside of autograd tracking (replaces the legacy `.data` access).
    with torch.no_grad():
        x -= 0.1 * x.grad
    # Clear the gradient so the next backward() does not accumulate onto it.
    x.grad.zero_()
    if step % 10 == 0:
        # `fun` was evaluated before the update above, so the printed y is the objective
        # at the previous x, not at the x printed next to it.
        # item() returns the Python scalar directly; float() on a 1-element NumPy array
        # with ndim > 0 is deprecated since NumPy 1.25.
        print('x = {} | y = {} | step = {}'.format(x.item(), fun.item(), step))

x = 8.399999618530273 | y = 64.0 | step = 0
x = 2.687194585800171 | y = 0.7378695011138916 | step = 10
x = 2.073786973953247 | y = 0.008507047779858112 | step = 20
x = 2.007922649383545 | y = 9.807794413063675e-05 | step = 30
x = 2.0008506774902344 | y = 1.1307065506116487e-06 | step = 40
x = 2.000091075897217 | y = 1.2987811714992858e-08 | step = 50
x = 2.0000100135803223 | y = 1.5370460459962487e-10 | step = 60
x = 2.0000011920928955 | y = 2.0463630789890885e-12 | step = 70
x = 2.000000476837158 | y = 2.2737367544323206e-13 | step = 80
x = 2.000000476837158 | y = 2.2737367544323206e-13 | step = 90


In [18]:
import torch

# Minimise f(x) = (x - 3)^2 starting from x = 10, this time letting torch.optim.SGD
# apply the update instead of modifying x by hand.
x = torch.tensor([10.], requires_grad=True)
optimizer = torch.optim.SGD([x], lr=0.1)


def func(variable):
    """Return the scalar objective sum((variable - 3) ** 2)."""
    return ((variable - 3) ** 2).sum()


def grad_step(function, variable):
    """Run one optimisation step on ``function(variable)``.

    Clears the stored gradients, evaluates the objective, back-propagates and
    lets the optimizer update its parameters.

    Note: this uses the module-level ``optimizer``, so ``variable`` is only
    updated if it is one of the tensors that optimizer was constructed with.
    """
    optimizer.zero_grad()
    function_result = function(variable)
    function_result.backward()
    optimizer.step()


for step in range(100):
    if step % 10 == 0:
        # Report before this iteration's update, so x and y describe the same point.
        # item() returns the Python scalar directly; float() on a 1-element NumPy array
        # with ndim > 0 is deprecated since NumPy 1.25.
        print('x = {} | y = {} | step = {}'.format(x.item(), func(x).item(), step))
    grad_step(func, x)

x = 10.0 | y = 49.0 | step = 0
x = 3.751619338989258 | y = 0.5649316310882568 | step = 10
x = 3.0807044506073 | y = 0.0065132081508636475 | step = 20
x = 3.0086655616760254 | y = 7.50919571146369e-05 | step = 30
x = 3.0009303092956543 | y = 8.65475385580794e-07 | step = 40
x = 3.0000996589660645 | y = 9.93190951703582e-09 | step = 50
x = 3.0000107288360596 | y = 1.1510792319313623e-10 | step = 60
x = 3.0000011920928955 | y = 1.4210854715202004e-12 | step = 70
x = 3.000000476837158 | y = 2.2737367544323206e-13 | step = 80
x = 3.000000476837158 | y = 2.2737367544323206e-13 | step = 90
