# How gradient descent works

In [None]:
from ipywidgets import interact
from fastai.basics import *

In [None]:
# Figure parameters: set the default resolution of every figure to 60 dpi.
# (`plt` is presumably matplotlib.pyplot, provided by the fastai star import.)
plt.rc('figure', dpi=60)

def plot_function(f, title=None, min=-2.1, max=2.1, color='r', ylim=None):
    """Draw the curve of `f` with `plt`.

    `f` is evaluated on 100 evenly spaced points between `min` and `max`,
    shaped as a column tensor. `color` is handed to `plt.plot` as its third
    positional argument. `ylim` is applied only when truthy and `title` only
    when it is not None.
    """
    xs = torch.linspace(min, max, 100).unsqueeze(1)
    if ylim:
        plt.ylim(ylim)
    plt.plot(xs, f(xs), color)
    if title is not None:
        plt.title(title)

## Quadratic function example

### Function definition

In [None]:
def f(x):
    """Evaluate the quadratic 3x^2 + 2x + 1 at `x`."""
    squared_term = 3 * x**2
    linear_term = 2 * x
    return squared_term + linear_term + 1

# Plot f, using its formula (written in $...$ math markup) as the title.
plot_function(f, "$3x^2 + 2x + 1$")

In [None]:
def quad(a, b, c, x):
    """Return the general quadratic a*x^2 + b*x + c evaluated at `x`."""
    second_order = a * x**2
    first_order = b * x
    return second_order + first_order + c
def mk_quad(a,b,c):
    """Build a one-argument quadratic by fixing the coefficients of `quad`."""
    fixed_coeffs = (a, b, c)
    return partial(quad, *fixed_coeffs)


In [None]:
# Plot the quadratic with coefficients (3, 2, 1) built by mk_quad; no title is set.
plot_function(mk_quad(3,2,1))

In [None]:
def noise(x, scale):
    """Return Gaussian noise with the same shape as `x`.

    Samples come from NumPy's global random state (standard deviation
    `scale`) and are wrapped as a torch tensor without changing their dtype.
    """
    dims = list(x.shape)
    samples = np.random.normal(scale=scale, size=dims)
    return torch.from_numpy(samples)
def add_noise(x, mult, add):
    """Perturb `x` with multiplicative noise (scale `mult`) plus additive noise (scale `add`)."""
    # The multiplicative draw happens first, so the random stream is consumed
    # in the same order as a single left-to-right expression would.
    relative = noise(x, mult)
    absolute = noise(x, add)
    return x * (1 + relative) + absolute

In [None]:
# Fix NumPy's global seed so the noise drawn below is reproducible.
np.random.seed(42)

# 20 evenly spaced inputs in [-2, 2], reshaped into a column.
x = torch.linspace(-2, 2, steps=20)[:,None]
# Targets: the quadratic f evaluated at x, perturbed by add_noise with
# mult=0.15 and add=1.5.
y = add_noise(f(x), 0.15, 1.5)

In [None]:
# Scatter plot of the noisy samples; the trailing semicolon keeps the notebook
# from echoing the object returned by scatter.
plt.scatter(x, y);

In [None]:
# NOTE: interact does not work on Onyxia
@interact(a=1.1, b=1.1, c=1.1)
def plot_quad(a, b, c):
    """Overlay the quadratic with coefficients (a, b, c) on the noisy data."""
    plt.scatter(x, y)
    candidate = mk_quad(a, b, c)
    plot_function(candidate, ylim=(-3, 13))

### Loss definition

In [None]:
def mae(preds, acts):
    """Mean absolute error between predictions `preds` and actual values `acts`."""
    residuals = preds - acts
    return torch.abs(residuals).mean()

In [None]:
# MAE between the noisy samples y and the curve f(x), converted to a Python float.
float(mae(y, f(x)))

In [None]:
@interact(a=1.1, b=1.1, c=1.1)
def plot_quad(a, b, c):
    """Plot the data with the quadratic (a, b, c) and show its MAE in the title."""
    curve = mk_quad(a, b, c)
    plt.scatter(x, y)
    loss = mae(curve(x), y)
    plot_function(curve, ylim=(-3, 12), title=f"MAE: {loss:.2f}")

### Automatic gradient descent

In [None]:
def quad_mae(params):
    """Return the MAE on the global (x, y) data of the quadratic with coefficients `params`."""
    predictions = mk_quad(*params)(x)
    return mae(predictions, y)

In [None]:
# Loss for the coefficients (1.1, 1.1, 1.1), passed here as a plain tuple.
quad_mae((1.1, 1.1, 1.1))

In [None]:
# Coefficients as a tensor created with requires_grad=True so autograd tracks it.
param = torch.tensor((1.1, 1.1, 1.1), requires_grad = True)
print(f'param : {param}')
# No backward pass has run on this tensor yet, so .grad is expected to be None.
print(f'param gradient : {param.grad}')

In [None]:
# Forward pass only: computes the loss from param; backward() is not called
# here, so the printed gradient is unchanged by this cell.
loss = quad_mae(param)
print(f'loss : {loss:.3}')
print(f'param : {param}')
print(f'param gradient : {param.grad}')

In [None]:
# Backward pass: autograd accumulates d(loss)/d(param) into param.grad.
loss.backward()
print(f'loss : {loss: .3}')
print(f'param : {param}')
print(f'param gradient : {param.grad}')

In [None]:
# One manual descent step. no_grad() keeps the in-place update and the loss
# re-evaluation from being tracked by autograd.
with torch.no_grad():
    # Move against the gradient, scaled by a learning rate of 0.01.
    param -= param.grad*0.01
    loss = quad_mae(param)
    
print(f'loss={loss:.2f}')
print(f'param : {param}')
# NOTE(review): param.grad is not reset in this cell, so it still holds the
# gradient from the last backward() call.
print(f'param gradient : {param.grad}')

In [None]:
# Plain gradient descent on the quadratic coefficients: 13 steps, learning rate 0.01.
for i in range(13):
    # backward() accumulates into .grad, so drop the previous gradient first.
    # Otherwise each step would use the running sum of every earlier gradient
    # (including any left over from earlier cells) instead of the current one.
    param.grad = None
    loss = quad_mae(param)
    loss.backward()
    print(f'step {i}, loss={loss:.2f}')
    # Update under no_grad so the step itself is not tracked by autograd.
    with torch.no_grad():
        param -= param.grad*0.01
# NOTE(review): without accumulated gradients each step is smaller; increase the
# number of steps or the learning rate if the fit needs to get closer.

In [None]:
# Detach the fitted coefficients from autograd and convert them to a NumPy array.
# NOTE(review): this rebinds `param` to an array, so earlier cells that expect a
# tensor with requires_grad cannot be re-run without recreating it.
param = param.detach().numpy()
print(f'param : {param}')

# NOTE(review): rebinds the global `f`, replacing the quadratic defined earlier
# in the notebook with the fitted one.
f = mk_quad(*param)
plt.scatter(x,y)
loss = mae(f(x), y)
# Show the fitted curve over the data, with its MAE in the title.
plot_function(f, ylim=(-3,12), title=f"MAE: {loss:.2f}")

## Neural Net function

In [None]:
def rectified_linear(m,b,x):
    """Evaluate the line m*x + b and replace every negative value with 0."""
    return torch.clamp(m*x + b, min=0.)

# Plot a single rectified line with m=1 and b=1 fixed, leaving x free.
plot_function(partial(rectified_linear, 1, 1))

In [None]:
def double_relu(m1,b1,m2,b2,x):
    """Sum of two rectified lines, (m1, b1) and (m2, b2), evaluated at `x`."""
    first = rectified_linear(m1, b1, x)
    second = rectified_linear(m2, b2, x)
    return first + second

def mk_double_relu(m1,b1,m2,b2):
    """Return `double_relu` with its four line parameters fixed, leaving only `x`."""
    line_params = (m1, b1, m2, b2)
    return partial(double_relu, *line_params)

# Plot the sum of two rectified lines, (m=-1, b=0) and (m=1, b=2), built with
# the factory defined in this cell.
plot_function(mk_double_relu(-1, 0, 1, 2))

In [None]:
def rn_mae(params):
    """Return the MAE on the global (x, y) data of the double-ReLU model given by `params`."""
    predictions = mk_double_relu(*params)(x)
    return mae(predictions, y)

In [None]:
# Initial (m1, b1, m2, b2) for the double ReLU, created with requires_grad=True
# so autograd tracks them.
rn_params = torch.tensor((-1.0, 0.0, 1.0, 2.0), requires_grad = True)

In [None]:
# Plain gradient descent on the double-ReLU parameters: 50 steps, learning rate 0.01.
for i in range(50):
    # backward() accumulates into .grad, so drop the previous gradient first.
    # Otherwise each step would use the running sum of every earlier gradient
    # instead of the gradient at the current parameters.
    rn_params.grad = None
    loss = rn_mae(rn_params)
    loss.backward()
    # Report the loss every fifth step only.
    if i % 5 == 0:
        print(f'step {i}, loss={loss:.2f}')
    # Update under no_grad so the step itself is not tracked by autograd.
    with torch.no_grad():
        rn_params -= rn_params.grad*0.01
# NOTE(review): without accumulated gradients each step is smaller; increase the
# number of steps or the learning rate if the fit needs to get closer.

In [None]:
# Detach the fitted parameters from autograd and convert them to a NumPy array.
# Unlike the quadratic cell, the tensor `rn_params` itself is left untouched.
params = rn_params.detach().numpy()
print(f'param : {params}')

# NOTE(review): rebinds the global `f` to the fitted double-ReLU model.
f = mk_double_relu(*params)
plt.scatter(x,y)
loss = mae(f(x), y)
# Show the fitted curve over the data, with its MAE in the title.
plot_function(f, ylim=(-3,12), title=f"MAE: {loss:.2f}")