In [2]:
# imports
import numpy as np
import matplotlib.pyplot as plt
import descent
try:
    from ipywidgets import interact
except ImportError:
    from IPython.html.widgets import interact

# Apply the 'ocean' and 'seaborn-notebook' style sheets to every figure.
# NOTE(review): 'ocean' does not look like a style bundled with matplotlib, and
# 'seaborn-notebook' appears to have been renamed 'seaborn-v0_8-notebook' in
# newer matplotlib releases -- confirm both names resolve in the target environment.
plt.style.use(['ocean', 'seaborn-notebook'])
# IPython magic: draw figures inline in the notebook output.
%matplotlib inline

# Helper utilities

Demonstrations of some useful helper functions and utilities in `descent`:

## Numerical gradient checks

`descent.check_grad` checks the analytic gradient returned by an objective function against a numerical (finite-difference) approximation of the same gradient:

$$ \frac{f(x + \delta x) - f(x)}{\delta x} \approx \frac{\partial f}{\partial x} $$

In [3]:
def f_df(x):
    """Quadratic bowl 0.5 * ||x||^2 with a deliberately corrupted gradient.

    Parameters
    ----------
    x : numpy array with at least five entries (index 4 is written to).

    Returns
    -------
    (value, grad) : the objective value and a copy of ``x`` standing in for
        the gradient. The true gradient is ``x`` itself; entry 4 of the copy
        is overwritten with 5.0 on purpose so that a gradient checker has a
        mismatch to report. ``x`` itself is not modified.
    """
    grad = x.copy()
    grad[4] = 5.     # Error! incorrect gradient here
    value = 0.5 * np.linalg.norm(x)**2
    return value, grad

In [4]:
# Draw a random 10-element test point (unseeded, so the values differ on every run).
x0 = np.random.randn(10)
# Compare the analytic gradient returned by f_df with a numerical estimate at x0;
# the deliberately wrong entry (index 4) should be the one flagged in the printed table.
descent.check_grad(f_df, x0)

------------------------------------
Numerical  | Analytic   | Error          
------------------------------------
0.1871     | 0.1871     | 0.000000 | [32m✔[0m
-0.8438    | -0.8438    | 0.000000 | [32m✔[0m
-0.5080    | -0.5080    | 0.000000 | [32m✔[0m
-0.8242    | -0.8242    | 0.000000 | [32m✔[0m
-0.4985    | 5.0000     | 1.000000 | [31m✗[0m
-0.5464    | -0.5464    | 0.000000 | [32m✔[0m
2.1707     | 2.1707     | 0.000000 | [32m✔[0m
1.7297     | 1.7297     | 0.000000 | [32m✔[0m
-0.7656    | -0.7656    | 0.000000 | [32m✔[0m
1.3949     | 1.3949     | 0.000000 | [32m✔[0m


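## Function wrapping

Parameters do not have to be a single array: in the cells below, `theta` is a dictionary of arrays, and the objective returns its gradient as a dictionary with the same keys.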

In [5]:
# Random 10x5 design matrix shared by every call to f_df below.
A = np.random.randn(10, 5)
def f_df(theta):
    """Least-squares objective 0.5 * ||A w - b||^2 with gradients for w and b.

    Parameters
    ----------
    theta : dict
        ``theta['w']`` is multiplied by ``A`` (so it has 5 entries for the
        10x5 ``A`` above) and ``theta['b']`` is subtracted from the product
        (10 entries). ``theta`` is not modified.

    Returns
    -------
    objective : float
        Half the squared Euclidean norm of the residual ``A w - b``.
    gradient : dict
        ``gradient['w'] = A.T (A w - b)`` and ``gradient['b'] = -(A w - b)``,
        stored in that key order.
    """
    # The residual is needed by the objective and by both gradients, so compute
    # it once instead of repeating the matrix-vector product three times.
    residual = A.dot(theta['w']) - theta['b']
    objective = 0.5 * np.linalg.norm(residual) ** 2
    gradient = dict()
    gradient['w'] = A.T.dot(residual)
    gradient['b'] = -residual
    return objective, gradient

In [6]:
# Random starting point: a 5-entry 'w' and a 10-entry 'b', drawn in that order.
theta_init = dict(w=np.random.randn(5), b=np.random.randn(10))

In [7]:
# Run the same gradient check with dictionary-valued parameters; the printed table
# has one row per scalar entry of theta_init (5 for 'w' plus 10 for 'b').
descent.check_grad(f_df, theta_init)

------------------------------------
Numerical  | Analytic   | Error          
------------------------------------
-2.0024    | -2.0024    | 0.000000 | [32m✔[0m
2.3995     | 2.3995     | 0.000000 | [32m✔[0m
1.0175     | 1.0175     | 0.000000 | [32m✔[0m
-0.1162    | -0.1162    | 0.000000 | [32m✔[0m
2.9358     | 2.9358     | 0.000000 | [32m✔[0m
-6.9781    | -6.9781    | 0.000000 | [32m✔[0m
-4.2751    | -4.2751    | 0.000000 | [32m✔[0m
0.5536     | 0.5536     | 0.000000 | [32m✔[0m
-0.5720    | -0.5720    | 0.000000 | [32m✔[0m
-2.4563    | -2.4563    | 0.000000 | [32m✔[0m
-35.5516   | -35.5516   | 0.000000 | [32m✔[0m
-11.1993   | -11.1993   | 0.000000 | [32m✔[0m
4.2058     | 4.2058     | 0.000000 | [32m✔[0m
-0.3671    | -0.3671    | 0.000000 | [32m✔[0m
3.8574     | 3.8574     | 0.000000 | [32m✔[0m


In [12]:
# Build an optimizer from the initial parameters and the objective/gradient function.
# NOTE(review): 'sgd' presumably selects the update rule and {} its options
# (library defaults) -- confirm against the descent API.
opt = descent.GradientDescent(theta_init, f_df, 'sgd', {})
# NOTE(review): maxiter is passed as a float (1e4); confirm run() accepts a
# non-integer iteration count.
opt.run(maxiter=1e4)
# Display the parameters held by the optimizer after the run.
opt.theta

{'b': array([-0.79429533,  0.34228628, -0.41107495,  0.17159443,  0.57893234,
        -0.45601154, -1.55100329,  0.33428903,  0.4542238 ,  0.05294524]),
 'w': array([ 0.14644834,  0.49675253, -0.47772738,  0.64859601, -0.0684283 ])}

## Interrupts

In [14]:
# Fresh optimizer with the same arguments as before, run with a 10x larger
# iteration cap (1e5 instead of 1e4).
# NOTE(review): given the section title, this run is presumably meant to be
# interrupted by hand part-way through -- confirm.
opt = descent.GradientDescent(theta_init, f_df, 'sgd', {})
opt.run(maxiter=1e5)

In [15]:
# Inspect the parameters stored on the optimizer after the run above.
opt.theta

{'b': array([-0.79430186,  0.34229873, -0.41112776,  0.17158984,  0.57892172,
        -0.45601113, -1.55099055,  0.33429401,  0.45421084,  0.05294425]),
 'w': array([ 0.14644834,  0.49675253, -0.47772738,  0.64859601, -0.0684283 ])}