In [1]:
# imports
import numpy as np
import matplotlib.pyplot as plt
import descent
from ipywidgets import interact

# Apply two matplotlib style sheets, in order, to every figure in this notebook.
# NOTE(review): 'ocean' does not look like a style bundled with matplotlib — presumably a
# user-installed style sheet; confirm it is available before running on a fresh machine.
# NOTE(review): newer matplotlib releases appear to have renamed the seaborn styles
# (e.g. 'seaborn-v0_8-notebook') — confirm 'seaborn-notebook' still resolves.
plt.style.use(['ocean', 'seaborn-notebook'])
%matplotlib inline

# Helper utilities

Demonstrations of some useful helper functions and utilities in `descent`:

## Numerical gradient checks

`descent.check_grad` checks an analytic objective-and-gradient function by comparing its gradient, entry by entry, against a numerical finite-difference estimate:

$$ \frac{f(x + \delta x) - f(x)}{\delta x} \approx \frac{\partial f}{\partial x} $$

In [2]:
def f_df(x):
    """Return the objective 0.5 * ||x||^2 together with a deliberately wrong gradient.

    The true gradient of this objective is ``x`` itself. Entry 4 of the
    returned gradient is overwritten with 5 on purpose, so that a gradient
    check has one bad entry to catch.

    Parameters
    ----------
    x : np.ndarray
        Point at which to evaluate; must have an entry at index 4.

    Returns
    -------
    (objective, gradient) : tuple
        The scalar objective and the (corrupted) gradient array.
    """
    grad = x.copy()      # work on a copy so the caller's array is left untouched
    grad[4] = 5.         # Error! incorrect gradient here
    value = 0.5 * np.linalg.norm(x)**2
    return value, grad

In [3]:
# Random 10-dimensional test point. No seed is set in the visible cells, so the
# numbers in the table below change from run to run.
x0 = np.random.randn(10)
# Compare f_df's analytic gradient with a numerical one at x0. f_df overwrites
# gradient[4], so that row of the table below is the one flagged as failing.
descent.check_grad(f_df, x0, style='grid')

+---------------+---------------+---------------+
|   Numerical   |   Analytic    |     Error     |
+---------------+---------------+---------------+
|         1.1457|         1.1457|    [92m2.350e-10[0m [32m✓[0m|
|        -1.0136|        -1.0136|    [92m5.219e-10[0m [32m✓[0m|
|        -2.1064|        -2.1064|    [92m4.612e-11[0m [32m✓[0m|
|        0.48629|        0.48629|    [92m2.568e-10[0m [32m✓[0m|
|        -1.6413|              5|    [91m1.000e+00[0m [31m✗[0m|
|       -0.49855|       -0.49855|    [92m4.600e-10[0m [32m✓[0m|
|        0.88376|        0.88376|    [92m2.479e-10[0m [32m✓[0m|
|        0.72003|        0.72003|    [92m5.598e-10[0m [32m✓[0m|
|         1.5633|         1.5633|    [92m1.616e-10[0m [32m✓[0m|
|        0.80923|        0.80923|    [92m2.940e-10[0m [32m✓[0m|
+---------------+---------------+---------------+


## Function wrapping

In [4]:
# Random 10x5 matrix used by the least-squares objective below.
A = np.random.randn(10, 5)

def f_df(theta):
    """Objective 0.5 * ||A w - b||^2 and its gradient, for parameters held in a dict.

    Parameters
    ----------
    theta : dict
        Holds the two parameter arrays under the keys 'w' and 'b'.

    Returns
    -------
    (objective, gradient) : tuple
        The scalar objective and a dict with keys 'w' and 'b' holding the
        gradient with respect to each parameter.
    """
    prediction = A.dot(theta['w'])
    residual = prediction - theta['b']
    grad = {
        'w': A.T.dot(residual),        # d/dw = A^T (A w - b)
        'b': theta['b'] - prediction,  # d/db = -(A w - b)
    }
    return 0.5 * np.linalg.norm(residual) ** 2, grad

In [5]:
# Random starting point: a length-5 array under 'w' and a length-10 array under 'b'.
theta_init = dict(
    w=np.random.randn(5),
    b=np.random.randn(10),
)

In [6]:
# Run the same gradient check, this time passing the parameters as a dict of arrays
# (theta_init) rather than a single array; results are printed as the table below.
descent.check_grad(f_df, theta_init, style='grid')

+---------------+---------------+---------------+
|   Numerical   |   Analytic    |     Error     |
+---------------+---------------+---------------+
|        0.96965|        0.96965|    [92m8.457e-10[0m [32m✓[0m|
|        -2.2092|        -2.2092|    [92m1.942e-10[0m [32m✓[0m|
|          1.623|          1.623|    [92m5.464e-10[0m [32m✓[0m|
|       -0.85081|       -0.85081|    [92m1.743e-09[0m [32m✓[0m|
|        -3.4339|        -3.4339|    [92m3.638e-11[0m [32m✓[0m|
|       -0.36753|       -0.36753|    [92m4.385e-09[0m [32m✓[0m|
|         2.4502|         2.4502|    [92m1.517e-10[0m [32m✓[0m|
|        -1.0157|        -1.0157|    [92m2.843e-09[0m [32m✓[0m|
|        0.34199|        0.34199|    [92m3.445e-09[0m [32m✓[0m|
|            4.2|            4.2|    [92m2.059e-11[0m [32m✓[0m|
|        -2.7002|        -2.7002|    [92m2.279e-10[0m [32m✓[0m|
|        -4.2531|        -4.2531|    [92m1.093e-11[0m [32m✓[0m|
|        -21.915|        -21.915| 

In [13]:
# Minimize the dict-parameter objective f_df with descent's sgd, starting from theta_init.
# NOTE(review): maxiter is passed as the float 1e4 — confirm descent accepts a
# non-integer iteration count. display=None presumably silences progress output; verify.
opt = descent.sgd().minimize(f_df, theta_init, display=None, maxiter=1e4)
# Show the result: the output below is a dict with the same 'w' / 'b' keys as theta_init.
opt.x

{'b': array([ 0.35583133,  1.03033417, -0.97902932,  0.56691045,  1.65152562,
         0.4426982 ,  0.86465542,  1.75014944,  0.46537717,  0.81386008]),
 'w': array([ 0.47234534, -0.33726185, -0.27469297,  0.10492002, -0.91796569])}