# Neural Networks and Automatic Differentiation

revision: 78571a2

In [None]:
# @formatter:off
# PREAMBLE

import autograd.numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from autograd import grad
from autograd.misc.optimizers import adam
from numpy import testing
from sklearn import preprocessing

from mlis.arrays import asinput, aslabel

%matplotlib inline
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")
%load_ext autoreload
%autoreload 2
# @formatter:on

In [None]:
# Load the energy data set; 'energy.csv' is read as a space-separated table.
df = pd.read_csv('energy.csv', sep=' ')
# 'temp' is used as the model input and 'energy' as the target.
# NOTE(review): asinput/aslabel come from mlis.arrays; presumably they convert
# the columns into the array layout the models expect -- confirm in that module.
X = asinput(df['temp'])
y = aslabel(df['energy'])
# Evenly spaced grid between the smallest and the largest input value.
t = np.linspace(X.min(), X.max())

# Scatter plot of energy against temperature for a first look at the data.
fig, ax = plt.subplots()
sns.scatterplot(ax=ax, x='temp', y='energy', data=df)

Implement a vectorized version of the ReLU activation function $z \mapsto \max \{0, z\}$.

In [None]:
# Open activation.py and implement the function ReLu

from mlis.neuralnetwoks.activation import ReLu

# Sanity check of the implementation: negative entries and zero must map to 0,
# positive entries must pass through unchanged (compared to 2 decimals).
z = np.array([-.1, 0, 1.23, -1])
testing.assert_array_almost_equal(ReLu(z), [0, 0, 1.23, 0], decimal=2)

In [None]:
def init(layer_sizes, scale, rng=None):
    """Build a list of randomly initialized (U, c) tuples, one for each layer.

    Parameters
    ----------
    layer_sizes : sequence of int
        Number of units per layer, input layer first. Every consecutive pair
        ``(insize, outsize)`` defines one layer, so a sequence of length ``n``
        yields ``n - 1`` tuples (and fewer than two sizes yield an empty list).
    scale : float
        Factor applied to the standard-normal draws.
    rng : numpy.random.RandomState, optional
        Source of randomness. When omitted, a fresh ``RandomState(0)`` is
        created on every call, so repeated default calls return identical
        parameters.

    Returns
    -------
    list of tuple
        One ``(U, c)`` pair per layer, where ``U`` has shape
        ``(insize, outsize)`` and ``c`` has shape ``(outsize,)``.
    """
    # A RandomState placed directly in the signature would be created only
    # once, when the function is defined, and then shared (and advanced) by
    # every call that relies on the default. Creating it here keeps the
    # default deterministic per call.
    if rng is None:
        rng = np.random.RandomState(0)

    # For each layer U is drawn before c, so the stream of random numbers is
    # consumed in a fixed, reproducible order.
    return [
        (rng.randn(insize, outsize) * scale, rng.randn(outsize) * scale)
        for insize, outsize in zip(layer_sizes[:-1], layer_sizes[1:])
    ]


# Network with 1 input unit, 4 hidden units and 1 output unit; this yields two
# (U, c) pairs with U of shape (1, 4) and (4, 1) respectively.
params = init(layer_sizes=[1, 4, 1], scale=0.5)

# Show the initial weights and biases of every layer (index starts at 0).
for i, (U, c) in enumerate(params):
    print(f'U_{i}: {U}, c_{i}: {c}')

Implement the $k$-layer hypothesis function

\begin{align*}
z_1 &= x\\
z_{i+1} &= \sigma(U_i z_i + c_i), \quad i=1, \dotsc, k-1\\
h(x) &= U_k z_k + c_k
\end{align*}

where $\sigma$ is the activation function and the list `params` contains the tuples $(U_i, c_i)$, i.e. `params = [(U_1, c_1), ..., (U_k, c_k)]`.

In [None]:
# Open feedforward.py and implement the function predict
from mlis.neuralnetwoks.feedforward import predict

# test the correctness of the implementation
# NOTE: this overwrites the global X with three toy inputs.
X = np.array([1, 2, 3])
# An explicitly seeded generator makes the parameters, and therefore the
# expected predictions below, reproducible.
params = init(layer_sizes=[1, 4, 1], scale=0.5, rng=np.random.RandomState(0))

for i, (U, c) in enumerate(params):
    print(f'U_{i}: {U}, c_{i}: {c}')

# The expected result has one row per input value (shape (3, 1)); values are
# compared to 2 decimals.
actual = predict(X, params)
expected = np.array([[1.12], [1.92], [2.75]])
testing.assert_array_almost_equal(actual, expected, decimal=2)

In [None]:
# Fit a StandardScaler on the temperature column; the same scaler is needed
# later to transform any new inputs before they are passed to predict.
scaler = preprocessing.StandardScaler().fit(asinput(df['temp']))
# Training data: scaled temperatures as inputs, energy values as targets.
# NOTE: this replaces the global X set by earlier cells.
X = scaler.transform(asinput(df['temp']))
y = aslabel(df['energy'])

# Here is our initial guess:
# (explicitly seeded, so re-running this cell resets params to the same values)
params = init(scale=0.5, layer_sizes=[1, 4, 1], rng=np.random.RandomState(0))


# Objective function
def J(params, _):
    """Return the mean squared error of the network on the training data.

    The global training inputs ``X`` are fed through ``predict`` with the
    given ``params``; the result is compared with the global targets ``y``
    reshaped to a single column.

    The second positional argument is ignored. It only exists so that callers
    may pass an extra value (this notebook passes a step index or ``None``).
    """
    outputs = predict(X, params)
    targets = aslabel(y).reshape((-1, 1))
    residuals = targets - outputs
    squared = residuals ** 2
    return np.mean(squared)


# print information during optimization
def callback(params, step, g):
    """Report the current objective value on every 250th step.

    ``params`` and ``step`` are forwarded to ``J``; ``g`` is accepted but not
    used. Nothing is printed for steps that are not a multiple of 250.
    """
    if step % 250 != 0:
        return
    objective = J(params, step)
    print(f"J(w): {objective:1.5e}")

In [None]:
# good values are:
# num_iters = 200
# epochs = 20
# step_size in [0.1, 0.05]

In [None]:
# Try different numbers of iterations, epochs and step sizes until the network
# has learned a good approximation.
num_iters = 1
epochs = 1

for step_size in [1]:
    print(f'step size: {step_size}')
    for epoch in range(epochs):
        print(f'\tepoch: {epoch}', end='\t')
        # Each run of adam starts from the current params, so training
        # continues where the previous epoch (or a previous execution of this
        # cell) stopped; params is not re-initialised here.
        params = adam(grad(J), params, step_size=step_size, num_iters=num_iters, callback=callback)
        # Stop the remaining epochs for this step size once the objective is
        # small enough; this break only leaves the inner (epoch) loop.
        if J(params, None) < 2e-5:
            break

# Show the learned weights and biases of every layer.
for i, (U, c) in enumerate(params):
    print(f'U_{i}: {U}, c_{i}: {c}')

# Evaluation grid over the observed (unscaled) temperature range.
t = np.linspace(df['temp'].min(), df['temp'].max())

# plot data
fig, ax = plt.subplots()
sns.scatterplot(ax=ax, x='temp', y='energy', data=df)
# The network was trained on scaled inputs, so the grid is passed through the
# same scaler before predicting; the fit is drawn in red over the data.
ax.plot(t, predict(scaler.transform(asinput(t)), params), 'r')

In [None]:
# Try different numbers of iterations, epochs and step sizes until the network
# has learned a good approximation.
num_iters = 1
epochs = 1

for step_size in [1]:
    print(f'step size: {step_size}')
    for epoch in range(epochs):
        print(f'\tepoch: {epoch}', end='\t')
        # Optimisation resumes from whatever params currently holds, i.e. it
        # builds on the result of any training cell executed before this one.
        params = adam(grad(J), params, step_size=step_size, num_iters=num_iters, callback=callback)
        # Early exit from the epoch loop when the objective drops below 2e-5;
        # the outer loop over step sizes is not terminated by this break.
        if J(params, None) < 2e-5:
            break

# Print the current weights and biases layer by layer.
for i, (U, c) in enumerate(params):
    print(f'U_{i}: {U}, c_{i}: {c}')

# Grid of unscaled temperatures covering the range found in the data.
t = np.linspace(df['temp'].min(), df['temp'].max())

# plot data
fig, ax = plt.subplots()
sns.scatterplot(ax=ax, x='temp', y='energy', data=df)
# Inputs must be scaled with the fitted scaler before prediction because the
# training inputs were scaled too; the prediction curve is plotted in red.
ax.plot(t, predict(scaler.transform(asinput(t)), params), 'r')