# Illustration of functions approximated by a Neural Network

In this notebook we try to build some intuition for the nature of neural networks as “universal function approximators”. We will explore how the input gets warped by a multilayer perceptron that has been initialized with random weights. We restrict ourselves to inputs and outputs that can be easily plotted.

## 1D Example

The simplest case is a map $\mathbb{R} \rightarrow \mathbb{R}$, or in more familiar form $y = f(x)$. We will see how a fully connected neural network warps the straight line $y = mx + c$ (here the identity, $y = x$, which is drawn dashed for reference). 

Check out how adding more layers or changing the nodes per layer heaps on more twisting and distorting.  

In [None]:
# Setup some imports and basic plotting parameters

import numpy
import torch
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

%matplotlib inline

# Global plotting defaults shared by every figure in the notebook
plt.rcParams.update(
    {
        'font.size': 14,
        'font.family': "sans-serif",
    }
)

# Seed PyTorch's global random number generator. Every network in this
# notebook draws its parameters from that generator, so fixing the seed makes
# "Restart Kernel -> Run All" reproduce the same figures on a given PyTorch
# build. Change SEED to explore a different set of random networks.
SEED = 0
torch.manual_seed(SEED);

In [None]:
def get_multilayer_perceptron(n_network, n_class, n_hidden_layers, n_hidden_dimension, activation='tanh'):
    """Create a multi-layer perceptron

    The network is a ``Linear -> activation`` pair for every hidden layer,
    followed by a final ``Linear`` output layer without an activation.

    Parameters
    ----------
    n_network: int
        The number of input features, i.e. the size of the last dimension
        of the tensors fed to the network

    n_class: int
        The number of output features produced by the final linear layer

    n_hidden_layers: int
        The number of hidden layers in the perceptron. One hidden layer is
        always created, so values smaller than 1 behave like 1.

    n_hidden_dimension: int
        The number of nodes in each of the hidden layers

    activation: str
        Activation function. One of `tanh` or `relu`

    Returns
    -------
    model: torch.nn.Sequential
        A pytorch neural network whose layers are stored in a single flat
        container (``2 * n_hidden_layers + 1`` entries)

    Raises
    ------
    NotImplementedError
        If `activation` is not one of the supported names
    """
    if activation == 'tanh':
        activation_cls = torch.nn.Tanh
    elif activation == 'relu':
        activation_cls = torch.nn.ReLU
    else:
        raise NotImplementedError('Activation function not implemented')

    # Collect the layers in a plain list and wrap them once at the end; this
    # keeps the container flat instead of nesting one Sequential per layer.
    layers = [torch.nn.Linear(n_network, n_hidden_dimension), activation_cls()]

    # The first hidden layer already exists, hence the "- 1".
    for _ in range(n_hidden_layers - 1):
        layers.append(torch.nn.Linear(n_hidden_dimension, n_hidden_dimension))
        layers.append(activation_cls())

    # Output projection: deliberately no activation after it.
    layers.append(torch.nn.Linear(n_hidden_dimension, n_class))

    return torch.nn.Sequential(*layers)


def init_weights(m, mean=0.0, std=0.5):
    """Initialize the weights of a neural network

    This function initializes the parameters of a linear layer from a Normal
    distribution, by default with 0 mean and 0.5 standard deviation. Both the
    weights and (when the layer has one) the bias are initialized using this
    distribution. Modules that are not ``torch.nn.Linear`` are left untouched,
    which makes the function suitable for ``model.apply(init_weights)``.

    Parameters
    ----------
    m: torch.nn.Module
        The module whose parameters are being initialized

    mean: float
        Mean of the Normal distribution

    std: float
        Standard deviation of the Normal distribution
    """
    if not isinstance(m, torch.nn.Linear):
        return

    torch.nn.init.normal_(m.weight, mean, std)

    # A layer built with bias=False has `bias is None`; skip it rather than fail.
    if m.bias is not None:
        torch.nn.init.normal_(m.bias, mean, std)




### Define the input

In [None]:
# Number of samples and half-width of the sampled interval [-d, d]
N = 400
d = 1.0
x = torch.linspace(start=-d, end=d, steps=N)

# Reference line y = x, stored as an independent copy of the samples
y = torch.clone(x)

### Effect of activation function

In [None]:
# Setup the neural network parameters
num_layers = 6
n_hidden_dimension = 32
# NOTE(review): `activation_function` is not read by this cell - the loop
# below always plots both activations. Kept in case other cells rely on it.
activation_function = 'tanh' # 'relu'

# Setup the plotting
fig = plt.figure(figsize=(16.0,7.0))
gs = gridspec.GridSpec(1,2)
ax1 = fig.add_subplot(gs[0,0])
ax2 = fig.add_subplot(gs[0,1])
fig.suptitle('Random neural networks as function approximators')

# The network expects one row per sample, so reshape (N,) -> (N, 1) once
# and evaluate all samples in a single batched forward pass.
x_batch = x.unsqueeze(1)

# Loop over different activation functions and random initializations
# to generate outputs
nruns = 5
colors = plt.cm.viridis(numpy.linspace(0,1,nruns))
for ax, afn in zip([ax1, ax2],['tanh','relu']):
    # Dashed reference: the undistorted line y = x
    ax.plot(x, y, color='k', ls='--')
    ax.set_title(f'Using activation function : {afn}')
    ax.set_xlabel('x')
    ax.set_ylabel('network output')

    net = get_multilayer_perceptron(1,1,num_layers,n_hidden_dimension,afn)
    for run in range(nruns):
        # Re-draw every weight and bias so each run is a fresh random network
        net.apply(init_weights)

        # No gradients are needed for plotting
        with torch.no_grad():
            y_new = net(x_batch).squeeze(1).numpy()

        ax.plot(x, y_new, color=colors[run], lw=2, alpha=0.5)


### Effect of the number of layers

In [None]:
fig = plt.figure(figsize=(16.0,7.0))
gs = gridspec.GridSpec(1,2)
ax1 = fig.add_subplot(gs[0,0])
ax2 = fig.add_subplot(gs[0,1])
fig.suptitle('Effect of network depth on random tanh networks')

# The network expects one row per sample, so reshape (N,) -> (N, 1) once
# and evaluate all samples in a single batched forward pass.
x_batch = x.unsqueeze(1)

nruns = 10
colors = plt.cm.Spectral(numpy.linspace(0,1,nruns))
# `depth` is used as the loop variable so the `num_layers` setting defined
# earlier in the notebook is not overwritten by this cell.
for ax, depth in zip([ax1, ax2],[3,7]):
    # Dashed reference: the undistorted line y = x
    ax.plot(x, y, color='k', ls='--')
    ax.set_title(f'Number of layers : {depth}')
    ax.set_xlabel('x')
    ax.set_ylabel('network output')

    net = get_multilayer_perceptron(1,1,depth,n_hidden_dimension,'tanh')
    for run in range(nruns):
        # Re-draw every weight and bias so each run is a fresh random network
        net.apply(init_weights)

        # No gradients are needed for plotting
        with torch.no_grad():
            y_new = net(x_batch).squeeze(1).numpy()

        ax.plot(x, y_new, color=colors[run], alpha=0.5)

## 2D Example

The second example maps a 2D input to a scalar output, i.e., $\mathbb{R}^{2} \rightarrow \mathbb{R}$, so that the result can be drawn as a surface over the input plane. The visualization below shows the output over the area $[-1, 1]^2$ after it has been fed through a multilayer perceptron initialized using the same method as the 1D Example above.

In [None]:
N = 100
d = 1.0
x = torch.linspace(-d, d, N)
y = torch.linspace(-d, d, N)

# Flattened N x N grid over [-d, d]^2: within each run of N points x sweeps
# the whole interval while y stays fixed, then y advances to its next value.
xx = x.repeat(N)
yy = y.repeat_interleave(N)
assert xx.shape == yy.shape == (N * N,)

# One row per grid point, columns are (x, y)
grid = torch.stack([xx, yy], dim=1)

net3 = get_multilayer_perceptron(2,1,3,n_hidden_dimension,'tanh')
net7 = get_multilayer_perceptron(2,1,7,n_hidden_dimension,'tanh')

net3.apply(init_weights)
net7.apply(init_weights)

# Evaluate the whole grid in one batched forward pass per network;
# no gradients are needed for plotting.
with torch.no_grad():
    zi3 = net3(grid).squeeze(1).numpy()
    zi7 = net7(grid).squeeze(1).numpy()

fig = plt.figure(figsize=(16.0,6.0))
gs = gridspec.GridSpec(1,2)
ax1 = fig.add_subplot(gs[0,0], projection='3d')
ax2 = fig.add_subplot(gs[0,1], projection='3d')

for ax, zi, depth in ((ax1, zi3, 3), (ax2, zi7, 7)):
    ax.plot_trisurf(xx.numpy(), yy.numpy(), zi, cmap='viridis', edgecolor='none')
    ax.set_title(f'Number of layers: {depth}')
    ax.set_axis_off()

fig.tight_layout()

In [None]:
# Top-down (colour-coded) view of the two surfaces computed in the 2D example
fig = plt.figure(figsize=(16.0,6.0))
gs = gridspec.GridSpec(1,2)
ax1 = fig.add_subplot(gs[0,0])
ax2 = fig.add_subplot(gs[0,1])

# zi3 / zi7 hold the outputs of the 3- and 7-layer networks respectively
for ax, zi, depth in ((ax1, zi3, 3), (ax2, zi7, 7)):
    ax.tripcolor(xx, yy, zi, cmap = plt.get_cmap('YlGnBu_r'))
    ax.set_title(f'Number of layers: {depth}')
    ax.set_xlabel('x')
    ax.set_ylabel('y')