# Backpropagation
Hier wollen wir nun Backpropagation selbst implementieren... 

In [1]:
%matplotlib widget
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(42)

### Funktionen

In [2]:
def forward(inpX, w1, w2, b1, b2):
    '''
    Run one forward pass: a tanh hidden layer followed by a linear output layer.

    Parameters
    ----------
    inpX : data matrix
        input matrix, shaped as: samples x dimensions
        (a single sample given as a 1-D vector also works via broadcasting)
    w1 : matrix
        weight matrix between input and hidden neurons
    w2 : matrix
        weight matrix between hidden and output neurons
    b1 : vector
        bias vector for the hidden neurons, broadcast over all samples
    b2 : vector
        bias vector for the output neurons, broadcast over all samples

    Returns
    -------
    u1 : matrix
        net input of the hidden layer (before the transfer function)
    y1 : matrix
        hidden layer activation, tanh(u1)
    y2 : matrix
        network output; no transfer function is applied to the output layer
    '''
    hidden_net = np.matmul(inpX, w1) + b1
    hidden_act = np.tanh(hidden_net)
    net_output = np.matmul(hidden_act, w2) + b2
    return hidden_net, hidden_act, net_output

def initialize_weights(inpDim, hiddenNeurons, outDim):
    '''
    Create random start values for all weights and biases of the network.
    Every value is drawn with np.random.random and shifted by -0.5.

    Parameters
    ----------
    inpDim : int
        Number of input neurons
    hiddenNeurons : int
        Number of hidden neurons
    outDim : int
        Number of output neurons

    Returns
    -------
    w1 : matrix
        input -> hidden weights, shape (inpDim, hiddenNeurons)
    w2 : matrix
        hidden -> output weights, shape (hiddenNeurons, outDim)
    b1 : matrix
        hidden biases, shape (1, hiddenNeurons)
    b2 : matrix
        output biases, shape (1, outDim)
    '''
    # Shapes are listed in draw order (w1, w2, b1, b2) so that a seeded
    # global random state always yields the same parameters.
    shapes = (
        (inpDim, hiddenNeurons),
        (hiddenNeurons, outDim),
        (1, hiddenNeurons),
        (1, outDim),
    )
    w1, w2, b1, b2 = (np.random.random(shape) - 0.5 for shape in shapes)
    return w1, w2, b1, b2

def prop_error(T, y2, w2, transDiff_u1):
    '''
    Compute the error signals (deltas) of the output and the hidden layer.

    Parameters
    ----------
    T : float
        teaching signal of the current sample
    y2 : float
        output of the last neuron
    w2 : data matrix
        weight matrix between hidden and output layer
    transDiff_u1 : vector
        differential of the transfer function used on u1

    Returns
    -------
    delta1 : vector
        error signal of the hidden layer: transposed w2, scaled element-wise
        by transDiff_u1 and by delta2
    delta2 : float
        error signal of the output neuron, T - y2
    '''
    output_delta = T - y2
    # Push the output error back through w2 and the hidden transfer function.
    hidden_delta = np.transpose(w2) * transDiff_u1 * output_delta
    return hidden_delta, output_delta

def training(hiddenNeurons, lernRate, inpX, outT, epoch):
    '''
    Train the neural network with per-sample (online) backpropagation.

    In every epoch the samples are visited once in a fresh random order and
    the weights are updated after each sample. After each epoch the summed
    squared error over the whole data set is recorded.

    Parameters
    ----------
    hiddenNeurons : int
        Number of hidden neurons
    lernRate : float
        Learning rate (eta)
    inpX : data matrix
        input data, shaped as: samples x dimensions; the number of input
        neurons is taken from the second axis
    outT : vector
        teaching signal: one dimensional vector with one value per sample
    epoch : int
        number of training epochs

    Returns
    -------
    w1, w2, b1, b2 : matrices
        trained weights and biases (same layout as initialize_weights)
    error : vector
        summed squared error over all samples, one entry per epoch
    '''
    inpX = np.asarray(inpX)
    outT = np.asarray(outT)
    n, inpDim = inpX.shape

    w1, w2, b1, b2 = initialize_weights(inpDim, hiddenNeurons, 1)
    error = np.empty(epoch)
    for i in range(epoch):
        for j in np.random.permutation(n):
            sample = inpX[j]
            _, y1, y2 = forward(sample, w1, w2, b1, b2)
            # tanh'(u1) = 1 - tanh^2(u1); y1 already holds tanh(u1).
            transDiff_u1 = 1 - np.square(y1)
            # The deltas must be computed with the old w2, i.e. before w2 is updated.
            delta1, delta2 = prop_error(outT[j], y2, w2, transDiff_u1)
            w2 = w2 + lernRate * delta2 * np.transpose(y1)
            # Column vector (inputs) times row vector (hidden deltas) updates
            # every row of w1 at once, for any number of input dimensions.
            w1 = w1 + (lernRate * sample)[:, np.newaxis] * delta1
            b2 = b2 + lernRate * delta2
            b1 = b1 + lernRate * delta1
        # Error over the complete data set after each epoch.
        _, _, y2 = forward(inpX, w1, w2, b1, b2)
        error[i] = np.sum(np.square(outT - np.ravel(y2)))
    return w1, w2, b1, b2, error

### Initialisierung der Parameter.
- X und Y entsprechen dem Datensatz
- Z ist das Lehrersignal

In [3]:
############
## Generate some sample data
def f(x,y):
    '''
    Target function to approximate: sine of the distance from the origin
    plus a cosine of the scaled difference x - y.
    '''
    radius = np.sqrt(x**2 + y**2)
    scaled_diff = .9*(x-y)
    return np.sin(radius) + np.cos(scaled_diff)

# Sample the target function on a regular grid over [-6, 6] x [-6, 6].
X = np.linspace(-6, 6, 30)
Y = np.linspace(-6, 6, 30)
x, y = np.meshgrid(X, Y)
z = f(x, y)

############
## Initialize network parameters
hiddenNeuronen = 100
lernRate       = 0.01
epochen        = 1000

############
## Prepare sample data, train the network and prepare result data for the plots
inpX = np.column_stack((x.flatten(), y.flatten()))  # one (x, y) grid point per row
outT = z.flatten()                                  # teaching signal, same row order as inpX
w1,w2,b1,b2,error = training(hiddenNeuronen,lernRate,inpX,outT,epochen)
_,_,y2 = forward(inpX,w1,w2,b1,b2)
# Shape is taken from the grid itself instead of repeating the grid size.
approx = np.reshape(y2, z.shape)  # approximated function on the grid, for plotting
abs_err = z - approx              # signed difference (target - approximation), for plotting

### Visualisierung der Daten

In [4]:
fig = plt.figure()

# The three 3D panels differ only in subplot position, plotted data and label:
# target function, network approximation, and their difference.
surface_panels = (
    (221, z, 'Original'),
    (222, approx, 'Approximation'),
    (223, abs_err, 'Differenz'),
)
surface_axes = []
for position, values, label in surface_panels:
    panel = fig.add_subplot(position, projection='3d')
    surf = panel.plot_surface(x, y, values, cmap='plasma',
                              linewidth=0, antialiased=False)
    panel.set_title(label)
    panel.set_xlabel('x')
    panel.set_ylabel('y')
    panel.set_zlabel(label)
    surface_axes.append(panel)
ax1, ax2, ax3 = surface_axes

# Fourth panel: squared error recorded after each training epoch.
ax4 = fig.add_subplot(224)
ax4.plot(np.arange(epochen), error)
ax4.grid(True)
ax4.set_xticks([0, epochen/2, epochen])
ax4.set_title('Quadratischer Fehler')
ax4.set_xlabel('Epoche')
ax4.set_ylabel('Fehler')

# Extra spacing so that the axis labels of neighbouring panels do not overlap.
fig.subplots_adjust(wspace=0.5, hspace=0.5)

FigureCanvasNbAgg()