In [None]:
import numpy as np
import sklearn
from sklearn.datasets import make_classification
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
% matplotlib inline

import sys
print(sys.version_info)
print(np.__version__)

In [None]:
def make_sin(n_samples=400, n_discr_points = 10):
    """Generate random sine-valued samples on a regular grid over [0, 2*pi).

    Parameters
    ----------
    n_samples : int
        Number of rows (samples) to generate.
    n_discr_points : int
        Number of grid points per sample (columns).

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_discr_points)
        ``sin(omega * x)`` where ``omega`` is drawn uniformly from [1, 4).
    """
    # linspace takes the number of points explicitly. np.arange with a
    # floating-point step can return one point too many because the length is
    # derived from a rounded division.
    x = np.linspace(0.0, 2.0*np.pi, num=n_discr_points, endpoint=False)
    # NOTE(review): omega is drawn per grid point, not per sample, so a row is
    # not a single-frequency sine wave -- confirm this is intended.
    omega = np.random.uniform(1.0, 4.0, (n_samples, x.shape[0]) )
    return np.sin(omega*x)

In [None]:
def make_sum(n_samples=600, n_discr_points=5):
    """Build random bit sequences whose last column is the row sum.

    Each row holds ``n_discr_points - 1`` random values from {0, 1} followed
    by the sum of those values, giving an array of shape
    ``(n_samples, n_discr_points)``.
    """
    n_bits = n_discr_points - 1
    bits = np.random.randint(low=0, high=2, size=(n_samples, n_bits))
    row_totals = bits.sum(axis=1)
    # np.c_ appends the 1-D totals as one extra column.
    return np.c_[bits, row_totals]

In [None]:
# Toy dataset: every row is a random bit sequence followed by the sum of its bits.
n_samples, n_discr_points = 600, 8
y = make_sum(n_samples=n_samples, n_discr_points=n_discr_points)
print(y.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a third of the rows for validation; the fixed seed keeps the split stable.
y_train, y_test = train_test_split(y, test_size=0.33, random_state=42)
for split in (y_train, y_test):
    print(split.shape)

In [None]:
# Width of each recurrent hidden layer, listed from input side to output side.
n_hidden_neurons = [8, 5]
n_hidden_layers = len(n_hidden_neurons)
# The input layer and the output layer are counted as well.
nlayers = 2 + n_hidden_layers

In [None]:
# Number of time steps fed to the network per sample: entries 0..unroll_factor-1
# of a row are the inputs and the entry at index unroll_factor is the target.
unroll_factor = n_discr_points - 1

In [None]:
class MeanSquareError():
    """Squared-error cost for a prediction ``a`` and a target ``y``."""

    def error(self, y, a):
        """Return the squared difference between prediction ``a`` and target ``y``."""
        residual = a - y
        return residual ** 2

    def derror(self,a, y):
        """Return the derivative of the squared error with respect to ``a``.

        Note the argument order: the prediction comes first here, whereas
        ``error`` takes the target first.
        """
        residual = a - y
        return 2 * residual

In [None]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an array of the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp is only evaluated for non-positive arguments, so it cannot overflow
    # (the naive form computes exp(-z), which overflows for very negative z).
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    out = np.where(z >= 0, 1.0, e) / (1.0 + e)
    # Indexing with () turns a 0-d result into a scalar and leaves arrays as is.
    return out[()]

In [None]:
class InputLayer():
    """Pass-through first layer that remembers the latest input as a column."""

    def __init__(self, n=2):
        self.N = n
        self.z = np.zeros((n, 1))

    def forward(self, x):
        """Store ``x`` as an (N, 1) column and hand ``x`` itself back unchanged."""
        as_column = np.array(x).reshape(self.N, 1)
        self.z = as_column
        return x

    def last_activ(self):
        """Return the most recently stored input column."""
        return self.z

In [None]:
class OutputLayer():
    """Linear read-out layer (z = W @ x + b) scored by a pluggable cost object."""

    def __init__(self, n=1, nprev=5, cost=MeanSquareError() ):
        self.N, self.Nprev = n, nprev

        # Output of the most recent forward pass.
        self.z = np.zeros((n, 1))

        # The bias is drawn before the weights; the draw order is kept so that
        # seeded runs produce the same parameters.
        self.b = np.random.uniform(low=0., high=1., size=(n, 1))
        self.bupdates = np.zeros_like(self.b)

        # W maps the activations of the PREVIOUS layer onto THIS layer.
        self.W = np.random.uniform(low=0., high=1., size=(n, nprev))
        self.Wupdates = np.zeros_like(self.W)

        self.cost_ = cost

    def forward(self, x, tt=0):
        """Compute and store the layer output for input ``x``; ``tt`` is unused."""
        projected = self.W @ x
        self.z = projected.reshape(self.N, 1) + self.b
        return self.z

    def last_activ(self):
        """Return the output of the most recent forward pass."""
        return self.z

    def backward(self, y):
        """Return the cost derivative at the stored output for target ``y``."""
        return self.cost_.derror(self.z, y)

    def update(self):
        """Subtract the pending updates from b and W, then zero the pending updates."""
        for name in ('b', 'W'):
            param = getattr(self, name)
            # In-place subtraction, so the parameter array object is kept.
            param -= getattr(self, name + 'updates')
            setattr(self, name + 'updates', np.zeros_like(param))

In [None]:
class RecurrentSigmoidLayer():
    """Sigmoid layer with a recurrent connection onto itself.

    The pre-activation ``z`` of the latest step is kept on the instance and
    ``sigmoid(z)`` acts as the previous hidden state for the next ``forward``
    call. ``activations[:, tt]`` records the output produced at step ``tt``.
    """

    def __init__(self, n=5, nprev=5, unroll_fac=1):
        self.N, self.Nprev = n, nprev
        self.unroll_ = unroll_fac

        self.z = np.zeros((n, 1))
        self.activations = np.zeros((n, self.unroll_))

        # Parameters are drawn in the order b, W, Whh so seeded runs match.
        self.b = np.random.uniform(low=0., high=1., size=(n, 1))
        self.bupdates = np.zeros_like(self.b)

        # W maps the activations of the PREVIOUS layer onto THIS layer.
        self.W = np.random.uniform(low=0., high=1., size=(n, nprev))
        self.Wupdates = np.zeros_like(self.W)

        # Whh maps this layer's previous activation onto its next pre-activation.
        self.Whh = np.random.uniform(low=0., high=1., size=(n, n))
        self.Whhupdates = np.zeros_like(self.Whh)

    def forward(self, x, tt):
        """Advance one time step with input ``x`` and return the new activation.

        ``x`` may be a scalar or an array; the result is also written to
        column ``tt`` of ``activations``.
        """
        previous = sigmoid(self.z)
        if np.isscalar(x):
            drive = np.dot(self.W, x).reshape(self.N, 1)
            recurrent = np.dot(self.Whh, previous)
        else:
            drive = (self.W @ x).reshape(self.N, 1)
            recurrent = self.Whh @ previous
        self.z = drive + recurrent + self.b

        activation = sigmoid(self.z)
        self.activations[:, tt] = activation.reshape(self.N,)
        return activation

    def last_activ(self):
        """Return the activation of the most recent step."""
        return sigmoid(self.z)

    def backward(self, W, dLdz):
        '''Note: this W is NOT self.W, but the one from the downstream layer!'''
        activation = sigmoid(self.z)
        return (W.T @ dLdz) * activation * (1. - activation)

    def update(self):
        """Subtract the pending updates from b, W and Whh, then zero them."""
        for name in ('b', 'W', 'Whh'):
            param = getattr(self, name)
            # In-place subtraction, so the parameter array object is kept.
            param -= getattr(self, name + 'updates')
            setattr(self, name + 'updates', np.zeros_like(param))

In [None]:
# Loss object shared by the output layer and by the error bookkeeping of the
# training loop.
CostFunction = MeanSquareError()

In [None]:
# Assemble the network: a 1-wide input layer, one recurrent layer per entry of
# n_hidden_neurons, and a single linear output unit.
layers = [InputLayer(1)]
nprev = 1
for n in n_hidden_neurons:
    layers += [RecurrentSigmoidLayer(n, nprev, unroll_factor)]
    # The next layer is fed by the one just created.
    nprev = n
layers += [OutputLayer(1, nprev, CostFunction)]

In [None]:
alpha = 0.005      # learning rate
n_epochs = 400

training_error = []
validation_error = []


def _forward_sequence(layers, data_sample, n_steps):
    """Feed the first ``n_steps`` entries of ``data_sample`` through the network.

    Every recurrent layer is first put back into its freshly constructed state
    (z = 0), so nothing carries over from the previously processed sample.
    Returns the output-layer value after the last time step.
    """
    for layer in layers[1:]:
        if hasattr(layer, 'Whh'):
            layer.z = np.zeros_like(layer.z)

    a = None
    for t in range(n_steps):
        a = layers[0].forward(data_sample[t])
        for layer in layers[1:]:
            a = layer.forward(a, t)
    return a


def _accumulate_bptt_updates(layers, data_sample, n_steps, alpha):
    """Back-propagate through time and add the scaled gradients to the layers.

    Must be called right after ``_forward_sequence`` for the same sample: it
    relies on the per-step activations stored by the recurrent layers and on
    the output kept by the output layer.

    With z_t = W x_t + Whh h_{t-1} + b and h_t = sigmoid(z_t), the sensitivity
    of a hidden layer at step t combines the signal from the layer above at
    the same step with the signal from the same layer at step t + 1.
    """
    out_layer = layers[-1]
    top = len(layers) - 2                    # index of the last hidden layer

    # Output layer: it is only evaluated against the target at the last step.
    delta_out = out_layer.backward(data_sample[n_steps])
    out_layer.Wupdates += alpha*np.outer(delta_out, layers[top].last_activ())
    out_layer.bupdates += alpha*delta_out

    # delta_future[i] holds dL/dz of hidden layer i at step t + 1.
    delta_future = [None] * len(layers)

    for t in range(n_steps - 1, -1, -1):
        # The loss reaches the top hidden layer directly only at the last step.
        delta_above = delta_out if t == n_steps - 1 else None

        for i in range(top, 0, -1):
            layer = layers[i]
            h_t = layer.activations[:, t].reshape(layer.N, 1)

            grad_h = np.zeros_like(h_t)
            if delta_above is not None:
                grad_h += layers[i + 1].W.T @ delta_above
            if delta_future[i] is not None:
                grad_h += layer.Whh.T @ delta_future[i]
            # sigmoid'(z_t) written in terms of the stored activation.
            delta = grad_h * h_t * (1.0 - h_t)

            # Input seen by this layer at step t.
            if i == 1:
                x_t = data_sample[t]
            else:
                x_t = layers[i - 1].activations[:, t]
            # Previous hidden state; at t = 0 it is the reset state sigmoid(0).
            if t > 0:
                h_prev = layer.activations[:, t - 1]
            else:
                h_prev = sigmoid(np.zeros(layer.N))

            layer.Wupdates += alpha*np.outer(delta, x_t)
            layer.Whhupdates += alpha*np.outer(delta, h_prev)
            layer.bupdates += alpha*delta

            delta_future[i] = delta
            delta_above = delta


for epoch in range(n_epochs):

    # training: one parameter update per sample
    errors = []
    for data_sample in y_train:
        # a holds the prediction for data_sample[unroll_factor]
        a = _forward_sequence(layers, data_sample, unroll_factor)
        errors.append(CostFunction.error(data_sample[unroll_factor], a).item())

        _accumulate_bptt_updates(layers, data_sample, unroll_factor, alpha)
        for layer in layers[1:]:
            layer.update()

    training_error.append(np.mean(errors))

    # validation: forward passes only
    errors = []
    for data_sample in y_test:
        a = _forward_sequence(layers, data_sample, unroll_factor)
        errors.append(CostFunction.error(data_sample[unroll_factor], a).item())
    validation_error.append(np.mean(errors))

In [None]:
# Learning curves: mean squared error per epoch on both splits.
plt.figure(figsize=(12, 12))
for curve in (training_error, validation_error):
    plt.plot(range(n_epochs), curve, '.-')
plt.legend(['training', 'validation'])

In [None]:
def inference(layers, data_sample):
    """Return the network's prediction for one sample.

    The first ``unroll_factor`` entries of ``data_sample`` are fed through
    ``layers`` one per time step. Recurrent layers are reset to z = 0 first,
    so the result does not depend on which sample was evaluated before.

    Returns the output of the last layer after the final time step, or None
    when ``unroll_factor`` is 0.
    """
    # Start from a clean hidden state in every recurrent layer.
    for layer in layers[1:]:
        if hasattr(layer, 'Whh'):
            layer.z = np.zeros_like(layer.z)

    # feedforward
    a = None
    for t in range(unroll_factor):
        a = layers[0].forward(data_sample[t])
        for layer in layers[1:]:
            a = layer.forward(a, t)
    return a

In [None]:
# Show the first 15 validation rows, each followed by the network's prediction.
for i in range(15):
    sample = y_test[i, :]
    print(sample)
    print(inference(layers, sample))