In [1]:
import numpy as np

# Here will will evaluate the effect of the RBF by comparing with the multi layer perceptron of lab 1

In [2]:
## Support functions for the evaluation

def getTrainSet(func = 'sin2x', stepSize = 0.1, noise = True):
    ## Returns an 2 x N array of a training set
    # Row 0 = inputs and row 1 = targets¨
    # stepSize is taken as input, range is fixed to 0 --> 2pi. 
    N = int(np.floor(2 * np.pi/stepSize)) #Number of datapoints
    
    train = np.zeros((2,N))
    inputs = np.arange(0, N)
    np.random.shuffle(inputs)
    
    if (noise):
        noise = 1
    else: 
        noise = 0
    
    for step, i in enumerate(inputs):
        train[0, i] = step*stepSize # Input: will be for example 0, 0.1, 0.2 .. 2pi etc.. 
        if (func == 'sin2x'): 
            train[1, i] = np.sin(2*step*stepSize) + noise * np.random.normal(0, np.sqrt(0.1)) # Target: for example sin(2*0), sin(2*0.1) .. sin(2*2pi) etc..
        elif (func == 'step2x'): 
            train[1, i] = np.sign(np.sin(2*step*stepSize)) + noise * np.random.normal(0, np.sqrt(0.1)) # Target: for example sin(2*0), sin(2*0.1) .. sin(2*2pi) etc..

    return train.T

def getTestSet(func = 'sin2x', stepSize = 0.1):
    ## Returns an 2 x N array of a training set
    # Row 0 = inputs and row 1 = targets¨
    # stepSize is taken as input, range is fixed to 0 --> 2pi. 
    N = int(np.floor(2 * np.pi/stepSize)) #Number of datapoints
    
    test = np.zeros((2,N))

    for step in range(N):
        test[0, step] = step*stepSize+0.05 # Input: will be for example 0.05, 0.15, 0.25 .. 2pi´0.05 etc.. 
        if (func == 'sin2x'): 
            test[1, step] = np.sin(2*step*stepSize+0.05) # Target: for example sin(2*0), sin(2*0.1) .. sin(2*2pi) etc..
        elif (func == 'step2x'): 
            test[1, step] = np.sign(np.sin(2*step*stepSize+0.05)) # Target: for example sin(2*0), sin(2*0.1) .. sin(2*2pi) etc..

    return test.T

In [3]:
class RBF:
    def __init__(self, n = 12, variance = 0.1, maxinput = 6.28, learning_rate = 0.1):
        self.n = n # the number of nodes
        self.variance = 0.1 ## Same variance for all nodes
        # self.units = np.random.rand(1, n) * maxinput # random unit position in the input space
        # self.units = np.array([0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6]) # unit positions 
        
        self.units = np.arange(0, maxinput, maxinput/(n-1))
        self.units = np.append(self.units, 6.14)
        
        ## One node in each max and minimum
        # self.units = np.arange(0, maxinput, 3.14/2)
        # print("Units: ")
        # print(self.units)
        
        self.n = self.units.shape[0]
        self.w = np.random.rand(1,self.n).T
        self.lr = learning_rate
        # print("Initiated weights, shape: ")
        # print(self.w.shape)
    
    def error(self, f_approx, f):
        # the average error (with direction +/-)
        if not (f_approx.shape == f.shape): 
            raise Exception('f_approx and f shapes mismatch. f_approx: {}, f: {} '.format(f_approx.shape, f.shape))

        # error = (phi(x) - target)^2
        return np.average(f_approx - f)
    
    def res_error(self, f_approx, f):
        # the residual error (absolute)
        if not (f_approx.shape == f.shape): 
            raise Exception('f_approx and f shapes mismatch. f_approx: {}, f: {} '.format(f_approx.shape, f.shape))

        # error = (phi(x) - target)^2
        return np.average(np.abs(f_approx - f))            
    
    def predict(self, inp):
        # takes an input inp and returns predictions
        # do f^ = phi(x) * w.T
        x = inp.dot(np.ones((1, self.n))) # inp x 1 * 1 x n --> inp x n
        
        # print("n x 1:")
        # print(self.w.shape)
        
        f_approx = self.phi_matrix(x).dot(self.w)
        return f_approx
    
    def fit_lsq(self, inp, f):
        # x is the input. Shape: inputs x 1
        # f is the true value of the function (aka the target), same shape
        if not (inp.shape == f.shape): 
            raise Exception('inp and f shapes mismatch. inp: {}, f: {} '.format(inp.shape, f.shape))
    

        # We want x to be a matrix with shape: inputs x neurons
        x = inp.dot(np.ones((1, self.n))) # inp x 1 * 1 x n --> inp x n
        
        
        # get phi(x)
        self.phi_x = self.phi_matrix(x)

        # we obtain the w that minimizes the error by solving: 
        # phi(x).T * phi(x) * w = phi(x).T * f
        # --> w = (phi(x).T * phi(x))^-1 * phi(x).T * f        
        self.w = np.linalg.inv(self.phi_x.T.dot(self.phi_x)).dot(self.phi_x.T).dot(f)
        
    
    def fit_delta(self, inp, f):
        # x is the input. Shape: inputs x 1
        # f is the true value of the function (aka the target), same shape
        if not (inp.shape == f.shape): 
            raise Exception('inp and f shapes mismatch. inp: {}, f: {} '.format(inp.shape, f.shape))
        
        x = inp.dot(np.ones((1, self.n))) # inp x 1 * 1 x n --> inp x n
        
        # compute phi(x)
        self.phi_x = self.phi_matrix(x)
        
        # make a prediction
        f_approx = self.predict(inp)
        
        # compute the error
        e = self.error(f_approx, f)
        
        # find delta w
        self.dw = -1 * self.lr * e * self.phi_x.T
        
        #print("dw shape")
        #print(dw.shape)

        
        #update weights
        self.w += self.dw

    def phi(self, x, i): 
        return np.exp((-(np.linalg.norm(x-i))**2)/(2*np.square(self.variance)))
    
    
    def phi_matrix(self, x):
        # number of inputs (m) (rows) x self.n (n) (columns)
        # print("phi_matrix input size:" + str(x.shape))
        # this is a slow and stupid way of computing phi(x)
        for m in range(x.shape[0]): # m
            for n in range(x.shape[1]): # n
                res = self.phi(x = x[m, n], i = self.units[n])
        #        print("phi({}, {}): {}".format(x[row, col], self.units[0, row], res))
                x[m, n] = res

        return x

In [4]:
function = 'sin2x'

# Train model
trainset = getTrainSet(function, noise = False)

## Test Model
# Get test set
testset = getTestSet(function)

In [5]:


def batch(n, trainset, testset, variance = 0.1):
    model = RBF(n, variance = 0.1)

    
    # fit model
    model.fit_lsq(trainset[:, 0][np.newaxis,:].T, trainset[:, 1][np.newaxis,:].T)


    
    ## Test Model
    # Make predictions
    pred = model.predict(testset[:, 0][np.newaxis,:].T)
    # test error
    e = model.res_error(pred, testset[:, 1][np.newaxis,:].T)

    # print("Absolute residual error: {}".format(e))
    return round(e, 3)

def delta(n, trainset, testset, variance = 0.1, learning_rate = 0.1, epochs = 20):
    model = RBF(n, variance = variance, learning_rate = learning_rate)
    
    #fit model
    for epoch in range(epochs): 
        for tupl in trainset: 
            sample = np.array([tupl[0]])[np.newaxis,:].T
            target = np.array([tupl[1]])[np.newaxis,:].T
            model.fit_delta(sample, target)
        # Make predictions
        pred = model.predict(testset[:, 0][np.newaxis,:].T)
        # test error
        e = model.res_error(pred, testset[:, 1][np.newaxis,:].T)
        # if (epoch % 5 == 0):
            # print("Epoch {}, residual error: {}, dw: {}".format(epoch, e, np.average(model.dw)))
        # print("Epoch {}, residual error: {}, dw: {} ".format(epoch, e, np.average(model.dw)))
        return round(e, 2)
units = 60
width = 0.01
batch(units, trainset, testset, width)
delta(units, trainset, testset, width, 0.5, 20)

0.13

In [8]:
class NeuralNet():
    def __init__(self, inputs, no_nodes1, no_nodes2, eta, epochs):
        self.no_nodes_first_layer = no_nodes1
        self.no_nodes_second_layer = no_nodes2  
        self.no_input_nodes = inputs
        self.eta = eta
        self.epochs = epochs
        self.shape_y = 0
        self.number_of_layers = 2
        
        self.train_zero_losses = []
        self.train_ms_errors = []
        self.test_zero_losses = []
        self.test_ms_errors = []
        
        self.trained_weights = []
        
    def phi_function(self, x):
        return (2 / (1 + np.exp(-x))) - 1
    
    def phi_prime(self, x):
        return (1 + x)*(1 - x) / 2
    
    def init_weights(self, no_inputs, no_neurons):
        #np.random.seed(42)
        return np.random.normal(0, 1, size=(no_inputs + 1, no_neurons))
    
    def examine(self, x):
        w = self.trained_weights[0]
        v = self.trained_weights[1]
        hout,_ = self.forward_pass(v, w, x) 
        return hout[:,:3]
    
    def forward_pass(self, v, w, x):
        bias = np.ones([1, len(x)])
        x = np.column_stack([x, bias.T])
        hin = np.dot(x, w)
        hout = self.phi_function(hin)
        hout = np.column_stack([hout, bias.T])
        
        oin = np.dot(hout, v)
        oout = self.phi_function(oin)
        
        return hout, oout
    
    def backprop(self, v, t, oout, hout):
        t = t.reshape(t.shape[0],self.shape_y)        
        delta_o = np.multiply((oout - t), self.phi_prime(oout))  
        delta_h = np.dot(delta_o, v.T) * self.phi_prime(hout)
        delta_h = delta_h[:, :self.no_nodes_first_layer]
        return delta_h, delta_o
    
    def weight_update(self, x, hout, v, w, dv, dw, delta_h, delta_o):
        alpha = 0.9
        
        bias = np.ones(len(x))
        x = np.column_stack([x, bias.T])
      
        dw = (dw * alpha) - np.array(np.dot(x.T, delta_h)) * (1-alpha)
        dv = (dv * alpha) - np.array(np.dot(hout.T, delta_o)) * (1-alpha)
        
        w = w + dw * self.eta
        v = v + dv * self.eta 
        return (w, v, dw, dv)
      
    def test_accuracy(self, predictions, targets):
        # mean squared error
        mse = mean_squared_error(targets, predictions)
        
        # accuracy
        for i in range(len(predictions)):
            for j in range(len(predictions[i])):
                if predictions[i, j] >= 0:
                    predictions[i, j] = 1
                else:
                    predictions[i, j] = -1
        zero_one = zero_one_loss(targets, predictions, normalize=True)
        
        return mse, zero_one
        
    def batch_learn(self, x_train, y_train, x_test, y_test):
        self.shape_y = int(y_train.size/len(y_train))
        w = self.init_weights(self.no_input_nodes, self.no_nodes_first_layer)
        v = self.init_weights(self.no_nodes_first_layer, self.no_nodes_second_layer)
        dw = 0
        dv = 0
        
        for i in range(self.epochs):
            hout, oout = self.forward_pass(v, w, x_train)
            delta_h, delta_o = self.backprop(v, y_train, oout, hout)

            w, v, dw, dv = self.weight_update(x_train, hout, v, w, dv, dw, delta_h, delta_o)
            
            _, train_predictions = self.forward_pass(v, w, x_train)
            train_mse, train_zero_one = self.test_accuracy(train_predictions, y_train)
            
            _, test_predictions = self.forward_pass(v, w, x_test)
            test_mse, test_zero_one = self.test_accuracy(test_predictions, y_test)
            
            self.train_zero_losses.append(train_zero_one)
            self.train_ms_errors.append(train_mse)
            self.test_zero_losses.append(test_zero_one)
            self.test_ms_errors.append(test_mse)
        self.trained_weights.append(w)
        self.trained_weights.append(v)
            
    def seq_learn(self, x_train, y_train, x_test, y_test):
        w = self.init_weights(self.no_input_nodes, self.no_nodes_first_layer)
        v = self.init_weights(self.no_nodes_first_layer, self.no_nodes_second_layer)
        dw = 0
        dv = 0
        
        for i in range(self.epochs):
            for j in range(len(X_train)):
                x = X_train[j,:]
                x = np.expand_dims(x, axis=1).T
                y = y_train[j]
                
                hout, oout = self.forward_pass(v, w, x)
                delta_h, delta_o = self.backprop(v, y, oout, hout)

                w, v, dw, dv = self.weight_update(x, hout, v, w, dv, dw, delta_h, delta_o)

            _, train_predictions = self.forward_pass(v, w, x_train)
            train_mse, train_zero_one = self.test_accuracy(train_predictions, y_train)
            
            _, test_predictions = self.forward_pass(v, w, x_test)
            test_mse, test_zero_one = self.test_accuracy(test_predictions, y_test)
            
            self.train_zero_losses.append(train_zero_one)
            self.train_ms_errors.append(train_mse)
            self.test_zero_losses.append(test_zero_one)
            self.test_ms_errors.append(test_mse)
            
    def plot_errors(self):
        f,axarr = plt.subplots(2,sharex=True)
        axarr[0].plot(range(self.epochs),self.train_zero_losses, '-',label="train")
        axarr[0].plot(range(self.epochs),self.test_zero_losses, '-',label="test")
        axarr[0].set_title('Ratio of misclassifications')
        
        axarr[1].plot(range(self.epochs),self.train_ms_errors, '-',label="train")
        axarr[1].plot(range(self.epochs),self.test_ms_errors, '-',label="test")
        axarr[1].set_title('Mean squared error')
        
        plt.grid()
        plt.legend()
        plt.show()
        
    def final_errors(self):
        print("Final MSE:", self.test_ms_errors[-1])
        print("Final Zero-One Loss:", self.test_zero_losses[-1])
        
        





### Compare RBF with the MLP from lab 1




In [None]:
input_nodes = 8
first_hidden_layer_nodes = 3
second_hidden_layer_nodes = 8
eta = 0.01
epochs = 750

n = NeuralNet(input_nodes, first_hidden_layer_nodes, second_hidden_layer_nodes, eta, epochs)


n.batch_learn(X_auto_train, X_auto_train, X_auto_test, X_auto_test)
n.plot_errors()
n.final_errors()