In [6]:
import numpy as np

class FullyConnectedLayer:
    """Dense layer computing ``activation(input @ weights + biases)``.

    The layer is trained with plain gradient descent. Use ``back_prop_output``
    when this is the last layer of the network and ``back_prop_hidden`` when
    its output feeds another layer. Inputs are 2-D arrays with one sample per
    row; parameter gradients are summed over the rows of a batch.

    Attributes:
        weights: float array of shape ``(input_size, layer_width)``.
        biases: float array of shape ``(1, layer_width)``.
        activation_type: 'linear', 'relu', 'sigmoid' or 'softmax'.
        activation_function: callable applying that activation to an array.
    """

    def __init__(self, input_size, layer_width, 
                 weights = False, biases = False, 
                 activation_type = 'linear'):
        """Create the layer.

        Args:
            input_size: number of input features.
            layer_width: number of neurons (output features).
            weights: optional ``(input_size, layer_width)`` ndarray; any
                non-ndarray value (the default) means uniform random init.
            biases: optional ``(1, layer_width)`` ndarray; any non-ndarray
                value (the default) means uniform random init.
            activation_type: 'linear', 'relu', 'sigmoid' or 'softmax'.

        Raises:
            ValueError: if a supplied array has the wrong shape or the
                activation name is unknown.
        """
        # Weights are drawn before biases so the random stream is consumed in
        # the same order as before.
        if isinstance(weights, np.ndarray):
            if weights.shape != (input_size, layer_width):
                raise ValueError('Weight dimensions look wrong')
            self.weights = self._own_float_copy(weights)
        else:
            self.weights = np.random.rand(input_size, layer_width) #use rand to init

        activations = {
            'linear': lambda x : x,
            'relu': lambda x : np.maximum(x, 0),
            'sigmoid': lambda x : 1 / (1 + np.exp(-1 * x)),
            'softmax': FullyConnectedLayer._softmax,
        }
        if activation_type not in activations:
            raise ValueError('Invalid activation function')
        self.activation_type = activation_type
        self.activation_function = activations[activation_type]

        if isinstance(biases, np.ndarray):
            if biases.shape != (1, layer_width):
                raise ValueError('Bias dimensions look wrong')
            self.biases = self._own_float_copy(biases)
        else:
            self.biases = np.random.rand(1, layer_width)

    @staticmethod
    def _own_float_copy(array):
        """Return a floating-point copy of ``array`` owned by the layer.

        Training updates the parameters in place, so the caller's array must
        not be aliased, and an integer array could not absorb float updates.
        """
        dtype = array.dtype if np.issubdtype(array.dtype, np.floating) else float
        return np.array(array, dtype = dtype)

    @staticmethod
    def _softmax(x):
        """Row-wise softmax of a 2-D array."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing; keepdims makes the division row-wise for
        # any number of rows.
        exps = np.exp(x - np.max(x, axis = 1, keepdims = True))
        return exps / np.sum(exps, axis = 1, keepdims = True)

    def _activation_backward(self, output, upstream):
        """Turn dCost/dOutput (``upstream``) into dCost/dPreActivation.

        ``output`` is the layer's activated output for the same input.
        """
        if self.activation_type == 'linear':
            return upstream * 1
        if self.activation_type == 'relu':
            # relu(z) > 0 exactly where z > 0; inactive units pass no gradient.
            return upstream * (output > 0)
        if self.activation_type == 'sigmoid':
            return upstream * output * (1 - output)
        if self.activation_type == 'softmax':
            # Jacobian-vector product of softmax, applied per row.
            weighted = np.sum(upstream * output, axis = 1, keepdims = True)
            return output * (upstream - weighted)
        raise ValueError('Invalid activation function')

    def _descend(self, input_data, error, learning_rate):
        """Apply one gradient step and return the gradient w.r.t. the input."""
        # The input gradient must use the weights from the forward pass, so it
        # is taken before the weights are modified.
        input_gradient = np.matmul(error, self.weights.T)
        weights_update = np.matmul(np.transpose(np.asarray(input_data)), error)
        self.weights += -1 * learning_rate * weights_update
        self.biases += -1 * learning_rate * np.sum(error, axis = 0, keepdims = True)
        return input_gradient

    def forward_prop(self, input_data):
        """Return the layer's activated output for ``input_data``."""
        layer_input = np.add(np.matmul(input_data, self.weights), self.biases)
        return self.activation_function(layer_input)

    def back_prop_output(self, input_data, target, cost_function, learning_rate = 0.001, train = True):
        """Back-propagate through this layer when it is the network output.

        Args:
            input_data: 2-D input to this layer, one sample per row.
            target: desired output, same shape as the layer output.
            cost_function: 'square_loss' (any activation) or 'cross_entropy'
                (softmax or sigmoid activation only).
            learning_rate: gradient-descent step size.
            train: when False, parameters are left untouched.

        Returns:
            Gradient of the cost with respect to ``input_data``.

        Raises:
            ValueError: for an unsupported cost/activation combination.
        """
        prediction = self.forward_prop(input_data)
        if cost_function == 'square_loss':
            cost_gradient = -2 * (target - prediction)
            error = self._activation_backward(prediction, cost_gradient)
        elif (cost_function == 'cross_entropy') and (self.activation_type in ('softmax', 'sigmoid')):
            # For these pairings the pre-activation gradient collapses to
            # prediction - target.
            error = prediction - target
        else:
            raise ValueError('Invalid cost function')

        if not train:
            return np.matmul(error, self.weights.T)

        return self._descend(input_data, error, learning_rate)

    def back_prop_hidden(self, input_data, next_layer_gradients, learning_rate = 0.001):
        """Back-propagate through this layer when it feeds another layer.

        Args:
            input_data: 2-D input to this layer, one sample per row.
            next_layer_gradients: gradient of the cost with respect to this
                layer's output, as returned by the following layer.
            learning_rate: gradient-descent step size.

        Returns:
            Gradient of the cost with respect to ``input_data``.
        """
        output = self.forward_prop(input_data)
        error = self._activation_backward(output, np.asarray(next_layer_gradients))
        return self._descend(input_data, error, learning_rate)

In [29]:
# One training sample with two input features (shape 1 x 2).
input_layer = np.array([[1, 1]])

# Hidden layer: 2 inputs -> 3 ReLU units, randomly initialised.
layer_1 = FullyConnectedLayer(input_size = 2, layer_width = 3, activation_type = 'relu')

# Output layer: 3 inputs -> 2 linear units, randomly initialised.
output_layer = FullyConnectedLayer(input_size = 3, layer_width = 2, activation_type = 'linear')

In [32]:
# Regression target for the single training sample: one row, two outputs.
target = np.array([[-100, 100]])
print('target:')
print(target)

# Network prediction before the update steps below.
hidden_out = layer_1.forward_prop(input_layer)
print(output_layer.forward_prop(hidden_out))

for i in range(10):
    # The output layer goes first: it returns the gradient with respect to
    # its input, which the hidden layer then consumes.
    hidden_out = layer_1.forward_prop(input_layer)
    output_gradient = output_layer.back_prop_output(
        hidden_out, target, 'square_loss',
        learning_rate = 0.001, train = True,
    )
    layer_1.back_prop_hidden(input_layer, output_gradient, learning_rate = 0.001)

    print('weights:')
    for layer in (output_layer, layer_1):
        print(layer.weights)
    print('preds:')
    print(output_layer.forward_prop(layer_1.forward_prop(input_layer)))

target:
[[-100  100]]
[[-99.99995287 100.00004762]]
weights:
[[-1.63932624  2.55927905]
 [-0.52741058  1.49169219]
 [-5.70839765  5.10540074]]
[[1.93412548 0.93371711 5.22570308]
 [2.13425639 0.81719704 4.92769156]]
preds:
[[-99.99997995 100.00002026]]
weights:
[[-1.63932648  2.55927881]
 [-0.5274107   1.49169208]
 [-5.70839827  5.10540012]]
[[1.93412544 0.93371707 5.2257031 ]
 [2.13425635 0.81719701 4.92769158]]
preds:
[[-99.99999147 100.00000862]]
weights:
[[-1.63932658  2.5592787 ]
 [-0.52741074  1.49169203]
 [-5.70839853  5.10539985]]
[[1.93412543 0.93371705 5.22570311]
 [2.13425633 0.81719699 4.92769159]]
preds:
[[-99.99999637 100.00000367]]
weights:
[[-1.63932662  2.55927866]
 [-0.52741076  1.49169201]
 [-5.70839864  5.10539974]]
[[1.93412542 0.93371705 5.22570311]
 [2.13425633 0.81719698 4.9276916 ]]
preds:
[[-99.99999846 100.00000156]]
weights:
[[-1.63932664  2.55927864]
 [-0.52741077  1.491692  ]
 [-5.70839869  5.10539969]]
[[1.93412542 0.93371704 5.22570311]
 [2.13425632 0.81

In [17]:
# Fit only the output layer to a new target; the hidden activations in
# `layer_1_output` are reused unchanged on every step.
# NOTE(review): `layer_1_output` is not defined in any cell visible here --
# confirm it is created before this cell when running on a fresh kernel.
target = np.array([[69, -420]])
print(target)
print(output_layer.forward_prop(layer_1_output))
for i in range(10):
    # cost_function is a required positional argument of back_prop_output;
    # omitting it raises TypeError. 'square_loss' is the cost this linear
    # output layer is trained with elsewhere in the notebook.
    _ = output_layer.back_prop_output(layer_1_output, target, 'square_loss', learning_rate = 0.1)
    print(output_layer.forward_prop(layer_1_output))
    

[[  69 -420]]
[[ 0.84090483 -4.14991045]]
[[  88.14803729 -536.82539213]]
[[  63.62071304 -387.1800666 ]]
[[  70.51121119 -429.22015333]]
[[  68.57545316 -417.40976843]]
[[  69.11926858 -420.72767766]]
[[  68.9664937  -419.79557242]]
[[  69.00941298 -420.05743015]]
[[  68.9973556  -419.98386606]]
[[  69.0007429  -420.00453253]]
[[  68.9997913  -419.99872667]]
