In [139]:
import numpy as np

class ConvConnectedLayer:
    def __init__(self, filter_count, kernel_shape, stride = 1,
                 #weights = False, biases = False, 
                 activation_type = 'relu'):
        
        self.kernel_shape = kernel_shape
        self.stride = stride
        self.weights = np.random.rand(kernel_shape[0], 
                                      kernel_shape[1]) #use rand to init, use 3rd dim for filter count?
        
        self.activation_type = activation_type
        if self.activation_type == 'linear':
            self.activation_function = lambda x : x
        elif self.activation_type == 'relu':
            self.activation_function = lambda x : np.maximum(x, 0)
        elif self.activation_type == 'sigmoid':
            self.activation_function = lambda x : 1 / (1 + np.exp(-1 * x))
        elif self.activation_type == 'softmax':
            self.activation_function = lambda x : np.exp(x) / np.sum(np.exp(x), axis = 1)
        else:
            raise Exception('Invalid activation function')
        
        self.biases = np.random.rand(1, filter_count)
            
    def convolve(self, region):
        weighted_output = np.sum(np.multiply(region, self.weights)) + self.biases
        return weighted_output[0][0]
    
    def forward_prop(self, input_data, padding = 'same', activation = True):
        #padding for stride 1
        orig_shape = input_data.shape
        if (padding == 'same'):
            #pad right edge
            input_data = np.concatenate([input_data, np.zeros((input_data.shape[0], self.kernel_shape[1] - 1))], 
                                        axis = 1)
            #pad bottom edge
            input_data = np.concatenate([input_data, np.zeros((self.kernel_shape[0] - 1, input_data.shape[1]))], 
                                        axis = 0)
        elif padding != 'none':
            raise Exception('Padding not supported')
            
    
        output = []
        for i in range(0, orig_shape[0], self.stride):
            row = []
            for j in range(0, orig_shape[1], self.stride):
                row.append(self.convolve(region = input_data[i : i + self.kernel_shape[0],
                                                             j : j + self.kernel_shape[1]]
                                        )
                          )
            output.append(row)
            
        if activation:
            return self.activation_function(np.array(output))
        else:
            return np.array(output)
    
    def back_prop_output(self, input_data, cost_grad, learning_rate = 0.001, padding = 'same'):
        if self.activation_type == 'linear':
            activation_grad = 1
        elif self.activation_type == 'relu':
            relu_grad = map(lambda x : 1 if x > 0 else -0.1, 
                            self.forward_prop(input_layer, activation = False).flatten())
            
            activation_grad = np.array([list(relu_grad)])
        return activation_grad
        #padding for stride 1
        orig_shape = input_data.shape
        if (padding == 'same'):
            #pad right edge
            input_data = np.concatenate([input_data, np.zeros((input_data.shape[0], self.kernel_shape[1] - 1))], 
                                        axis = 1)
            #pad bottom edge
            input_data = np.concatenate([input_data, np.zeros((self.kernel_shape[0] - 1, input_data.shape[1]))], 
                                        axis = 0)
            
            #pad right edge
            activation_grad = np.concatenate([activation_grad, 
                                              np.zeros((activation_grad.shape[0], self.kernel_shape[1] - 1))], 
                                        axis = 1)
            #pad bottom edge
            activation_grad = np.concatenate([activation_grad, 
                                              np.zeros((self.kernel_shape[0] - 1, activation_grad.shape[1]))], 
                                        axis = 0)
        elif padding != 'none':
            raise Exception('Padding not supported')
            
    
        weights_update = np.zeros(self.weights.shape)
        for i in range(0, orig_shape[0], self.stride):
            for j in range(0, orig_shape[1], self.stride):
                region = input_data[i : i + self.kernel_shape[0],
                                    j : j + self.kernel_shape[1]]
                activation_grad_region = activation_grad[i : i + self.kernel_shape[0],
                                                         j : j + self.kernel_shape[1]]
                weights_update += np.multiply(region, activation_grad_region)
        weights_update /= 16
        weights_update *= cost_grad

        self.weights += -1 * learning_rate * weights_update
        self.biases += -1 * learning_rate * cost_grad

In [140]:
input_layer = np.array([[1, 1, 0, 0], 
                        [1, 1, 0, 0], 
                        [0, 0, 1, 1], 
                        [0, 0, 1, 1],
                       ])

layer_1 = ConvConnectedLayer(filter_count = 1, kernel_shape = [2, 2], stride = 1,
                              activation_type = 'relu'
                             )

# layer_2 = ConvConnectedLayer(filter_count = 1, kernel_shape = [2, 2], stride = 1,
#                               activation_type = 'relu'
#                              )
np.mean(layer_1.forward_prop(input_layer))

1.1426177437461167

In [None]:
layer_2.back_prop_output

In [141]:
target = np.array([[-1]])
for i in range(10):
    cost_grad = -2 * (target - np.mean(layer_1.forward_prop(input_layer)))
    layer_1.back_prop_output(input_layer, cost_grad, learning_rate = 0.5)
    print(np.mean(layer_1.forward_prop(input_layer)))

1.1426177437461167
1.1426177437461167
1.1426177437461167
1.1426177437461167
1.1426177437461167
1.1426177437461167
1.1426177437461167
1.1426177437461167
1.1426177437461167
1.1426177437461167


In [9]:
layer_1.forward_prop(input_layer)

array([[2.30721707, 0.74735136],
       [0.74735136, 2.30721707]])

In [12]:
np.mean(layer_1.forward_prop(input_layer))

1.9682848719270942

In [47]:
layer_1.weights

array([[0.05267091, 0.34828919],
       [0.04212482, 0.20214394]])

In [48]:
layer_1.biases

array([0.66860029])

In [53]:
layer_1.forward_prop(input_layer, stride = 1).shape

(4, 4, 1)

In [54]:
layer_1.forward_prop(input_layer, stride = 1)

array([[[0.91687232],
        [0.91687232],
        [0.91687232],
        [0.91687232]],

       [[0.91687232],
        [0.91687232],
        [0.91687232],
        [0.91687232]],

       [[0.91687232],
        [0.91687232],
        [0.91687232],
        [0.91687232]],

       [[0.91687232],
        [0.91687232],
        [0.91687232],
        [0.91687232]]])

In [18]:
0.92360386 + 0.17977975

1.10338361

In [29]:
output_layer = FullyConnectedLayer(3, 2,
                              activation_type = 'linear'
                             )

In [32]:
target = np.array([[-100, 100]])
print('target:')
print(target)
print(output_layer.forward_prop(layer_1.forward_prop(input_layer)))
for i in range(10):
    output_gradient = output_layer.back_prop_output(layer_1.forward_prop(input_layer), target, 'square_loss', 
                                                    learning_rate = 0.001, train = True)
    layer_1.back_prop_hidden(input_layer, output_gradient, learning_rate=0.001)

    print('weights:')
    print(output_layer.weights)
    print(layer_1.weights)
    print('preds:')
    print(output_layer.forward_prop(layer_1.forward_prop(input_layer)))

target:
[[-100  100]]
[[-99.99995287 100.00004762]]
weights:
[[-1.63932624  2.55927905]
 [-0.52741058  1.49169219]
 [-5.70839765  5.10540074]]
[[1.93412548 0.93371711 5.22570308]
 [2.13425639 0.81719704 4.92769156]]
preds:
[[-99.99997995 100.00002026]]
weights:
[[-1.63932648  2.55927881]
 [-0.5274107   1.49169208]
 [-5.70839827  5.10540012]]
[[1.93412544 0.93371707 5.2257031 ]
 [2.13425635 0.81719701 4.92769158]]
preds:
[[-99.99999147 100.00000862]]
weights:
[[-1.63932658  2.5592787 ]
 [-0.52741074  1.49169203]
 [-5.70839853  5.10539985]]
[[1.93412543 0.93371705 5.22570311]
 [2.13425633 0.81719699 4.92769159]]
preds:
[[-99.99999637 100.00000367]]
weights:
[[-1.63932662  2.55927866]
 [-0.52741076  1.49169201]
 [-5.70839864  5.10539974]]
[[1.93412542 0.93371705 5.22570311]
 [2.13425633 0.81719698 4.9276916 ]]
preds:
[[-99.99999846 100.00000156]]
weights:
[[-1.63932664  2.55927864]
 [-0.52741077  1.491692  ]
 [-5.70839869  5.10539969]]
[[1.93412542 0.93371704 5.22570311]
 [2.13425632 0.81

In [17]:
target = np.array([[69, -420]])
print(target)
print(output_layer.forward_prop(layer_1_output))
for i in range(10):
    _ = output_layer.back_prop_output(layer_1_output, target, learning_rate = 0.1)
    print(output_layer.forward_prop(layer_1_output))
    

[[  69 -420]]
[[ 0.84090483 -4.14991045]]
[[  88.14803729 -536.82539213]]
[[  63.62071304 -387.1800666 ]]
[[  70.51121119 -429.22015333]]
[[  68.57545316 -417.40976843]]
[[  69.11926858 -420.72767766]]
[[  68.9664937  -419.79557242]]
[[  69.00941298 -420.05743015]]
[[  68.9973556  -419.98386606]]
[[  69.0007429  -420.00453253]]
[[  68.9997913  -419.99872667]]
