In [3]:
import numpy as np
from keras.datasets import mnist

In [2]:
(img_train, lbl_train), (img_test, lbl_test) = mnist.load_data()

In [4]:
# Build a Layer with [Size x Size] x Numbers neurons
class Layer(object):
    def __init__(self, lay_size = []):
        self.lay_size = lay_size
        self.maps = []
        for map_size in lay_size :
            self.maps.append(np.zeros(map_size))
        self.maps = np.array(self.maps)

In [4]:
class ConvLayer(Layer) :
        def __init__(self, lay_size = [], conv_core_sizes = [], map_comb_index = []) :
            Layer.__init__(self, lay_size)
            self.conv_cores = []
            self.bias = []
            self.map_comb_index = map_comb_index

            # Initialize the parameters. 
            # The number -2.4/Fi and 2.4/Fi comes from the paper in Appendices A.
            for conv_core_size in conv_core_sizes :
                # Fi is based on the definition in the paper
                Fi = conv_core_size[0] * conv_core_size[1] + 1
                # Make random filters
                self.conv_cores.append(np.random.uniform(-2.4/Fi, 2.4/Fi, conv_core_size)) 
                # Make random biases
                self.bias.append(np.random.uniform(-2.4/Fi, 2.4/Fi))

            self.conv_cores = np.array(self.conv_cores)
        
        def cov_op(self, pre_maps, core_index) :
            pre_map_shape = pre_maps.shape
            core_shape = self.conv_cores[core_index].shape
            map_shape = self.maps[core_index].shape

            # Check the input size, 
            # If the result from input size match the output
            # Calculate the Convolution Layer
            if not (map_shape[-2] == pre_map_shape[-2] - core_shape[-2] + 1 \
                and map_shape[-1] == pre_map_shape[-1] - core_shape[-1] + 1) :
                return None

            for i in range(map_shape[-2]) :
                for j in range(map_shape[-1]) :

                    # Filter caculation in HW4 
                    local_recept = pre_maps[:, i:i + core_shape[-2], j:j + core_shape[-1]]
                    val = np.sum(local_recept * self.conv_cores[core_index]) + self.bias[core_index]

                    # Use tanh(x) as activation function.
                    # Remark that tanh(x) = ((e^2x) -1)/((e^2x)+1)
                    # We use the parameters in the paper
                    # f(a) = Atanh(Sa), where A = 1.7159, S = 2/3

                    val = np.exp((4.0/3)*val) 
                    self.maps[core_index][i][j] = 1.7159 * (val - 1) / (val + 1)
        
        def calc_maps(self, pre_mapset, map_comb_flag = False) :
            # if mapcombflag = False, the first Conv Layer
            if not map_comb_flag :
                for i in range(len(self.maps)) :
                    self.cov_op(pre_mapset, i)
                
            # mapcombflag = True, the other Conv Layer
            else :
                for i in range(len(self.maps)) :
                    self.cov_op(pre_mapset[self.map_comb_index[i]], i)
        
        

In [5]:
class PoolingLayer(Layer) :
    def __init__(self, lay_size = [], pool_core_sizes = []) :
        Layer.__init__(self, lay_size)
        Fi = pool_core_sizes[0][0] * pool_core_sizes[0][1] + 1
        self.poolparas = np.random.uniform(-2.4/Fi, 2.4/Fi, [len(lay_size), 2])
        self.poolcore_sizes = np.array(pool_core_sizes)
        
    def pool_op(self, pre_map, pool_index) :
        pre_map_shape = pre_map.shape
        poolcore_size = self.poolcore_sizes[pool_index]
        for i in range(int(pre_map_shape[0] / poolcore_size[0])) :
            for j in range(int(pre_map_shape[1] / poolcore_size[1])) :
                val = self.poolparas[pool_index][0] * np.sum(pre_map[i*poolcore_size[0]:(i+1)*poolcore_size[0],\
                            j*poolcore_size[1]:(j+1)*poolcore_size[1]]) + self.poolparas[pool_index][1]
                val = np.exp((4.0/3)*val)
                self.maps[pool_index][i][j] = 1.7159 * (val -1) / (val + 1)

    def calc_maps(self, pre_mapset) :
        for i in range(len(self.maps)) :
            self.pool_op(pre_mapset[i], i)

In [6]:
class FcLayer(Layer) :
    def __init__(self, lay_len, pre_nodesnum) :
        Layer.__init__(self, [[1, lay_len]])
        Fi = pre_nodesnum + 1
        self.weight = np.random.uniform(-2.4/Fi, 2.4/Fi, [lay_len, pre_nodesnum]) # 84x120
        self.bias = np.random.uniform(-2.4/Fi, 2.4/Fi, [lay_len]) #84 
        
    def fc_op(self, pre_maps, node_index) :
        # pre_maps: 1x1x120
        pre_nodes = pre_maps.reshape([pre_maps.shape[0] * pre_maps.shape[1] * pre_maps.shape[2]]) #120
        val  = np.sum(self.weight[node_index] * pre_nodes) + self.bias[node_index] #
        val = np.exp((4.0/3)*val)
        self.maps[0][0][node_index] = 1.7159 *  (val -1) / (val + 1)

    def calc_maps(self, pre_mapset) :
        for i in range(len(self.maps[0][0])) : #84
            self.fc_op(pre_mapset, i)

In [7]:
class OutputLayer(FcLayer) :
    def __init__(self, lay_len, pre_nodesnum) :
        FcLayer.__init__(self, lay_len, pre_nodesnum)
        # We have to assign float64 to ensure the weight is float number. 
        self.weight = np.float64(np.random.choice([-1,1], [lay_len, pre_nodesnum])) # 10x84

    def rbf(self, pre_maps, node_index = -1) :
        pre_nodes = pre_maps.flatten() #84

        if node_index != -1:
            self.maps[0][0][node_index] = 0.5 * np.sum((pre_nodes - self.weight[node_index])**2)

        else:
            for i in range(len(self.maps[0][0])) :
                self.maps[0][0][i] = 0.5 * np.sum((pre_nodes - self.weight[i])**2)
    
    def back_propa(self, pre_mapset, current_error, learn_rate) :
        self.current_error = current_error
        current_error_matrix = np.array(np.matrix(list(current_error[0]) * self.weight.shape[1]).T)  #84x10 > 10x84
        
        # pre_mapset : 1x1x84
        weight_update = (self.weight - np.array(list(pre_mapset[0]) * self.weight.shape[0])) * current_error_matrix   
        self.weight -= learn_rate * weight_update
        pre_error = ((np.array(list(pre_mapset[0]) * self.weight.shape[0]) - self.weight) * current_error_matrix).sum(axis = 0)
        return pre_error.reshape(pre_mapset.shape) 

In [8]:
class ConvNet(object): 
    def __init__(self) :

        C3_core_sizes = [[3, 5, 5]] * 6
        C3_core_sizes.extend([[4, 5, 5]] * 9)
        C3_core_sizes.extend([[6, 5, 5]])
        
        # C3_map_comb_index is based on TABLE 1
        C3_map_comb_index = [[0,1,2],[1,2,3],[2,3,4],[3,4,5],[4,5,0],[5,0,1], \
             [0,1,2,3],[1,2,3,4],[2,3,4,5],[3,4,5,0],[4,5,0,1],[5,0,1,2],[0,1,3,4],[1,2,4,5],[0,2,3,5],[0,1,2,3,4,5]]

        self.C1 = ConvLayer([[28, 28]] * 6, [[1, 5, 5]] * 6)
        self.S2 = PoolingLayer([[14, 14]] * 6, [[2, 2]] * 6)
        self.C3 = ConvLayer([[10, 10]] * 16, C3_core_sizes, C3_map_comb_index)
        self.S4 = PoolingLayer([[5, 5]] * 16, [[2, 2]] * 16)
        self.C5 = ConvLayer([[1, 1]] * 120, [[16, 5, 5]] * 120)
        self.F6 = FcLayer(84, 120)
        self.output = OutputLayer(10, 84)
        
    def fw_prop(self, mapset, mapclass = -1) :
        self.C1.calc_maps(mapset)
        self.S2.calc_maps(self.C1.maps)
        self.C3.calc_maps(self.S2.maps, True)
        self.S4.calc_maps(self.C3.maps)
        self.C5.calc_maps(self.S4.maps)
        self.F6.calc_maps(self.C5.maps)
        self.output.rbf(self.F6.maps, mapclass)
    
    def bw_prop(self, mapset, mapclass, learn_rate) :
        output_error = np.zeros([1, 1, 10])
        output_error[0][0][mapclass] = 1
        
        F6_error = self.output.back_propa(self.F6.maps, output_error, learn_rate, True)
        C5_error = self.F6.back_propa(self.C5.maps, F6_error, learn_rate, True)
        S4_error = self.C5.back_propa(self.S4.maps, C5_error, learn_rate, True)
        C3_error = self.S4.back_propa(self.C3.maps, S4_error, learn_rate, True)
        S2_error = self.C3.back_propa(self.S2.maps, C3_error, learn_rate, True)
        C1_error = self.S2.back_propa(self.C1.maps, S2_error, learn_rate, True)
        ilayer_error = self.C1.back_propa(mapset, C1_error, learn_rate, True)

In [11]:
processed_imgs = []
for index in range(1):
    padded_img = np.ones([32, 32]) * -0.1
    
    for row in range(img_train[index].shape[0]):
        for col in range(img_train[index].shape[1]):
            if img_train[index][row][col] > 0:
                padded_img[row + 2][col + 2] = 1.175
    processed_imgs.append(padded_img)
processed_imgs = np.array(processed_imgs)

In [89]:
train_covnet = ConvNet()
train_covnet.fw_prop(np.array(processed_imgs), lbl_train[0])

In [117]:
mapset = processed_imgs
mapclass = lbl_train[0]
learn_rate = 0.001
output_error = np.zeros([1, 1, 10])
output_error[0][0][mapclass] = 1


In [122]:
weight = train_covnet.output.weight
current_error = output_error
current_error_matrix = np.tile(current_error[0], (weight.shape[1], 1)).T
weight_update = (weight - np.tile(train_covnet.F6.maps[0], (weight.shape[0], 1))) * current_error_matrix 

weight -= weight_update 
pre_error = ((np.array(list(train_covnet.F6.maps[0]) * weight.shape[0]) - weight) * current_error_matrix).sum(axis = 0)
pre_error.reshape(train_covnet.F6.maps.shape)

array([[[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.]]])