# 멀티 뉴런 (Multiple Neurons) - Backpropagation

## 1. Layers with Forward and Backward

In [1]:
import numpy as np
import random
import math

In [46]:
class AffineWithTwoInputs:
    """Single affine neuron with two weights: ``out = np.dot(w, x) + b``.

    The two weights and the bias are drawn from ``random.random()`` when the
    object is created. ``forward`` caches its input so that ``backward`` can
    compute the parameter gradients, which are stored in ``dw`` and ``db``.
    """

    def __init__(self):
        self.w = np.array([random.random(), random.random()])   # one weight per input
        self.b = np.array([random.random()])  # bias
        self.x = None    # input cached by forward()
        self.dw = None   # gradient w.r.t. w, filled in by backward()
        self.db = None   # gradient w.r.t. b, filled in by backward()

    def forward(self, x):
        """Return ``np.dot(w, x) + b`` and remember ``x`` for the backward pass.

        In this file ``x`` is always a length-2 NumPy array.
        """
        self.x = x
        out = np.dot(self.w, self.x) + self.b
        return out

    def backward(self, din):
        """Propagate the upstream gradient ``din`` through the neuron.

        A size-1 array is first unwrapped to a plain Python scalar. The
        method stores ``dw = np.dot(x.T, din)`` and ``db = din`` on the
        instance and returns the gradient with respect to the input,
        ``np.dot(din, w.T)``. ``forward`` must have been called first.
        """
        if isinstance(din, np.ndarray) and din.size == 1:
            # ndarray.item() replaces np.asscalar(), which was deprecated in
            # NumPy 1.16 and removed in NumPy 1.23.
            din = din.item()
        dx = np.dot(din, self.w.T)
        self.dw = np.dot(self.x.T, din)
        self.db = din
        return dx

class AffineWithOneInput:
    """Single affine neuron with one weight: ``out = np.dot(w, x) + b``.

    Weight and bias are one-element arrays drawn from ``random.random()``.
    ``forward`` caches the input; ``backward`` stores the parameter
    gradients in ``dw`` / ``db`` and returns the input gradient.
    """

    def __init__(self):
        # Draw order (weight first, then bias) is kept so seeded runs match.
        initial_weight = random.random()
        initial_bias = random.random()
        self.w = np.array([initial_weight])
        self.b = np.array([initial_bias])
        # Populated later by forward() / backward().
        self.x = None
        self.dw = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return the affine response ``np.dot(w, x) + b``."""
        self.x = x
        weighted = np.dot(self.w, self.x)
        return weighted + self.b

    def backward(self, din):
        """Store ``dw`` and ``db`` for upstream gradient ``din``; return the input gradient."""
        # Parameter gradients first; they are independent of the returned value.
        self.dw = np.dot(self.x.T, din)
        self.db = din
        return np.dot(din, self.w.T)
    
class Relu:
    """Rectified linear unit: passes positive values, zeroes the rest."""

    def __init__(self):
        self.x = None   # input cached by forward() for use in backward()

    def forward(self, x):
        """Return a copy of ``x`` with every entry ``<= 0`` replaced by 0.

        ``x`` must support ``.copy()`` and boolean-mask assignment, i.e. it
        is expected to be a NumPy array.
        """
        self.x = x
        mask = (self.x <= 0)
        out = self.x.copy()
        out[mask] = 0
        return out

    def backward(self, din):
        """Gate the upstream gradient ``din`` by the sign of the cached input.

        For an array ``din`` a new array is returned in which positions
        where the cached input was ``<= 0`` are zeroed; the caller's array
        is left untouched. For a scalar ``din`` the cached input must hold a
        single value; the result is ``0`` if that value is ``<= 0`` and
        ``din`` otherwise.
        """
        if isinstance(din, np.ndarray):
            mask = (self.x <= 0)
            # Work on a copy so the caller's gradient array is not modified.
            dx = din.copy()
            dx[mask] = 0
        else:
            if self.x <= 0:
                dx = 0
            else:
                dx = din
        return dx
    
class SquaredError:
    """Half squared error loss for a single output: ``0.5 * (z - z_target)**2``."""

    def __init__(self):
        self.z = None          # prediction cached by forward()
        self.z_target = None   # target cached by forward()

    def forward(self, z, z_target):
        """Cache ``z`` and ``z_target`` and return the loss as a Python float.

        ``z - z_target`` must hold exactly one value (a scalar or a size-1
        array).

        Raises:
            TypeError: if ``z - z_target`` holds more than one value.
        """
        self.z = z
        self.z_target = z_target
        diff = np.asarray(self.z - self.z_target)
        if diff.size != 1:
            raise TypeError("only size-1 arrays can be converted to Python scalars")
        # Extract the scalar explicitly: implicit conversion of a size-1,
        # ndim > 0 array to float (as math.pow would do) is deprecated since
        # NumPy 1.25.
        loss = 1.0 / 2.0 * math.pow(diff.item(), 2)
        return loss

    def backward(self, din):
        """Return the loss gradient w.r.t. ``z``: ``(z - z_target) * din``."""
        dx = (self.z - self.z_target) * din
        return dx

## 2. Neural Network Model of Linear Two Neurons

In [47]:
class LinearTwoNeurons:
    """Two neurons in series: n1 (two inputs) -> ReLU -> n2 (one input) -> ReLU.

    The loss is ``SquaredError``. Creating the model prints the initial
    weights and biases of both neurons.
    """

    def __init__(self):
        # Construction order is kept: each affine layer draws random numbers.
        self.n1 = AffineWithTwoInputs()
        self.relu1 = Relu()
        self.n2 = AffineWithOneInput()
        self.relu2 = Relu()
        self.loss = SquaredError()
        print("Neuron n1 - Initial w: {0}, b: {1}".format(self.n1.w, self.n1.b))
        print("Neuron n2 - Initial w: {0}, b: {1}".format(self.n2.w, self.n2.b))

    def predict(self, x):
        """Run ``x`` forward through every layer and return the final activation."""
        signal = x
        for layer in (self.n1, self.relu1, self.n2, self.relu2):
            signal = layer.forward(signal)
        return signal

    def backpropagation_gradient(self, x, z_target):
        """Run one forward pass, then backpropagate to fill ``dw``/``db`` on n1 and n2."""
        # Forward pass: caches the inputs each layer needs for its backward step.
        prediction = self.predict(x)
        self.loss.forward(prediction, z_target)

        # Backward pass: start from d(loss)/d(loss) = 1 and walk the layers in reverse.
        grad = self.loss.backward(1)
        for layer in (self.relu2, self.n2, self.relu1, self.n1):
            grad = layer.backward(grad)

    def learning(self, alpha, x, z_target):
        """Apply one gradient-descent step with learning rate ``alpha`` for one sample."""
        self.backpropagation_gradient(x, z_target)

        for neuron in (self.n1, self.n2):
            neuron.w = neuron.w - alpha * neuron.dw
            neuron.b = neuron.b - alpha * neuron.db

## 3. OR gate with Two Linear Neurons - Learning and Testing

In [48]:
class Data:
    """Training set for the two-input OR gate (four samples)."""

    def __init__(self):
        inputs = [(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0)]
        or_targets = [0.0, 1.0, 1.0, 1.0]
        self.training_input_value = np.array(inputs)
        self.training_z_target = np.array(or_targets)
        # Number of samples equals the number of input rows.
        self.numTrainData = self.training_input_value.shape[0]

if __name__ == '__main__':
    # Train the two-neuron model on the OR gate and report progress.
    ltn = LinearTwoNeurons()
    d = Data()

    # Per-sample predictions and errors before any training.
    for x, z_target in zip(d.training_input_value, d.training_z_target):
        z2 = ltn.predict(x)
        error = ltn.loss.forward(z2, z_target)
        print("x: {0:s}, z2: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z2), str(z_target), error))

    max_epoch = 1000
    print_epoch_period = 20
    for i in range(max_epoch + 1):
        # One epoch: one gradient-descent step per training sample.
        for x, z_target in zip(d.training_input_value, d.training_z_target):
            ltn.learning(0.01, x, z_target)

        if i % print_epoch_period == 0:
            # Accumulate under a dedicated name so the builtin sum() is not
            # rebound at module / notebook scope.
            total_error = 0.0
            for x, z_target in zip(d.training_input_value, d.training_z_target):
                z2 = ltn.predict(x)
                total_error = total_error + ltn.loss.forward(z2, z_target)

            print("Epoch{0:4d}-Error:{1:7.5f}, Neuron n1[w11: {2:7.5f}, w12: {3:7.5f}, b1: {4:7.5f}], Neuron n2[w2: {5:7.5f}, b2: {6:7.5f}]".format(
                i,
                total_error / d.numTrainData,
                ltn.n1.w[0],
                ltn.n1.w[1],
                ltn.n1.b[0],
                ltn.n2.w[0],
                ltn.n2.b[0])
            )

    # Per-sample predictions and errors after training.
    for x, z_target in zip(d.training_input_value, d.training_z_target):
        z2 = ltn.predict(x)
        error = ltn.loss.forward(z2, z_target)
        print("x: {0:s}, z2: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z2), str(z_target), error))

Neuron n1 - Initial w: [ 0.259415    0.76477372], b: [ 0.21560677]
Neuron n2 - Initial w: [ 0.74739234], b: [ 0.78961367]
x: [ 0.  0.], z2: [ 0.95075652], z_target: 0.0, error: 0.45197
x: [ 1.  0.], z2: [ 1.14464131], z_target: 1.0, error: 0.01046
x: [ 0.  1.], z2: [ 1.52234255], z_target: 1.0, error: 0.13642
x: [ 1.  1.], z2: [ 1.71622733], z_target: 1.0, error: 0.25649
Epoch   0-Error:0.18529, Neuron n1[w11: 0.25343, w12: 0.75601, b1: 0.19877], Neuron n2[w2: 0.73155, b2: 0.76700]
Epoch  20-Error:0.05533, Neuron n1[w11: 0.22806, w12: 0.68833, b1: 0.05451], Neuron n2[w2: 0.62298, b2: 0.55254]
Epoch  40-Error:0.04768, Neuron n1[w11: 0.25376, w12: 0.67942, b1: 0.01811], Neuron n2[w2: 0.62071, b2: 0.49380]
Epoch  60-Error:0.04477, Neuron n1[w11: 0.28727, w12: 0.68003, b1: 0.00180], Neuron n2[w2: 0.63525, b2: 0.46780]
Epoch  80-Error:0.04256, Neuron n1[w11: 0.32022, w12: 0.68104, b1: 0.00086], Neuron n2[w2: 0.65161, b2: 0.44803]
Epoch 100-Error:0.04064, Neuron n1[w11: 0.35078, w12: 0.68083

## 4. Neural Network Model of Three Neurons

In [49]:
class ThreeNeurons:
    """Three-neuron network: n1 and n2 both read the input, n3 reads their outputs.

    Each neuron is an ``AffineWithTwoInputs`` followed by a ``Relu``; the
    loss is ``SquaredError``.
    """

    def __init__(self):
        self.n1 = AffineWithTwoInputs()
        self.relu1 = Relu()
        self.n2 = AffineWithTwoInputs()
        self.relu2 = Relu()
        self.n3 = AffineWithTwoInputs()
        self.relu3 = Relu()
        self.loss = SquaredError()

    def predict(self, x):
        """Run ``x`` through n1 and n2 in parallel, then through n3; return n3's activation."""
        u1 = self.n1.forward(x)
        z1 = self.relu1.forward(u1)
        u2 = self.n2.forward(x)
        z2 = self.relu2.forward(u2)
        # Pack the two one-element activations into the 2-vector fed to n3.
        # ndarray.item() replaces np.asscalar(), which was deprecated in
        # NumPy 1.16 and removed in NumPy 1.23.
        z  = np.array([z1.item(), z2.item()])
        u3 = self.n3.forward(z)
        z3 = self.relu3.forward(u3)
        return z3

    def backpropagation_gradient(self, x, z_target):
        """Run one forward pass, then backpropagate to fill ``dw``/``db`` on n1, n2 and n3."""
        # forward
        z3 = self.predict(x)
        self.loss.forward(z3, z_target)

        # backward
        din = 1
        din = self.loss.backward(din)

        din = self.relu3.backward(din)
        din = self.n3.backward(din)

        # n3's input gradient has one component per upstream neuron:
        # component 0 flows back to n1, component 1 to n2.
        din_0 = self.relu1.backward(din[0])
        self.n1.backward(din_0)

        din_1 = self.relu2.backward(din[1])
        self.n2.backward(din_1)

    def learning(self, alpha, x, z_target):
        """Apply one gradient-descent step with learning rate ``alpha`` for one sample."""
        self.backpropagation_gradient(x, z_target)

        self.n1.w = self.n1.w - alpha * self.n1.dw
        self.n1.b = self.n1.b - alpha * self.n1.db
        self.n2.w = self.n2.w - alpha * self.n2.dw
        self.n2.b = self.n2.b - alpha * self.n2.db
        self.n3.w = self.n3.w - alpha * self.n3.dw
        self.n3.b = self.n3.b - alpha * self.n3.db

## 5. OR gate with Three Neurons - Learning and Testing

In [50]:
class Data:
    """Training set for the two-input OR gate (four samples)."""

    def __init__(self):
        input_pairs = [(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0)]
        expected = [0.0, 1.0, 1.0, 1.0]
        self.training_input_value = np.array(input_pairs)
        self.training_z_target = np.array(expected)
        # Number of samples equals the number of input rows.
        self.numTrainData = self.training_input_value.shape[0]

if __name__ == '__main__':
    # Train the three-neuron model on the OR gate and report progress.
    tn = ThreeNeurons()
    d = Data()

    # Per-sample predictions and errors before any training.
    for x, z_target in zip(d.training_input_value, d.training_z_target):
        z3 = tn.predict(x)
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))

    max_epoch = 1000
    print_epoch_period = 20
    for i in range(max_epoch + 1):
        # One epoch: one gradient-descent step per training sample.
        for x, z_target in zip(d.training_input_value, d.training_z_target):
            tn.learning(0.01, x, z_target)

        if i % print_epoch_period == 0:
            # Accumulate under a dedicated name so the builtin sum() is not
            # rebound at module / notebook scope.
            total_error = 0.0
            for x, z_target in zip(d.training_input_value, d.training_z_target):
                z3 = tn.predict(x)
                total_error = total_error + tn.loss.forward(z3, z_target)

            print("{0:4d}-Err:{1:7.4f}, n1[w:{2:s},b:{3:s}], n2[w:{4:s},b:{5:s}], n3[w:{6:},b:{7:s}]".format(
                i,
                total_error / d.numTrainData,
                np.array_str(tn.n1.w, precision=2),
                np.array_str(tn.n1.b, precision=2),
                np.array_str(tn.n2.w, precision=2),
                np.array_str(tn.n2.b, precision=2),
                np.array_str(tn.n3.w, precision=2),
                np.array_str(tn.n3.b, precision=2))
            )

    # Per-sample predictions and errors after training.
    for x, z_target in zip(d.training_input_value, d.training_z_target):
        z3 = tn.predict(x)
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))

x: [ 0.  0.], z3: [ 1.15209077], z_target: 0.0, error: 0.66366
x: [ 1.  0.], z3: [ 1.68362009], z_target: 1.0, error: 0.23367
x: [ 0.  1.], z3: [ 1.56569511], z_target: 1.0, error: 0.16001
x: [ 1.  1.], z3: [ 2.09722443], z_target: 1.0, error: 0.60195
   0-Err: 0.3046, n1[w:[ 0.47  0.37],b:[ 0.48]], n2[w:[ 0.43  0.29],b:[ 0.69]], n3[w:[ 0.87  0.2 ],b:[ 0.5]]
  20-Err: 0.0464, n1[w:[ 0.41  0.33],b:[ 0.32]], n2[w:[ 0.42  0.28],b:[ 0.67]], n3[w:[ 0.73  0.01],b:[ 0.3]]
  40-Err: 0.0428, n1[w:[ 0.43  0.35],b:[ 0.3]], n2[w:[ 0.42  0.28],b:[ 0.67]], n3[w:[ 0.74  0.01],b:[ 0.26]]
  60-Err: 0.0403, n1[w:[ 0.44  0.38],b:[ 0.28]], n2[w:[ 0.42  0.28],b:[ 0.67]], n3[w:[ 0.76  0.01],b:[ 0.24]]
  80-Err: 0.0384, n1[w:[ 0.46  0.4 ],b:[ 0.27]], n2[w:[ 0.42  0.28],b:[ 0.67]], n3[w:[ 0.77  0.02],b:[ 0.22]]
 100-Err: 0.0368, n1[w:[ 0.47  0.42],b:[ 0.25]], n2[w:[ 0.42  0.28],b:[ 0.67]], n3[w:[ 0.79  0.02],b:[ 0.2]]
 120-Err: 0.0355, n1[w:[ 0.48  0.44],b:[ 0.24]], n2[w:[ 0.42  0.28],b:[ 0.67]], n3[w:[ 0.8  

## 6. XOR gate with Three Neurons - Learning and Testing

In [56]:
class Data:
    """Training set for the two-input XOR gate (four samples)."""

    def __init__(self):
        input_pairs = [(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (1.0, 1.0)]
        xor_targets = [0.0, 1.0, 1.0, 0.0]
        self.training_input_value = np.array(input_pairs)
        self.training_z_target = np.array(xor_targets)
        # Number of samples equals the number of input rows.
        self.numTrainData = self.training_input_value.shape[0]

if __name__ == '__main__':
    # Train the three-neuron model on the XOR gate and report progress.
    tn = ThreeNeurons()
    d = Data()

    # Per-sample predictions and errors before any training.
    for x, z_target in zip(d.training_input_value, d.training_z_target):
        z3 = tn.predict(x)
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))

    max_epoch = 2000
    print_epoch_period = 40
    for i in range(max_epoch + 1):
        # One epoch: one gradient-descent step per training sample.
        for x, z_target in zip(d.training_input_value, d.training_z_target):
            tn.learning(0.01, x, z_target)

        if i % print_epoch_period == 0:
            # Accumulate under a dedicated name so the builtin sum() is not
            # rebound at module / notebook scope.
            total_error = 0.0
            for x, z_target in zip(d.training_input_value, d.training_z_target):
                z3 = tn.predict(x)
                total_error = total_error + tn.loss.forward(z3, z_target)

            print("{0:4d}-Err:{1:7.4f}, n1[w:{2:s},b:{3:s}], n2[w:{4:s},b:{5:s}], n3[w:{6:},b:{7:s}]".format(
                i,
                total_error / d.numTrainData,
                np.array_str(tn.n1.w, precision=2),
                np.array_str(tn.n1.b, precision=2),
                np.array_str(tn.n2.w, precision=2),
                np.array_str(tn.n2.b, precision=2),
                np.array_str(tn.n3.w, precision=2),
                np.array_str(tn.n3.b, precision=2))
            )

    # Per-sample predictions and errors after training.
    for x, z_target in zip(d.training_input_value, d.training_z_target):
        z3 = tn.predict(x)
        error = tn.loss.forward(z3, z_target)
        print("x: {0:s}, z3: {1:s}, z_target: {2:s}, error: {3:7.5f}".format(str(x), str(z3), str(z_target), error))

x: [ 0.  0.], z3: [ 1.04199236], z_target: 0.0, error: 0.54287
x: [ 1.  0.], z3: [ 2.1597136], z_target: 1.0, error: 0.67247
x: [ 0.  1.], z3: [ 1.92813386], z_target: 1.0, error: 0.43072
x: [ 1.  1.], z3: [ 3.04585511], z_target: 0.0, error: 4.63862
   0-Err: 1.1391, n1[w:[ 0.59  0.84],b:[-0.03]], n2[w:[ 0.61  0.06],b:[ 0.18]], n3[w:[ 0.86  0.8 ],b:[ 0.77]]
  40-Err: 0.1263, n1[w:[ 0.34  0.62],b:[-0.22]], n2[w:[ 0.36 -0.25],b:[-0.12]], n3[w:[ 0.47  0.65],b:[ 0.33]]
  80-Err: 0.1116, n1[w:[ 0.3   0.59],b:[-0.17]], n2[w:[ 0.4  -0.32],b:[-0.08]], n3[w:[ 0.39  0.7 ],b:[ 0.32]]
 120-Err: 0.1010, n1[w:[ 0.27  0.58],b:[-0.13]], n2[w:[ 0.44 -0.4 ],b:[-0.05]], n3[w:[ 0.33  0.76],b:[ 0.32]]
 160-Err: 0.0931, n1[w:[ 0.24  0.58],b:[-0.09]], n2[w:[ 0.47 -0.46],b:[-0.01]], n3[w:[ 0.29  0.81],b:[ 0.31]]
 200-Err: 0.0877, n1[w:[ 0.21  0.59],b:[-0.06]], n2[w:[ 0.51 -0.51],b:[-0.]], n3[w:[ 0.26  0.86],b:[ 0.3]]
 240-Err: 0.0837, n1[w:[ 0.18  0.59],b:[-0.03]], n2[w:[ 0.54 -0.55],b:[ -1.32e-06]], n3[w:[ 