In [2]:
import numpy as np
import json
import random
import sys
from os.path import abspath, join

In [6]:
"""initial and define neruon network"""

def init_layers(nn_architecture, seed = 99):
    np.random.seed(seed)
    number_of_layers = len(nn_architecture)
    params_values = {}

    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]
        
        params_values['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) * 0.1
        params_values['b' + str(layer_idx)] = np.random.randn(
            layer_output_size, 1) * 0.1
        
    return params_values

In [8]:
"""activation function"""

def sigmoid(Z):
    return 1/(1+np.exp(-Z))

def sigmoid_backward(dA, Z):
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)

In [82]:
"""single layer feedforward funtcion"""

def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    Z_curr = np.dot(W_curr, A_prev) + b_curr
    
    if activation is "relu":
        activation_func = relu
    elif activation is "sigmoid":
        activation_func = sigmoid
    else:
        raise Exception('Non-supported activation function')
        
    return activation_func(Z_curr), Z_curr

In [83]:
"""full layer feedforward funtcion"""

def full_forward_propagation(X, params_values, nn_architecture):
    memory = {}
    A_curr = X
    
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        A_prev = A_curr
        
        activ_function_curr = layer["activation"]
        W_curr = params_values["W" + str(layer_idx)]
        b_curr = params_values["b" + str(layer_idx)]
        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)
        
        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr
       
    return A_curr, memory

In [12]:
"""error function"""

def get_cost_value(Y_hat, Y):
    m = Y_hat.shape[1]
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)

def get_accuracy_value(Y_hat, Y):
    Y_hat_ = convert_prob_into_class(Y_hat)
    return (Y_hat_ == Y).all(axis=0).mean()

def convert_prob_into_class(probs):
    probs_ = np.copy(probs)
    probs_[probs_ > 0.5] = 1
    probs_[probs_ <= 0.5] = 0
    return probs_

In [13]:
"""single layer backpropagation function"""

def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    m = A_prev.shape[1]
    
    if activation is "relu":
        backward_activation_func = relu_backward
    elif activation is "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        raise Exception('Non-supported activation function')
    
    dZ_curr = backward_activation_func(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr

In [15]:
"""full layer backpropagation funtcion"""

def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    grads_values = {}
    m = Y.shape[1]
    Y = Y.reshape(Y_hat.shape)
   
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));
    
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]
        
        dA_curr = dA_prev
        
        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]
        
        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)
        
        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr
    
    return grads_values

In [16]:
""" """

def update(params_values, grads_values, nn_architecture, learning_rate):
    for layer_idx, layer in enumerate(nn_architecture, 1):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]        
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

    return params_values;

In [84]:
"""file load"""

class Loader:

    @staticmethod
    def load_img(path):
        data = []
        try:
            with open(f"{path}", 'r') as txtFile:
                for line in txtFile:
                    line = line.replace('\n', '').split(',')
                    x = [int(_) for _ in line]
                    data.append(np.array(x))
            return data
        except Exception as e:
            raise

    @staticmethod
    def load_label(path):
        data = []
        try:
            with open(f"{path}", 'r') as txtFile:
                for line in txtFile:
                    x = line.replace('\n', '').split(',')
                    a = [1,0,0]
                    if x[0] == '0':
                        a = [1,0,0]
                    elif x[0] == '1':
                        a = [0,1,0]
                    elif x[0] == '2':
                        a = [0,0,1]
                    data.append(np.array(a))
            return data
        except Exception as e:
            raise

    @staticmethod
    def merge(imgFile, labelFile):
        return [(x, y) for x, y in zip(imgFile, labelFile)]

In [76]:
nn_architecture = [
    {"input_dim": 784, "output_dim": 30, "activation": "sigmoid"},
    {"input_dim": 30, "output_dim": 10, "activation": "sigmoid"},
]
fi1e = Loader()
X = fi1e.load_img(f'{abspath(".")}/data/test_img.txt')
print(len(X[0]))
#print(len(train_img))
Y = fi1e.load_img(f'{abspath(".")}/data/test_label.txt')
#print(len(train_label))
train_data = fi1e.merge(X, Y)

784


In [73]:
def train(X, Y, nn_architecture, epochs, learning_rate):
    params_values = init_layers(nn_architecture, 2)
    cost_history = []
    accuracy_history = []
    
    for i in range(epochs):
        Y_hat, cashe = full_forward_propagation(X, params_values, nn_architecture)
        cost = get_cost_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)
        
        grads_values = full_backward_propagation(Y_hat, Y, cashe, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)
        
    return params_values, cost_history, accuracy_history

In [81]:
train(X[:100], Y[:100], nn_architecture, 2, 3)

ValueError: shapes (784,30) and (100,784) not aligned: 30 (dim 1) != 100 (dim 0)

In [96]:
import numpy as np
class NeuralNetwork:
    def __init__(self, layers = [784 , 30, 10], activations=['sigmoid', 'sigmoid']):
        assert(len(layers) == len(activations)+1)
        self.layers = layers
        self.activations = activations
        self.weights = []
        self.biases = []
        for i in range(len(layers)-1):
            self.weights.append(np.random.randn(layers[i+1], layers[i]))
            self.biases.append(np.random.randn(layers[i+1], 1))
    
    def feedforward(self, x):
        # return the feedforward value for x
        a = np.copy(x)
        z_s = []
        a_s = [a]
        for i in range(len(self.weights)):
            activation_function = self.getActivationFunction(self.activations[i])
            z_s.append(self.weights[i].dot(a) + self.biases[i])
            a = activation_function(z_s[-1])
            a_s.append(a)
        return (z_s, a_s)
    def backpropagation(self,y, z_s, a_s):
            dw = []  # dC/dW
            db = []  # dC/dB
            deltas = [None] * len(self.weights)  # delta = dC/dZ  known as error for each layer
            # insert the last layer error
            deltas[-1] = (((y-a_s[-1]).dot(self.getDerivitiveActivationFunction(self.activations[-1]))(z_s[-1])))
            # Perform BackPropagation
            for i in reversed(range(len(deltas)-1)):
                deltas[i] = self.weights[i+1].T.dot(deltas[i+1])*(self.getDerivitiveActivationFunction(self.activations[i])(z_s[i]))        
            #a= [print(d.shape) for d in deltas]
            batch_size = y.shape[1]
            db = [d.dot(np.ones((batch_size,1)))/float(batch_size) for d in deltas]
            dw = [d.dot(a_s[i].T)/float(batch_size) for i,d in enumerate(deltas)]
            # return the derivitives respect to weight matrix and biases
            return dw, db
    def train(self, x, y, batch_size=10, epochs=100, lr = 0.01):
        for e in range(epochs): 
            i=0
            while(i<len(y)):
                x_batch = x[i:i+batch_size]
                y_batch = y[i:i+batch_size]
                i = i+batch_size
                z_s, a_s = self.feedforward(x_batch)
                dw, db = self.backpropagation(y_batch, z_s, a_s)
                self.weights = [w+lr*dweight for w,dweight in  zip(self.weights, dw)]
                self.biases = [w+lr*dbias for w,dbias in  zip(self.biases, db)]
                print("loss = {}".format(np.linalg.norm(a_s[-1]-y_batch) ))
    @staticmethod
    def getActivationFunction(name):
        if(name == 'sigmoid'):
            return lambda x : np.exp(x)/(1+np.exp(x))
        elif(name == 'linear'):
            return lambda x : x
        elif(name == 'relu'):
            def relu(x):
                y = np.copy(x)
                y[y<0] = 0
                return y
            return relu
        else:
            print('Unknown activation function. linear is used')
            return lambda x: x
    
    @staticmethod
    def getDerivitiveActivationFunction(name):
        if(name == 'sigmoid'):
            sig = lambda x : np.exp(x)/(1+np.exp(x))
            return lambda x :sig(x)*(1-sig(x)) 
        elif(name == 'linear'):
            return lambda x: 1
        elif(name == 'relu'):
            def relu_diff(x):
                y = np.copy(x)
                y[y>=0] = 1
                y[y<0] = 0
                return y
            return relu_diff
        else:
            print('Unknown activation function. linear is used')
            return lambda x: 1
if __name__=='__main__':
    import matplotlib.pyplot as plt
    fi1e = Loader()
    X = fi1e.load_img(f'{abspath(".")}/data/test_img.txt')
    #print(len(X[0]))
    #print(len(train_img))
    Y = fi1e.load_label(f'{abspath(".")}/data/test_label.txt')
    #print(len(train_label))

    X = np.array(X)
    y = np.array(Y)
    nn = NeuralNetwork([784, 30, 3],activations=['sigmoid', 'sigmoid'])
    
    nn.train(X, y, epochs=10000, batch_size=784, lr = 1)
    _, a_s = nn.feedforward(X)
    #print(y, X)
    plt.scatter(X.flatten(), y.flatten())
    plt.scatter(X.flatten(), a_s[-1].flatten())
    plt.show()

ValueError: operands could not be broadcast together with shapes (784,3) (3,784) 

In [59]:
def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x)*(1.0-sigmoid(x))

In [62]:
w1 = np.array([[0.1,-0.2,0.2],[-0.3,0.1,-0.1]]).T
print(f'w1 = {w1}')
a0 = np.array([1,0])
print(f'a0 = {a0}')
b1 = np.array([0,0.2,-0.1])
print(f'b1 = {b1}')
n1 = np.dot(w1,a0)+b1
print(f'n1 = {n1}')
a1 = sigmoid(n1)
print(a1)

w1 = [[ 0.1 -0.3]
 [-0.2  0.1]
 [ 0.2 -0.1]]
a0 = [1 0]
b1 = [ 0.   0.2 -0.1]
n1 = [0.1 0.  0.1]
[0.52497919 0.5        0.52497919]


In [57]:
# -*- coding: UTF-8 -*-
'''
created at 2020/11/6
author: Lishang Chien
'''
import numpy as np


def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x)*(1.0-sigmoid(x))

def cross_entropy(a, y):
    return a - y

class Loader:
    """define function for loading txt file"""

    @staticmethod
    def load_img(path):
        """load train_img/test_img txt file"""
        data = []
        try:
            with open(f"{abspath('.')}{path}", 'r') as txtFile:
                for line in txtFile:
                    line = line.replace('\n', '').split(',')
                    x = [int(_) for _ in line]
                    data.append(np.array(x))
            return data
        except Exception as e:
            raise

    @staticmethod
    def load_label(path):
        """load train_label/test_label txt file"""
        data = []
        try:
            with open(f"{abspath('.')}{path}", 'r') as txtFile:
                for line in txtFile:
                    x = line.replace('\n', '').split(',')
                    a = [0,0,0]
                    if x[0] == '0':
                        a = [1,0,0]
                    elif x[0] == '1':
                        a = [0,1,0]
                    elif x[0] == '2':
                        a = [0,0,1]
                    data.append(np.array(a))
            return data
        except Exception as e:
            raise

    @staticmethod
    def merge(imgFile, labelFile):
        return [(x, y) for x, y in zip(imgFile, labelFile)]

class NeuralNetwork:

    def __init__(self, layers, activation=sigmoid, activation_prime=sigmoid_prime, cost=cross_entropy):
        """ initial dependency / hyperparamters """
        # set activation function
        self.activation = activation
        self.activation_prime = activation_prime
        self.cost = cost

        # set-up
        self.weights = []
        self.learningRate = 0

        # layers = [784,30,3] means 
        # input layer has 784 neurons
        # one hidden layer has 30 neurons
        # output layer has 3 neurons
        for i in range(1, len(layers) - 1):
            r = 2*np.random.random((layers[i-1] + 1, layers[i] + 1)) -1
            self.weights.append(r)
        # output layer
        r = 2*np.random.random( (layers[i] + 1, layers[i+1])) - 1
        self.weights.append(r)

    def fit(self, X, y, learningRate=0.5, epochs=500):
        """ do the job """
        ones = np.atleast_2d(np.ones(X.shape[0])) # add column of ones to X
        X = np.concatenate((ones.T, X), axis=1) # add the bias unit to the input layer
        self.learningRate = learningRate
        errorMeasure_lastTime = 0
         
        for k in range(epochs):
            i = np.random.randint(X.shape[0])
            a = [X[i]] # a = x[0]
            errorMeasure = 1

            # Feedforward part: 
            #   calculate a[l] to a[L], acivation function using sigmoid
            for l in range(len(self.weights)):
                    dot_value = np.dot(a[l], self.weights[l])
                    activation = self.activation(dot_value)
                    a.append(activation)

            # Backward part: 
            #   output layer -> delta L
            error = self.cost(a[-1], y[i])
            deltas = [error]
            #   from second to last layer -> delta L-1,L-2,...,1
            for l in range(len(a) - 2, 0, -1): 
                deltas.append(deltas[-1].dot(self.weights[l].T)*self.activation_prime(a[l]))

            #   reverse deltas to simplify backpropagation's implementation(L -> l)
            deltas.reverse()

            # Update weights part:
            #   using stotistic backpropagation
            for l in range(len(self.weights)):
                layer = np.atleast_2d(a[l])
                delta = np.atleast_2d(deltas[l])
                self.weights[l] -= self.learningRate * np.dot(layer.T, delta)
            #errorMeasure = error ** 2 # Mean Square Error(MSE), sum part
            self.learningRate *= 0.95
            #errorMeasure /= len(X) # Mean Square Error(MSE), 1/N part
            #print(errorMeasure)
            #if errorMeasure < 0.02:
            #    return k # return result when error measure enough small or error measure stop beening smaller
            #errorMeasure_lastTime = errorMeasure

            if k % 100 ==0:print('epochs:', k)
        return epochs

    def predict(self, x): 
        """ predict unknown data(a) from well-trainned weights """
        a = np.concatenate((np.ones(1).T, np.array(x)))      
        for l in range(len(self.weights)):
            a = self.activation(np.dot(a, self.weights[l]))

        return a

    def get_accuracy(self, X, Y):
        """ calculate accuracy rate and accuracy number """
        signal = []
        for x,y in zip(X, Y):
            result = list(nn.predict(x))
            predictNumber = result.index(max(result))
            answerNumber = list(y).index(max(y))
            signal.append(1 if answerNumber==predictNumber else 0)

        accuracyRate = round(signal.count(1)/(len(signal))*100,2)
        accuracyNum = f'{signal.count(1)} / {len(signal)}'
        return accuracyRate, accuracyNum

if __name__ == '__main__':

    # loading data
    fi1e = Loader()
    X = fi1e.load_img('/data/train_img.txt')
    Y = fi1e.load_label('/data/train_label.txt')
    unknown_X = fi1e.load_img('/data/test_img.txt')

    # normalization & grouping
    train_X = np.array(X[:6000])
    train_Y = np.array(Y[:6000])
    validation_X = np.array(X[6000:])
    validation_Y = np.array(Y[6000:])

    # neural network create
    nnStructure = [784,30,3]
    nn = NeuralNetwork(nnStructure)

    # trainning process
    print(f'>> Process starting ...')
    epochFinal = nn.fit(train_X,
                        train_Y, 
                        learningRate=0.5, 
                        epochs=30)
    print(f'>> Trainning finished !')

    # valadation process
    print(f'>> Starting validation ...')
    accRateT, accNumT = nn.get_accuracy(train_X, train_Y)
    accRateV, accNumV = nn.get_accuracy(validation_X, validation_Y)
    print(f'>> Validation finished !')

    # predict process
    print(f'>> Starting predict ...')
    result = []
    for x in unknown_X:
        output = list(nn.predict(x))
        predict_number = output.index(max(output))
        result.append(predict_number)
    print(f'>> Predict finished !')
    #print(result)

    print(f'')
    print(f'###### Details ######')
    print(f'1.Data Count')
    print(f' -  Trainning data: {len(train_X)}')
    print(f' - Validation data: {len(validation_X)}')
    print(f' -    Predict data: {len(unknown_X)}')
    print(f' -           Total: {len(train_X)+len(validation_X)+len(unknown_X)}')
    print(f'')
    print(f'2.Hidden layer')
    print(f' -     Hidden Layer count: {len(nnStructure)-2}')
    for cnt in range(1,len(nnStructure)-1):
        print(f' - Layer#{cnt}\'s Neuron count: {nnStructure[cnt]}')
    print(f'')
    print(f'3.Final epoch: {epochFinal}')
    print(f'4.Final learning rate: {nn.learningRate}%')
    print(f'5.Accuracy Rate')
    print(f' -       Bias: {accRateT}% [{accNumT}]')
    print(f' - Validation: {accRateV}% [{accNumV}]')
    print(f'')
    print(f'6.Error Rate')
    print(f' -     Bias: {round(100-accRateT, 2)}%')
    print(f' - Variance: {round(-accRateV+accRateT, 2)}%')
    print(f'')
    print(f'>> Process End-Up !')

>> Process starting ...
epochs: 0
>> Trainning finished !
>> Starting validation ...
>> Validation finished !
>> Starting predict ...
>> Predict finished !

###### Details ######
1.Data Count
 -  Trainning data: 6000
 - Validation data: 2000
 -    Predict data: 2000
 -           Total: 10000

2.Hidden layer
 -     Hidden Layer count: 1
 - Layer#1's Neuron count: 30

3.Final epoch: 30
4.Final learning rate: 0.1073193819714686%
5.Accuracy Rate
 -       Bias: 89.93% [5396 / 6000]
 - Validation: 89.15% [1783 / 2000]

6.Error Rate
 -     Bias: 10.07%
 - Variance: 0.78%

>> Process End-Up !


In [64]:
w2 = np.array([[0.1,0.3],[-0.1,0],[0.2,-0.1]]).T
b2 = np.array([0.1,-0.1])
n2 = np.dot(w2,a1)+b2
print(n2)
a2 = sigmoid(n2)

[0.20749376 0.00499584]


In [93]:
np.ones(1)

array([1.])

In [146]:
a = np.array([[0.85081511,0.84737803,0.84737803,0.93442471,0.93442471,0.93442471,
  0.84737803,0.93442471, 0.93442471, 0.84737803, 0.84737803, 0.84737803,
  0.84737803, 0.93442471, 0.84737803, 0.84737803, 0.87154484, 0.84737808,
  0.93442471, 0.84737803, 0.84737803, 0.93442471, 0.84737803, 0.93442471,
  0.84737803, 0.84737803, 0.93442471, 0.93442471, 0.84737803, 0.84737803],
 [0.3015119 , 0.30177854, 0.30177854, 0.38489885, 0.38489885, 0.38489885,
  0.30177854, 0.38489885, 0.38489885, 0.30177854, 0.30177854, 0.30177854,
  0.30177854, 0.38489885, 0.30177854, 0.30177854, 0.29919285, 0.30177853,
  0.38489885, 0.30177854, 0.30177854, 0.38489885, 0.30177854, 0.38489885,
  0.30177854, 0.30177854, 0.38489885, 0.38489885, 0.30177854, 0.30177854],
 [0.23706566, 0.24018899, 0.24018899, 0.04986671, 0.04986671, 0.04986671,
  0.24018899, 0.04986671, 0.04986671, 0.24018899 ,0.24018899, 0.24018899,
  0.24018899, 0.04986671, 0.24018899, 0.24018899 ,0.21669789, 0.24018895,
  0.04986671, 0.24018899, 0.24018899, 0.04986671 ,0.24018899, 0.04986671,
  0.24018899, 0.24018899, 0.04986671, 0.04986671 ,0.24018899, 0.24018899]]).T
y = np.array([0,0,1])
print(a-y)

[[ 0.85081511  0.3015119  -0.76293434]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.93442471  0.38489885 -0.95013329]
 [ 0.93442471  0.38489885 -0.95013329]
 [ 0.93442471  0.38489885 -0.95013329]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.93442471  0.38489885 -0.95013329]
 [ 0.93442471  0.38489885 -0.95013329]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.93442471  0.38489885 -0.95013329]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.87154484  0.29919285 -0.78330211]
 [ 0.84737808  0.30177853 -0.75981105]
 [ 0.93442471  0.38489885 -0.95013329]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.93442471  0.38489885 -0.95013329]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.93442471  0.38489885 -0.95013329]
 [ 0.84737803  0.30177854 -0.75981101]
 [ 0.84737803  0.30177854

In [70]:
convert_dict = {'柏':'(bo2)', '乾':'(qian2)', '粘':'(nian2)', '仇':'(qiu2)', '單':'(shan4)'}
name = '仇乾單'
name_split = []
for i,n in enumerate(name):
    cell = n
    if n in convert_dict:
        cell += convert_dict[n]
    name_split.append(cell)
print(''.join(name_split))

仇(qiu2)乾(qian2)單(shan4)


In [63]:
name = '柏乾粘仇單'
name_split = name.split()