## 1 - Assignment1 Neural Network model



### 1.1 Packages####

In [1]:
import numpy as np
import h5py
from scipy import optimize
import random
import itertools

In [2]:
def vectorized_label(label):
    tmp = np.zeros((10, 1))
    tmp[label] = 1.0
    return tmp
def flattenParams(thetas_list):
    """
    Hand this function a list of theta matrices, and it will flatten it
    into one long (n,1) shaped numpy array
    """
    flattened_list = [ mytheta.flatten() for mytheta in thetas_list ]
    combined = list(itertools.chain.from_iterable(flattened_list))
    return np.array(combined).reshape((1, len(combined)))

### 1.2 Dataset ####
Show the first 10 instances and labels.

In [3]:
with h5py.File('train_128.h5','r') as H:
    data = np.copy(H['data'])
print("The first 10 instances: ")
print(data[:10],"\n")
with h5py.File('train_label.h5','r') as H:
    label = np.copy(H['label'])
print("The first 10 instances: ",label[:10],"\n")

print("The shape of instance is: ",data.shape)
print("The shape of label is: ",label.shape)

labels = np.array([vectorized_label(tmp) for tmp in label])
#print(labels[:10])
data = np.array([np.reshape(tmp, (128, 1)) for tmp in data])
train_data = np.squeeze(data[:48000])
train_label = np.squeeze(labels[:48000])

print(train_data.shape,train_label.shape)

test_data = np.squeeze(data[48000:])
test_label = np.squeeze(labels[48000:])

print(test_data.shape,test_label.shape)



The first 10 instances: 
[[ -1.26502938e+02   1.63243234e+03  -1.20922145e+03 ...,   1.11837600e+02
    3.57044951e+01  -6.63192504e-01]
 [  1.40756479e+03  -4.51681446e+02  -2.59769757e+02 ...,   9.24883807e+01
   -1.46637001e+02  -7.93095667e+01]
 [ -7.25430358e+02  -1.10359763e+03   1.07005341e+02 ...,  -2.93622934e+01
    1.30540938e+01   2.86296443e+01]
 ..., 
 [  1.82574270e+03   1.67704384e+03   3.29740502e+02 ...,  -9.53786962e+01
   -5.42099830e+01  -8.22305980e+01]
 [ -1.58283168e+03  -3.88443114e+02   1.14953389e+02 ...,   5.78324789e+01
    2.17396631e+00   6.87229558e+01]
 [ -9.46185395e+02   4.13536641e+02  -5.49866946e+01 ...,   8.39081821e+01
   -8.13921414e+01   4.63882409e+01]] 

The first 10 instances:  [9 0 0 3 0 2 7 2 5 5] 

The shape of instance is:  (60000, 128)
The shape of label is:  (60000,)
(48000, 128) (48000, 10)
(12000, 128) (12000, 10)


### 1.3 - Defining the neural network structure ####

In [4]:
   def feedforward(self, inputs, theta):
        for b, w in zip(self.biases, self.weights):
            inputs = sigmoid(np.dot(w, inputs) + b)
        return inputs

In [5]:
class Network(object):

    def __init__(self, sizes):
        """
        :param sizes: list类型，储存每层神经网络的神经元数目
                      譬如说：sizes = [2, 3, 2] 表示输入层有两个神经元、
                      隐藏层有3个神经元以及输出层有2个神经元
        """
        # 有几层神经网络
        self.num_layers = len(sizes)
        self.sizes = sizes
        # 除去输入层，随机产生每层中 y 个神经元的 biase 值（0 - 1）
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # 随机产生每条连接线的 weight 值（0 - 1）
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """
        前向传输计算每个神经元的值
        :param a: 输入值
        :return: 计算后每个神经元的值
        
        tmp  = []
        #print("I am theta: ",theta)
        #print(theta.shape)
        for i in range(self.num_layers-1):
            tmp.append(np.reshape(theta[:self.sizes[i]*self.sizes[i+1]],(self.sizes[i+1],self.sizes[i])))
        #print("start")
        theta = tmp
        """
        #for b, w in zip(self.biases, theta):
        #    z = np.dot(w, activation)+b
        for b, w in zip(self.biases, self.weights):
            # 加权求和以及加上 biase
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, y,
            test_data=None):
        """
        随机梯度下降
        :param training_data: 输入的训练集
        :param epochs: 迭代次数
        :param mini_batch_size: 小样本数量
        :param eta: 学习率
        :param test_data: 测试数据集
        """
        #if test_data is not None: 
        #    n_test = len(test_data)
        theta1 = flattenParams(self.weights)[0]
        #theta2 = flattenParams(self.biases)[0]
        #theta = np.concatenate((theta1,theta2),axis=0)
        
        result = optimize.minimize(self.NNcost, theta1, args=(training_data,y), method=None,jac=self.backprop, 
                                   options={'maxiter':100})
        if test_data is not None:
            n_test = len(test_data)
            print("n_test is: ",n_test)
            correct = self.evaluate(test_data)
            print("Accury is: {}%".format(correct/n_test*100))
        """
            # 输出测试每轮结束后，神经网络的准确度
            if test_data is not None:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))
        """
    def NNcost(self,theta,training_data,y):
        """
        Here is the cost funtion for ANN
        """
        cost = 0.5* np.sum((self.feedforward(training_data)-y)**2)
        #print("I am cost: ",cost)
        return cost

    def backprop(self, theta, training_data, y):
        """
        :param x:
        :param y:
        :return:
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        records = training_data.shape[0]
        tmp  = []
        #print("I am theta: ",theta)
        #print(theta.shape)
        for i in range(self.num_layers-1):
            tmp.append(np.reshape(theta[:self.sizes[i]*self.sizes[i+1]],(self.sizes[i+1],self.sizes[i])))
        #print("start")
        theta = tmp
        self.weights = theta
        # 前向传输
        activation = training_data
        # 储存每层的神经元的值的矩阵，下面循环会 append 每层的神经元的值
        activations = [training_data]
        # 储存每个未经过 sigmoid 计算的神经元的值
        zs = []
        for b, w in zip(self.biases, theta):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # 求 δ 的值
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        # 乘于前一层的输出值
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        #print()
        #print("delta: ",delta.shape)
        #print("activations[-2]: ",activations[-2].shape)
        #print("nabla_w[-1]: ",nabla_w[-1].shape)
        #print()
        for l in range(2, self.num_layers):
            # 从倒数第 **l** 层开始更新，**-l** 是 python 中特有的语法表示从倒数第 l 层开始计算
            # 下面这里利用 **l+1** 层的 δ 值来计算 **l** 的 δ 值
            z = zs[-l]
            sp = sigmoid_prime(z)
            #print("delta1: ",delta.shape)
            #print("theta[-l+1].transpose(): ",theta[-l+1].transpose().shape)
            delta = np.dot(theta[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            #print(delta.shape)
            #print(activations[-l-1].transpose().shape)
            #print("delta2: ",delta.shape)
            #print("activations[-l-1].transpose(): ",activations[-l-1].transpose().shape)
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
            #print("nabla_w[-l]: ",nabla_w[-l].shape)
        #print(type(records))
        #print("np.array(nabla_w): ",np.array(nabla_w)
        nabla_b = np.array(nabla_b)/records
        nabla_w = np.array(nabla_w)/records
        #print("nabla_b_o: ",nabla_b.shape)
        nabla_b,nabla_w = flattenParams(nabla_b)[0],flattenParams(nabla_w)[0]
        #print("nabla_b: ",nabla_b.shape,"nabla_w: ",nabla_w.shape)
        #grad = np.concatenate((nabla_w,nabla_b),axis=0)
        #print(grad.shape)
        return nabla_w

    def evaluate(self, test_data):
        # 获得预测结果
        for  x,y in test_data:
            #print(x.shape)
            tmp =  self.feedforward(x)
            #print("I amd out shape: ", tmp.shape)
            #break
        test_results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                        for (x, y) in test_data]
        # 返回正确识别的个数
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """
        二次损失函数
        :param output_activations:
        :param y:
        :return:
        """
        #print("I am first: ",output_activations.shape)
        #print(y.shape)
        #print("I am temp cost: ",output_activations-y)
        return (output_activations-y)

#### Miscellaneous functions
def sigmoid(z):
    """
    求 sigmoid 函数的值
    :param z:
    :return:
    """
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """
    求 sigmoid 函数的导数
    :param z:
    :return:
    """
    return sigmoid(z)*(1-sigmoid(z))

In [6]:
net = Network([128, 30, 10])
test_data_used = []
for data,label in zip(test_data,test_label):
    test_data_used.append((np.expand_dims(data,axis=1),np.expand_dims(label,axis=1)))
test_data_used = np.array(test_data_used)
#print(test_data_used[:1].shape)
#print(type(test_data_used))
#for x,y in test_data_used:
#    print(x.shape)
#    print(y.shape)
#    break
net.SGD(train_data.T,train_label.T,test_data=test_data_used)



n_test is:  12000




Accury is: 16.666666666666664%
