In [1]:
"""
神经网络的定义方式，获取参数，保存参数
"""

'\n神经网络的定义方式，获取参数，保存参数\n'

In [1]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
%run read_fashion_image.ipynb

x_train, y_train, x_test, y_test = get_data()

In [3]:
x_train = x_train / 255.0
x_test = x_test / 255.0

In [4]:
x_train = np.array([x.reshape(28,28) for x in x_train ])
x_test = np.array([x.reshape(28,28) for x in x_test ])

In [5]:
# 方式1：一次性定义全部网络结构
model = tf.keras.models.Sequential([
                                    tf.keras.layers.Flatten(input_shape=(28, 28)),
                                    tf.keras.layers.Dense(256, activation='relu',),
                                    tf.keras.layers.Dense(10, activation='softmax')
                                    ])

In [25]:
# 方式2：以序列方式定义，分别定义每一层
class MLP(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        flatten_layer = tf.keras.layers.Flatten()
        def dense_layer(name=None):
            if not name:
                return tf.keras.layers.Dense(256, activation=tf.nn.relu)
            else:
                return tf.keras.layers.Dense(256, activation=tf.nn.relu, name=name)
        dropout_layer = tf.keras.layers.Dropout(0.5)
        output_layer = tf.keras.layers.Dense(10, activation='softmax')

        self.model = tf.keras.models.Sequential()
        self.model.add(flatten_layer)
        for i in range(2):
#             self.model.add(dense_layer("dense%d" % i)) # 每一层起不同的名字
            self.model.add(dense_layer()) # 每一层起不同的名字
            self.model.add(dropout_layer)
        self.model.add(output_layer)
    
    def call(self, inputs):
        return self.model(inputs)
    
model = MLP()
type(model)

__main__.MLP

In [21]:
# 方式3：以类的方式定义，更工程化
class MLP(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        self.flatten_layer = tf.keras.layers.Flatten()
        self.dense_layer1 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        self.dense_layer2 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        self.dropout_layer = tf.keras.layers.Dropout(0.5)
        self.output_layer = tf.keras.layers.Dense(10, activation='softmax')
        
    def call(self, inputs, training=False):
        x = self.flatten_layer(inputs)
        x = self.dense_layer1(x)
        x = self.dense_layer2(x) 
        # 如果只写成dense_layer，并调用两次 x = dense_layer(x) 会报错
        # 因为第一次执行这句后，dense_layer会变成一个神经网络层，输入是784维，所以再次调用时，输入维度不正确，就会报错
        output = self.output_layer(x)     
        return output
    
model = MLP()
type(model)

__main__.MLP

In [26]:
model.compile(optimizer=tf.keras.optimizers.SGD(lr=0.5),
             loss = 'sparse_categorical_crossentropy',
             metrics=['accuracy'])

In [27]:
model.fit(x_train, 
          y_train, 
          epochs=5,
          batch_size=256,
          validation_data=(x_test, y_test),
          validation_freq=1
          )

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0xb3aaeb668>

## 获取每一层的权重系数

In [28]:
len(model.weights)

6

In [29]:
model.weights[0]

<tf.Variable 'mlp_6/sequential_1/dense_14/kernel:0' shape=(784, 256) dtype=float32, numpy=
array([[ 0.00294566,  0.05004987,  0.00271001, ...,  0.03959084,
        -0.05993354, -0.06199143],
       [ 0.05427834,  0.06871633,  0.06340374, ..., -0.05930483,
         0.04408232, -0.02000068],
       [-0.06381077,  0.0587069 , -0.01078447, ..., -0.0060904 ,
        -0.01464957, -0.02623158],
       ...,
       [ 0.02187521, -0.03366455,  0.0260867 , ...,  0.05555315,
         0.03741949,  0.01170047],
       [-0.00589255,  0.0134085 , -0.01863818, ..., -0.04816734,
        -0.0276657 ,  0.02965577],
       [-0.02482702,  0.03131078,  0.03506022, ..., -0.05677408,
        -0.07807521,  0.00352092]], dtype=float32)>

In [30]:
model.weights[1]

<tf.Variable 'mlp_6/sequential_1/dense_14/bias:0' shape=(256,) dtype=float32, numpy=
array([ 0.11102301,  0.04962588,  0.10887065, -0.05661264,  0.0093953 ,
        0.00237108,  0.07363104,  0.01493046, -0.02420369, -0.13762571,
        0.03165716,  0.02491756, -0.03754677, -0.02195394,  0.09384637,
        0.17899649,  0.09422062, -0.13498096,  0.05634186, -0.02349562,
        0.16778252,  0.14011617,  0.07608903,  0.06117399, -0.13008495,
       -0.02320166, -0.01270711,  0.39943975, -0.01971927,  0.05362552,
        0.20469756,  0.04425382, -0.33079726,  0.00875897,  0.05691639,
        0.07537393,  0.22522433,  0.13301359,  0.0330934 ,  0.16991006,
       -0.01340355, -0.01575116,  0.10794733, -0.02128834,  0.11518096,
        0.12628207, -0.01013546, -0.09367588,  0.08754671, -0.03214873,
        0.10295682,  0.06585162, -0.0263508 , -0.08750086,  0.11789899,
        0.15809304,  0.08076334, -0.01024261,  0.2851429 ,  0.04711298,
       -0.01060936,  0.18932113,  0.03836387,  0.12

## save numpy variable

In [38]:
tensor = tf.ones((2,2))
print(tensor)
np.save("test.npy", tensor)

tf.Tensor(
[[1. 1.]
 [1. 1.]], shape=(2, 2), dtype=float32)


In [39]:
# load 
tensor = np.load("test.npy")
tensor

array([[1., 1.],
       [1., 1.]], dtype=float32)

## save model parameters

In [40]:
model.save_weights("model.h5")

In [42]:
model.predict(x_test[:5])

array([[8.8569112e-05, 6.1693376e-05, 2.7899505e-05, 7.8957237e-05,
        3.2714339e-05, 1.8832438e-01, 2.9896195e-05, 2.3371458e-01,
        1.2878830e-03, 5.7635337e-01],
       [1.4052604e-03, 1.6857641e-05, 9.5151764e-01, 7.2085073e-05,
        4.1229506e-03, 2.7934652e-07, 4.2829890e-02, 7.5904501e-07,
        3.2777960e-05, 1.4482712e-06],
       [2.3909356e-08, 1.0000000e+00, 4.0523943e-10, 3.5288792e-08,
        1.8345388e-10, 2.0642683e-14, 3.3352640e-11, 3.1342438e-14,
        9.0871261e-13, 2.2048415e-14],
       [2.3080089e-08, 9.9999976e-01, 7.7677798e-10, 2.7649963e-07,
        7.8283746e-10, 1.7237976e-13, 1.2632465e-10, 3.3389960e-13,
        3.4676006e-12, 2.6574381e-13],
       [2.6186955e-01, 1.6260222e-03, 1.5092772e-01, 2.5567267e-02,
        1.6019188e-02, 1.0647790e-03, 5.2746540e-01, 6.6100073e-04,
        1.4291835e-02, 5.0711236e-04]], dtype=float32)

## load model parameters

In [50]:
model = MLP()
model(x_train[:5])
model.load_weights("model.h5")



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



In [51]:
model.predict(x_test[:5])

array([[8.8569112e-05, 6.1693376e-05, 2.7899505e-05, 7.8957237e-05,
        3.2714339e-05, 1.8832438e-01, 2.9896195e-05, 2.3371458e-01,
        1.2878830e-03, 5.7635337e-01],
       [1.4052604e-03, 1.6857641e-05, 9.5151764e-01, 7.2085073e-05,
        4.1229506e-03, 2.7934652e-07, 4.2829890e-02, 7.5904501e-07,
        3.2777960e-05, 1.4482712e-06],
       [2.3909356e-08, 1.0000000e+00, 4.0523943e-10, 3.5288792e-08,
        1.8345388e-10, 2.0642683e-14, 3.3352640e-11, 3.1342438e-14,
        9.0871261e-13, 2.2048415e-14],
       [2.3080089e-08, 9.9999976e-01, 7.7677798e-10, 2.7649963e-07,
        7.8283746e-10, 1.7237976e-13, 1.2632465e-10, 3.3389960e-13,
        3.4676006e-12, 2.6574381e-13],
       [2.6186955e-01, 1.6260222e-03, 1.5092772e-01, 2.5567267e-02,
        1.6019188e-02, 1.0647790e-03, 5.2746540e-01, 6.6100073e-04,
        1.4291835e-02, 5.0711236e-04]], dtype=float32)

In [None]:
"""
问题：搞清楚单层全连接神经网络的权重系数有多少，矩阵结构是怎样的。
  比如：同一层里的多个神经元，bias有多少个，每个神经元都不同还是共享的？明显是不共享的。
  
"""