# 神经网络基础

## 深度神经网络

- Big Data
- ReLU
- Dropout
- BatchNorm
- ResNet
- Xavier Initialization
- Caffe/TensorFlow/PyTorch


In [1]:
import tensorflow as tf


In [2]:
# Feed a random batch of 4 vectors with 784 features through a 512-unit Dense
# layer, then report the shapes of the output, the kernel and the bias.
x = tf.random.normal(shape=[4, 784])
net = tf.keras.layers.Dense(units=512)
out = net(x)

for label, shape in (
    ("After net transform, shape of out : ", out.shape),
    ("Shape of w : ", net.kernel.shape),
    ("Shape of bias : ", net.bias.shape),
):
    print(label, shape)


After net transform, shape of out :  (4, 512)
Shape of w :  (784, 512)
Shape of bias :  (512,)


In [3]:
# Fresh 10-unit Dense layer; it has been neither called nor built yet.
net = tf.keras.layers.Dense(units=10)


In [4]:
# The bias has not been created yet: the layer has not been built, so
# accessing it raises AttributeError. Catch and display the error so the
# notebook still runs top to bottom (Restart & Run All) while showing the point.
try:
    net.bias
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")


AttributeError: 'Dense' object has no attribute 'bias'

In [5]:
# Show the layer's weights through both accessors: get_weights() and the
# .weights attribute.
for current_weights in (net.get_weights(), net.weights):
    print("net's weight: ", current_weights)


net's weight:  []
net's weight:  []


In [6]:
# Build the layer explicitly for inputs with 4 features (batch size left as
# None); build can be called more than once.
net.build(input_shape=(None, 4))
for label, shape in (
    ("Shape of w : ", net.kernel.shape),
    ("Shape of bias : ", net.bias.shape),
):
    print(label, shape)


Shape of w :  (4, 10)
Shape of bias :  (10,)


In [7]:
# Build the same layer again, this time for inputs with 20 features, and
# report the resulting kernel and bias shapes.
net.build(input_shape=(None, 20))
for label, shape in (
    ("Shape of w : ", net.kernel.shape),
    ("Shape of bias : ", net.bias.shape),
):
    print(label, shape)


Shape of w :  (20, 10)
Shape of bias :  (10,)


In [8]:
# Sequential container: chains several layers into one model.

# Sample batch of 2 inputs with 3 features, matching the input_shape used in
# build() below (it is not passed through the model in this cell).
x = tf.random.normal([2, 3])

model = tf.keras.Sequential([
    tf.keras.layers.Dense(2, activation='relu'),
    tf.keras.layers.Dense(2, activation='relu'),
    tf.keras.layers.Dense(2),
])

model.build(input_shape=[None, 3])

# summary() prints the table itself and returns None, so call it on its own
# line; wrapping it in print() appended a stray "None" after the table.
print("model Summary : ")
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              multiple                  8         
_________________________________________________________________
dense_3 (Dense)              multiple                  6         
_________________________________________________________________
dense_4 (Dense)              multiple                  6         
Total params: 20
Trainable params: 20
Non-trainable params: 0
_________________________________________________________________
model Summary : 
 None


In [9]:
# List the name and shape of every trainable variable in the model.
for variable in model.trainable_variables:
    print(variable.name, variable.shape)



dense_2/kernel:0 (3, 2)
dense_2/bias:0 (2,)
dense_3/kernel:0 (2, 2)
dense_3/bias:0 (2,)
dense_4/kernel:0 (2, 2)
dense_4/bias:0 (2,)


## 输出方式

- $y \in \mathbb{R}^{d}$
    - linear regression
    - naive classification with MSE
- $y_i \in [0, 1]$
    - binary classification
    - image generation (RGB)
    - sigmoid function $\sigma$
- $y_i \in [-1, 1]$ 
    - tanh


In [11]:
# Sigmoid function: evaluate it at 10 evenly spaced points from -6 to 6.

a = tf.linspace(-6., 6, 10)
sigmoid_a = tf.sigmoid(a)
print("sigmoid(a) = ", sigmoid_a)


sigmoid(a) =  tf.Tensor(
[0.00247264 0.00931597 0.03444517 0.11920291 0.33924365 0.6607564
 0.8807971  0.96555483 0.99068403 0.9975274 ], shape=(10,), dtype=float32)


In [13]:
# Random 1x28x28 tensor, scaled by 5 to widen its value range; then show its
# smallest and largest entries.
scale = 5
x = tf.random.normal([1, 28, 28]) * scale

x_min = tf.reduce_min(x)
x_max = tf.reduce_max(x)
(x_min, x_max)


(<tf.Tensor: id=264, shape=(), dtype=float32, numpy=-20.29687>,
 <tf.Tensor: id=266, shape=(), dtype=float32, numpy=15.087424>)

In [14]:
# Squash x with the sigmoid and show the new value range. The result gets its
# own name instead of overwriting x, so re-running this cell does not apply
# the sigmoid a second time.
x_sigmoid = tf.sigmoid(x)

tf.reduce_min(x_sigmoid), tf.reduce_max(x_sigmoid)


(<tf.Tensor: id=299, shape=(), dtype=float32, numpy=0.0>,
 <tf.Tensor: id=301, shape=(), dtype=float32, numpy=0.99999976>)

In [15]:
# Softmax over 5 evenly spaced values from -2 to 2.
b = tf.linspace(-2., 2, 5)
softmax_b = tf.nn.softmax(b)
softmax_b



<tf.Tensor: id=352, shape=(5,), dtype=float32, numpy=
array([0.01165623, 0.03168492, 0.08612854, 0.23412167, 0.6364086 ],
      dtype=float32)>

In [16]:
# Draw one row of 10 logits uniformly from [-2, 2).
logits = tf.random.uniform(shape=[1, 10], minval=-2, maxval=2)
print("Logits : ", logits)


Logits :  tf.Tensor(
[[-0.9800911   1.3957143  -1.9753623  -0.734632   -0.7043357  -0.42098284
   0.12055969 -0.05920219  1.0760355   0.27005243]], shape=(1, 10), dtype=float32)


In [17]:
# Turn the logits into probabilities along axis 1 and sum them along the
# same axis.
prob = tf.nn.softmax(logits, axis=1)
prob_total = tf.reduce_sum(prob, axis=1)

print("Probability of logits : ", prob)

print("Sum of probs :", prob_total)


Probability of logits :  tf.Tensor(
[[0.03003159 0.3231302  0.01110037 0.03838662 0.03956739 0.05252862
  0.09027861 0.075425   0.23471604 0.10483559]], shape=(1, 10), dtype=float32)
Sum of probs : tf.Tensor([1.], shape=(1,), dtype=float32)


## 误差计算

- MSE 
- Cross Entropy


In [18]:
# Entropy in bits (log base 2 via change of base) of a uniform distribution
# over 4 outcomes.
a = tf.fill([4], 0.25)
weighted_log = a * tf.math.log(a)
entropy_bits = -tf.reduce_sum(weighted_log / tf.math.log(2.))
print("Entropy of a, ", entropy_bits)


Entropy of a,  tf.Tensor(2.0, shape=(), dtype=float32)


In [19]:
# Entropy in bits (log base 2 via change of base) of a distribution with
# most of its mass on the last outcome.
a = tf.constant([0.1, 0.1, 0.1, 0.7])
weighted_log = a * tf.math.log(a)
entropy_bits = -tf.reduce_sum(weighted_log / tf.math.log(2.))
print("Entropy of a, ", entropy_bits)



Entropy of a,  tf.Tensor(1.3567796, shape=(), dtype=float32)


In [20]:
# Entropy in bits (log base 2 via change of base) of a distribution with
# nearly all of its mass on the last outcome.
a = tf.constant([0.01, 0.01, 0.01, 0.97])
weighted_log = a * tf.math.log(a)
entropy_bits = -tf.reduce_sum(weighted_log / tf.math.log(2.))
print("Entropy of a, ", entropy_bits)


Entropy of a,  tf.Tensor(0.24194068, shape=(), dtype=float32)


In [21]:
# Cross entropy between a one-hot target (class 1) and a uniform prediction.
y_true = [0, 1, 0, 0]
y_pred = [0.25, 0.25, 0.25, 0.25]
tf.losses.categorical_crossentropy(y_true, y_pred)


<tf.Tensor: id=758, shape=(), dtype=float32, numpy=1.3862944>

In [22]:
# Cross entropy between the same one-hot target (class 1) and a prediction
# that puts 0.7 on the correct class.
y_true = [0, 1, 0, 0]
y_pred = [0.1, 0.7, 0.1, 0.1]
tf.losses.categorical_crossentropy(y_true, y_pred)



<tf.Tensor: id=839, shape=(), dtype=float32, numpy=0.35667497>