In [1]:
# Forward propagation

In [6]:
import os

# The log level has to be in the environment BEFORE TensorFlow is imported:
# the native logger may already be initialised during the import, in which
# case a later assignment no longer filters the start-up messages.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets

# Only the training split of MNIST is used; the test split is discarded.
(x, y), _ = datasets.mnist.load_data()

# Scale the pixel values into [0, 1] as float32; keep the labels as int32 class ids.
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32)

print(x.shape, y.shape, x.dtype, y.dtype)

(60000, 28, 28) (60000,) <dtype: 'float32'> <dtype: 'int32'>


In [7]:
# Sanity check: print the max and min of the image tensor, then of the label tensor.
for tensor in (x, y):
    print(tf.reduce_max(tensor), tf.reduce_min(tensor))

tf.Tensor(1.0, shape=(), dtype=float32) tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(9, shape=(), dtype=int32) tf.Tensor(0, shape=(), dtype=int32)


In [8]:
# Pair every image with its label, then group the pairs into batches of 128.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.batch(128)

# Pull one batch out of the pipeline to confirm the batched shapes.
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)

batch: (128, 28, 28) (128,)


In [23]:
def _dense_params(n_in, n_out):
    """Create the trainable (weight, bias) pair of one fully connected layer.

    The weight is drawn from a truncated normal with stddev 0.1 and has shape
    [n_in, n_out]; the bias starts at zero and has shape [n_out].
    """
    weight = tf.Variable(tf.random.truncated_normal([n_in, n_out], stddev=0.1))
    bias = tf.Variable(tf.zeros([n_out]))
    return weight, bias


# Three layers: 784 -> 256 -> 128 -> 10
w1, b1 = _dense_params(784, 256)
w2, b2 = _dense_params(256, 128)
w3, b3 = _dense_params(128, 10)

In [26]:
lr = 1e-3  # step size of the manual gradient-descent update below
for epoch in range(10):
    # The batch gets its own names: reusing `x` / `y` here would overwrite the
    # full-dataset tensors, and re-running the cell that builds `train_db`
    # would then silently slice only the last batch.
    for step, (x_batch, y_batch) in enumerate(train_db):
        # Flatten every image: [b, 28, 28] -> [b, 784]
        x_flat = tf.reshape(x_batch, [-1, 28*28])

        with tf.GradientTape() as tape:
            # Forward pass. `+` broadcasts each bias over the batch axis, so no
            # explicit tf.broadcast_to is needed.
            h1 = tf.nn.relu(x_flat @ w1 + b1)
            h2 = tf.nn.relu(h1 @ w2 + b2)
            out = h2 @ w3 + b3

            # Mean squared error against the one-hot encoded labels.
            y_onehot = tf.one_hot(y_batch, depth=10)
            loss = tf.reduce_mean(tf.square(y_onehot - out))

        params = [w1, b1, w2, b2, w3, b3]
        grads = tape.gradient(loss, params)

        # Update in place with assign_sub. Writing `w1 = w1 - lr * grad` would
        # rebind the name to a plain tf.Tensor, which the tape does not watch
        # automatically, so the next step's gradient for it would be None.
        for param, grad in zip(params, grads):
            param.assign_sub(lr * grad)

        if step % 100 == 0:
            print(step, 'loss:', float(loss))


0 loss: 0.14274731278419495
100 loss: 0.14764165878295898
200 loss: 0.15454457700252533
300 loss: 0.12773576378822327
400 loss: 0.148223876953125
0 loss: 0.12292657047510147
100 loss: 0.13066372275352478
200 loss: 0.13578681647777557
300 loss: 0.11325784772634506
400 loss: 0.1321716606616974
0 loss: 0.10991337150335312
100 loss: 0.11922131478786469
200 loss: 0.1228143721818924
300 loss: 0.1032351404428482
400 loss: 0.12097344547510147
0 loss: 0.10053443908691406
100 loss: 0.1109633818268776
200 loss: 0.11329660564661026
300 loss: 0.09593107551336288
400 loss: 0.11266548931598663
0 loss: 0.09353484958410263
100 loss: 0.10468713939189911
200 loss: 0.1059718132019043
300 loss: 0.09029517322778702
400 loss: 0.106240414083004
0 loss: 0.08804386109113693
100 loss: 0.09969504177570343
200 loss: 0.10020148754119873
300 loss: 0.0858243927359581
400 loss: 0.10115544497966766
0 loss: 0.08363509923219681
100 loss: 0.09564249962568283
200 loss: 0.09543633460998535
300 loss: 0.0822121649980545
400 l

合并与切割

In [29]:
# Concatenate along axis 0: the leading sizes add up (4 + 2 = 6), the other axes match.
a = tf.ones(shape=(4, 35, 8))
b = tf.ones(shape=(2, 35, 8))
c = tf.concat(values=[a, b], axis=0)
c.shape

TensorShape([6, 35, 8])

In [30]:
# Concatenate along axis 1: 32 + 2 = 34, while axes 0 and 2 are identical.
a = tf.ones(shape=(4, 32, 8))
b = tf.ones(shape=(4, 2, 8))
c = tf.concat(values=[a, b], axis=1)
c.shape

TensorShape([4, 34, 8])

In [31]:
# Concatenate along the last axis (axis=-1): 8 + 8 = 16.
a = tf.ones(shape=(4, 35, 8))
b = tf.ones(shape=(4, 35, 8))
c = tf.concat(values=[a, b], axis=-1)
c.shape

TensorShape([4, 35, 16])

In [33]:
# Unlike concat, stack inserts a NEW axis (here in front) whose size is the number of inputs.
c = tf.stack(values=[a, b], axis=0)
c.shape

TensorShape([2, 4, 35, 8])

In [34]:
# The new axis can also be appended at the end (position 3).
c = tf.stack(values=[a, b], axis=3)
c.shape

TensorShape([4, 35, 8, 2])

In [35]:
# With no axis argument, the new axis ends up in front.
a = tf.ones(shape=(4, 35, 8))
b = tf.ones(shape=(4, 35, 8))
c = tf.stack(values=[a, b])
c.shape

TensorShape([2, 4, 35, 8])

In [36]:
# unstack reverses stack: the leading axis of size 2 is removed, yielding two tensors.
aa, bb = tf.unstack(value=c)
(aa.shape, bb.shape)

(TensorShape([4, 35, 8]), TensorShape([4, 35, 8]))

In [42]:
# Unstacking along axis 3 removes that axis and returns one tensor per entry on it.
res = tf.unstack(value=c, axis=3)
(res[0].shape, len(res))

(TensorShape([2, 4, 35]), 8)

In [43]:
# An integer num_or_size_splits cuts axis 3 into that many equal pieces; the axis is kept.
res = tf.split(value=c, num_or_size_splits=2, axis=3)
len(res)

2

In [45]:
# Shape of the first piece returned by the split above.
res[0].shape

TensorShape([2, 4, 35, 4])

In [46]:
# A list gives the size of every piece along axis 3 explicitly (2 + 2 + 4 = 8).
res = tf.split(value=c, num_or_size_splits=[2, 2, 4], axis=3)
res[0].shape

TensorShape([2, 4, 35, 2])

数据统计