In [1]:
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Limit TensorFlow's native (C++) log output to level '2'.
# NOTE(review): this runs after `import tensorflow`; it is usually recommended
# to set this variable before the import — confirm it still takes effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Fixed seeds for TensorFlow and NumPy so repeated runs are comparable.
tf.random.set_seed(22)
np.random.seed(22)

# The rest of the notebook is written against the TensorFlow 2.x API.
assert tf.__version__.startswith('2.')

# 加载数据，准备数据集

In [5]:
# Settings shared with the model and training cells.
batchsz = 128          # mini-batch size
total_words = 10000    # vocabulary size, passed to imdb.load_data(num_words=...)
max_review_len = 80    # sequence length passed to pad_sequences(maxlen=...)

# If loading fails with an "allow_pickle" error, see:
# https://stackoverflow.com/questions/55890813/how-to-fix-object-arrays-cannot-be-loaded-when-allow-pickle-false-for-imdb-loa
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)

# Bring every review to the same length so the splits become 2-D arrays.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(reviews, maxlen=max_review_len)
    for reviews in (x_train, x_test)
)

# Build the train and test pipelines the same way (train first, then test):
# slice per example -> shuffle with a 10000-element buffer -> fixed-size batches.
db_train, db_test = (
    tf.data.Dataset.from_tensor_slices(split)
    .shuffle(10000)
    .batch(batchsz, drop_remainder=True)
    for split in ((x_train, y_train), (x_test, y_test))
)

# Quick sanity check of shapes and value ranges.
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(x_train))
print('x_test shape:', x_test.shape)

x_train shape: (25000, 80) tf.Tensor(1, shape=(), dtype=int64) tf.Tensor(0, shape=(), dtype=int32)
x_test shape: (25000, 80)


# 创建模型

In [7]:
class MyRNN(keras.Model):
    """Binary sequence classifier: Embedding -> one SimpleRNNCell -> Dense(sigmoid).

    The embedding layer is sized from the notebook-level constants
    ``total_words`` and ``max_review_len``.
    """

    def __init__(self, embedding_dim, units):
        """
        embedding_dim: size of each word-embedding vector
        units: size of the RNN hidden state
        """
        super(MyRNN, self).__init__()

        # Input -> Embedding (learned word vectors)
        # [b, 80] -> [b, 80, embedding_dim]
        self.embedding = layers.Embedding(total_words,  # vocabulary size
                                          embedding_dim,  # embedding vector size
                                          input_length=max_review_len)  # words per input

        # Embedding -> RNN (more cells could be stacked here)
        # [b, 80, embedding_dim] -> [b, units]
        # The time axis (80) disappears because the cell consumes the review
        # one word at a time and only the final hidden state is kept.
        self.rnn_cell0 = layers.SimpleRNNCell(units)

        # RNN -> Dense
        # [b, units] -> [b, 1], a probability because of the sigmoid activation
        self.outlayer = layers.Dense(1, activation="sigmoid")

    # Forward pass
    def call(self, inputs, training=True):
        """
        inputs: integer word ids, [b, seq_len]; seq_len must be statically
            known because the sequence is unrolled with tf.unstack
        training: forwarded to the RNN cell to distinguish training from inference
        returns: [b, 1] tensor of sigmoid outputs
        """
        # Word ids -> word vectors: [b, seq_len] -> [b, seq_len, embedding_dim]
        x = self.embedding(inputs)

        # Start from an all-zero hidden state sized to the actual batch, so the
        # model is not tied to one particular batch size.
        batch = tf.shape(x)[0]
        state = [tf.zeros([batch, self.rnn_cell0.units], dtype=x.dtype)]
        out = state[0]  # used as-is only if the sequence is empty

        # Step the cell through the sequence, one word per step, carrying the
        # hidden state forward: each `word` is [b, embedding_dim].
        for word in tf.unstack(x, axis=1):
            out, state = self.rnn_cell0(word, state, training=training)

        # Classify from the last hidden state: [b, units] -> [b, 1]
        return self.outlayer(out)

# 模型训练

In [None]:
epochs = 4  # number of passes over db_train; tune as needed

# embedding_dim=100, units=64
model = MyRNN(100, 64)

# The output layer is Dense(1, activation="sigmoid"), so the model is meant to
# emit probabilities: use binary cross-entropy on probabilities (the default
# from_logits=False) and track accuracy.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
              loss=keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

# fit() needs the training data; db_train already yields (x, y) batches.
# db_test is only used to report validation metrics after each epoch.
# Training requires MyRNN.call to return the [b, 1] predictions.
model.fit(db_train, epochs=epochs, validation_data=db_test)