<h2 class="devsite-page-title"><span style="color: #ffff99;">tf.keras.preprocessing.sequence.pad_sequences</span></h2>
<ul>
<li>
<pre class="prettyprint language-python language-python" data-language="python"><span style="color: #ffffff;">tf<span class="token punctuation">.</span>keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>sequence<span class="token punctuation">.</span>pad_sequences<span class="token punctuation">(</span>
    sequences<span class="token punctuation">,</span>
    maxlen<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
    dtype<span class="token operator">=</span><span class="token string">'int32'</span><span class="token punctuation">,</span>
    padding<span class="token operator">=</span><span class="token string">'pre'</span><span class="token punctuation">,</span>
    truncating<span class="token operator">=</span><span class="token string">'pre'</span><span class="token punctuation">,</span>
    value<span class="token operator">=</span><span class="token number">0.0</span>
<span class="token punctuation">)</span></span></pre>
<ul>
<li>
<p><span style="color: #ccffff;">Pads sequences to the same length.</span></p>
<p><span style="color: #ccffff;">This function transforms a list of&nbsp;<code>num_samples</code>&nbsp;sequences (lists of integers) into a 2D Numpy array of shape&nbsp;<code>(num_samples, num_timesteps)</code>.&nbsp;<code>num_timesteps</code>&nbsp;is either the&nbsp;<code>maxlen</code>&nbsp;argument if provided, or the length of the longest sequence otherwise.</span></p>
<p><span style="color: #ccffff;">Sequences that are shorter than&nbsp;<code>num_timesteps</code>&nbsp;are padded with&nbsp;<code>value</code>&nbsp;until they are&nbsp;<code>num_timesteps</code>&nbsp;long.</span></p>
<p><span style="color: #ccffff;">Sequences longer than&nbsp;<code>num_timesteps</code>&nbsp;are truncated so that they fit the desired length. The position where padding or truncation happens is determined by the arguments&nbsp;<code>padding</code>&nbsp;and&nbsp;<code>truncating</code>, respectively.</span></p>
<p><span style="color: #ccffff;">Pre-padding is the default.</span></p>
</li>
<li><span style="color: #ffff99;"><strong><code>sequences</code></strong>: List of lists, where each element is a sequence.</span></li>
<li><span style="color: #ffff99;"><strong><code>maxlen</code></strong>: Int, maximum length of all sequences.</span></li>
<li><span style="color: #ffff99;"><strong><code>dtype</code></strong>: Type of the output sequences.</span></li>
<li><span style="color: #ffff99;"><strong><code>padding</code></strong>: String, 'pre' or 'post': pad either before or after each sequence.</span></li>
<li><span style="color: #ffff99;"><strong><code>truncating</code></strong>: String, 'pre' or 'post': remove values from sequences larger than&nbsp;<code>maxlen</code>, either at the beginning or at the end of the sequences.</span></li>
<li><span style="color: #ffff99;"><strong><code>value</code></strong>: Float, padding value.</span></li>
<li>
<p>&nbsp;</p>
<h4 id="returns"><span style="color: #ff9900;">Returns:</span></h4>
<ul>
<li><span style="color: #ffff99;"><strong><code>x</code></strong>: Numpy array with shape&nbsp;</span><code><span style="color: #ffff99;">(len(sequences), maxlen)</span></code>
<h4>&nbsp;</h4>
<pre class="prettyprint language-python language-python" data-language="python">&nbsp;</pre>
</li>
</ul>
</li>
</ul>
</li>
</ul>

In [1]:


from keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence #note that from keras.preprocessing import sequence gives an error,tensorflow should be put before that

# Vocabulary size handed to imdb.load_data via num_words.
max_features = 10000
# Target length (in tokens) every review is padded/truncated to.
max_len = 500

print('Loading data...')
# Local copy of the IMDB archive. A raw string keeps the Windows backslashes
# literal; the plain literal contained the invalid escapes "\C" and "\i",
# which trigger a SyntaxWarning/DeprecationWarning on current Python versions.
# NOTE(review): this is a machine-specific absolute path. To use the default
# location instead, call imdb.load_data(num_words=max_features) without `path`.
imdb_path = r'D:\CSV original\imdb.npz'
# Loads the data as lists of integers
(x_train, y_train), (x_test, y_test) = imdb.load_data(path=imdb_path, num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Padding sequences (samples x time)')
# pad_sequences defaults to padding='pre' and truncating='pre', so shorter
# reviews get leading zeros and longer ones lose their first tokens.
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Loading data...
25000 train sequences
25000 test sequences
Padding sequences (samples x time)
x_train shape: (25000, 500)
x_test shape: (25000, 500)


In [2]:
from keras.models import Sequential
from keras import layers
from keras import regularizers
from keras import callbacks
from keras.optimizers import Adam

<h2><span style="color: #ff9900;">Multi-layer GRU</span></h2>

In [3]:
# Stacked-GRU binary classifier:
# embedding -> three GRU layers (dropout after each) -> single sigmoid unit.
model = Sequential()
model.add(layers.Embedding(max_features, 128, input_length=max_len, embeddings_regularizer=regularizers.l2(1e-3)))
# The first two GRUs return the full sequence so the next recurrent layer
# receives one vector per timestep; the last GRU returns only its final state.
model.add(layers.GRU(32, return_sequences=True))
model.add(layers.Dropout(0.5))
model.add(layers.GRU(32, return_sequences=True))
model.add(layers.Dropout(0.5))
model.add(layers.GRU(32))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

model.compile(optimizer=Adam(),
              loss='binary_crossentropy',
              metrics=['acc'])

# NOTE(review): the test split is reused as validation data, so the reported
# val_* metrics are not an independent estimate if they guide model selection.
# `callbacks` is documented as a list of callback instances, so the single
# TensorBoard callback is wrapped in a list.
history = model.fit(x_train, y_train,
                    epochs=10,
                    batch_size=128,
                    validation_data=(x_test, y_test),
                    callbacks=[callbacks.TensorBoard(log_dir='logs/RNN')])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 128)          1280000   
                                                                 
 gru (GRU)                   (None, 500, 32)           15552     
                                                                 
 dropout (Dropout)           (None, 500, 32)           0         
                                                                 
 gru_1 (GRU)                 (None, 500, 32)           6336      
                                                                 
 dropout_1 (Dropout)         (None, 500, 32)           0         
                                                                 
 gru_2 (GRU)                 (None, 32)                6336      
                                                                 
 dropout_2 (Dropout)         (None, 32)                0

<ul>
<li><span style="color: #ffff99;">Embedding layer parameter count: 10000 (words) * 128 (embedding dimensions) = 1280000</span></li>
<li><span style="color: #ffff99;">First GRU layer parameter count: 3 (gates) * (32*32 recurrent weights + 32*128 input weights + 2*32 biases) = 15552</span></li>
</ul>
<p>&nbsp;</p>

<h2><span style="color: #ff9900;">1D convnet</span></h2>

In [4]:
# 1D-convnet binary classifier:
# embedding -> Conv1D -> max-pool -> Conv1D -> global max-pool -> sigmoid unit,
# with dropout after each convolution and after the global pooling.
model = Sequential()
model.add(layers.Embedding(max_features, 128, input_length=max_len, embeddings_regularizer=regularizers.l2(1e-3)))
# 32 filters with a window of 9 timesteps.
model.add(layers.Conv1D(32, 9, activation='relu'))
model.add(layers.Dropout(0.5))
# Halve the temporal resolution before the second convolution.
model.add(layers.MaxPooling1D(2))
model.add(layers.Conv1D(32, 9, activation='relu'))
model.add(layers.Dropout(0.5))
# Collapse the time axis: keep the maximum activation of each filter.
model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

model.compile(optimizer=Adam(),
              loss='binary_crossentropy',
              metrics=['acc'])

# NOTE(review): the test split is reused as validation data, so the reported
# val_* metrics are not an independent estimate if they guide model selection.
# `callbacks` is documented as a list of callback instances, so the single
# TensorBoard callback is wrapped in a list.
history = model.fit(x_train, y_train,
                    epochs=10,
                    batch_size=128,
                    validation_data=(x_test, y_test),
                    callbacks=[callbacks.TensorBoard(log_dir='logs/CNN')])

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 500, 128)          1280000   
                                                                 
 conv1d (Conv1D)             (None, 492, 32)           36896     
                                                                 
 dropout_3 (Dropout)         (None, 492, 32)           0         
                                                                 
 max_pooling1d (MaxPooling1D  (None, 246, 32)          0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 238, 32)           9248      
                                                                 
 dropout_4 (Dropout)         (None, 238, 32)           0         
                                                      

<h2><span style="color: #ff9900;">1D convnet + GRU</span></h2>

In [5]:
# Hybrid binary classifier: a 1D-convnet front end shortens the sequence
# (500 -> 238 timesteps per the model summary) before a single GRU reads it.
model = Sequential()
model.add(layers.Embedding(max_features, 128, input_length=max_len, embeddings_regularizer=regularizers.l2(1e-3)))
# 32 filters with a window of 9 timesteps.
model.add(layers.Conv1D(32, 9, activation='relu'))
model.add(layers.Dropout(0.5))
# Halve the temporal resolution before the second convolution.
model.add(layers.MaxPooling1D(2))
model.add(layers.Conv1D(32, 9, activation='relu'))
model.add(layers.Dropout(0.5))
# The GRU consumes the convolutional feature sequence and returns its final state.
model.add(layers.GRU(32))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

model.compile(optimizer=Adam(),
              loss='binary_crossentropy',
              metrics=['acc'])

# NOTE(review): the test split is reused as validation data, so the reported
# val_* metrics are not an independent estimate if they guide model selection.
# `callbacks` is documented as a list of callback instances, so the single
# TensorBoard callback is wrapped in a list.
history = model.fit(x_train, y_train,
                    epochs=10,
                    batch_size=128,
                    validation_data=(x_test, y_test),
                    callbacks=[callbacks.TensorBoard(log_dir='logs/CNN_RNN')])

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 500, 128)          1280000   
                                                                 
 conv1d_2 (Conv1D)           (None, 492, 32)           36896     
                                                                 
 dropout_6 (Dropout)         (None, 492, 32)           0         
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 246, 32)          0         
 1D)                                                             
                                                                 
 conv1d_3 (Conv1D)           (None, 238, 32)           9248      
                                                                 
 dropout_7 (Dropout)         (None, 238, 32)           0         
                                                      

In [6]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available
%load_ext tensorboard

In [8]:
# Open TensorBoard inline, pointed at the `logs` directory.
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 11364), started 0:09:56 ago. (Use '!kill 11364' to kill it.)