In [1]:
import pandas as pd

In [2]:
reviews_df = pd.read_csv('movie_data.csv', encoding='utf-8')

In [3]:
label = reviews_df.pop('sentiment')

In [4]:
import tensorflow as tf

In [5]:
ds_raw = tf.data.Dataset.from_tensor_slices((reviews_df.values, label.values))

In [6]:
ds_raw_count = label.count()

In [7]:
tf.random.set_seed(42)

In [8]:
ds_raw = ds_raw.shuffle(ds_raw_count, reshuffle_each_iteration = False)

In [9]:
test_count = int(0.5 * ds_raw_count)
train_count = int(0.8 * (ds_raw_count - test_count))
valid_count = int(ds_raw_count - test_count - train_count)

In [10]:
train_count, valid_count, test_count

(20000, 5000, 25000)

In [11]:
ds_test_raw = ds_raw.take(test_count)
ds_train_and_valid_raw = ds_raw.skip(test_count)
ds_train_raw = ds_train_and_valid_raw.take(train_count)
ds_valid_raw = ds_train_and_valid_raw.skip(train_count)

In [12]:
import tensorflow_datasets as tfds
from collections import Counter

In [13]:
tokenizer = tfds.features.text.Tokenizer()
word_counts = Counter()

In [14]:
for review in ds_train_raw.as_numpy_iterator():
    words = tokenizer.tokenize(review[0][0])
    word_counts.update(words)

In [15]:
for review in ds_train_raw.take(5).as_numpy_iterator():
    print(review[0][0], review[1])

b'When I fist watched the movie, I said to myself, "so a film can be made like this." Wong Kar Wai\'s gorgeous poetic love story captured me throughout and even after the film. I must admit this is one of the best love movies, maybe the best of all, I have ever watched. The content and the form overlaps perfectly. As watching the secret love we see the characters in bounded frames that limits their movements as well as their feelings. Beautiful camera angles and the lighting makes the feelings and the blues even touchable. I want to congratulate Christopher Doyle and Pin Bing Lee for their fantastic cinematography which creates the mood for love. Also the music defines the sadness of the love which plays along the beautiful slow motion frames and shows the characters in despairing moods. And of course the performances of the actors which makes the love so real. Eventually, all the elements in the film combined in a perfect way under the direction of WKW and give the audience the feelin

In [16]:
text_encoder = tfds.features.text.TokenTextEncoder(word_counts)

In [17]:
def encode_text(text_tensor, label):
    text = text_tensor.numpy()[0]
    encoded_text = text_encoder.encode(text)
    return encoded_text, label

In [18]:
for review in ds_train_raw.take(5):
    print(encode_text(review[0], review[1]))

([1, 2, 3, 4, 5, 6, 2, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 5, 12, 2, 32, 33, 17, 34, 35, 36, 5, 37, 24, 38, 39, 5, 37, 36, 40, 2, 41, 42, 4, 43, 44, 29, 5, 45, 46, 47, 48, 49, 5, 50, 24, 51, 52, 5, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 61, 59, 63, 64, 65, 66, 29, 5, 67, 68, 5, 63, 29, 5, 69, 30, 70, 2, 71, 8, 72, 73, 74, 29, 75, 76, 77, 78, 59, 79, 80, 81, 82, 5, 83, 78, 24, 84, 5, 85, 86, 5, 87, 36, 5, 24, 81, 88, 89, 5, 90, 91, 92, 56, 29, 93, 5, 53, 54, 94, 95, 96, 36, 97, 5, 98, 36, 5, 99, 81, 68, 5, 24, 10, 100, 101, 40, 5, 102, 54, 5, 12, 103, 54, 11, 104, 105, 106, 5, 107, 36, 108, 29, 109, 5, 110, 5, 111, 112, 24], <tf.Tensor: shape=(), dtype=int64, numpy=1>)
([113, 114, 115, 116, 117, 11, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 5, 128, 36, 5, 6, 129, 130, 131, 5, 132, 133, 134, 135, 6, 136, 137, 138, 54, 61, 139, 61, 136, 13, 140, 141, 6, 34, 10, 142, 57, 136, 143, 144, 8, 145, 146, 124, 147, 29, 148, 149

In [19]:
def encode_text_map(text_tensor, label):
    return tf.py_function(encode_text, inp=[text_tensor, label], Tout=[tf.int64, tf.int64])

In [20]:
ds_train_encoded = ds_train_raw.map(encode_text_map)
ds_valid_encoded = ds_valid_raw.map(encode_text_map)
ds_test_encoded = ds_test_raw.map(encode_text_map)

In [21]:
for review in ds_train_encoded.take(5).as_numpy_iterator():
    print(review[0], review[1])

[  1   2   3   4   5   6   2   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31   5  12   2  32
  33  17  34  35  36   5  37  24  38  39   5  37  36  40   2  41  42   4
  43  44  29   5  45  46  47  48  49   5  50  24  51  52   5  53  54  55
  56  57  58  59  60  61  62  61  59  63  64  65  66  29   5  67  68   5
  63  29   5  69  30  70   2  71   8  72  73  74  29  75  76  77  78  59
  79  80  81  82   5  83  78  24  84   5  85  86   5  87  36   5  24  81
  88  89   5  90  91  92  56  29  93   5  53  54  94  95  96  36  97   5
  98  36   5  99  81  68   5  24  10 100 101  40   5 102  54   5  12 103
  54  11 104 105 106   5 107  36 108  29 109   5 110   5 111 112  24] 1
[113 114 115 116 117  11 118 119 120 121 122 123 124 125 126 127   5 128
  36   5   6 129 130 131   5 132 133 134 135   6 136 137 138  54  61 139
  61 136  13 140 141   6  34  10 142  57 136 143 144   8 145 146 124 147
  29 148 149   5 150  36  17  12 151 152 152 153 136

In [None]:
ds_train = ds_train_encoded.padded_batch(32, padded_shapes=([-1],[]))
ds_valid = ds_valid_encoded.padded_batch(32, padded_shapes=([-1],[]))
ds_test = ds_test_encoded.padded_batch(32, padded_shapes=([-1],[]))

In [23]:
for review in ds_train.take(5).as_numpy_iterator():
    print(review[0][0])

[  1   2   3   4   5   6   2   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31   5  12   2  32
  33  17  34  35  36   5  37  24  38  39   5  37  36  40   2  41  42   4
  43  44  29   5  45  46  47  48  49   5  50  24  51  52   5  53  54  55
  56  57  58  59  60  61  62  61  59  63  64  65  66  29   5  67  68   5
  63  29   5  69  30  70   2  71   8  72  73  74  29  75  76  77  78  59
  79  80  81  82   5  83  78  24  84   5  85  86   5  87  36   5  24  81
  88  89   5  90  91  92  56  29  93   5  53  54  94  95  96  36  97   5
  98  36   5  99  81  68   5  24  10 100 101  40   5 102  54   5  12 103
  54  11 104 105 106   5 107  36 108  29 109   5 110   5 111 112  24   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   

In [24]:
from tensorflow.keras import Sequential

In [25]:
conv_bidir_model = Sequential()

In [25]:
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, LSTM, Bidirectional, GRU, Dense, Flatten

In [27]:
vocab_size = len(word_counts) + 2
embedding_size = 32
conv_bidir_model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size))

In [28]:
conv_bidir_model.add(Conv1D(filters = 64, kernel_size=3, activation='relu'))

In [29]:
conv_bidir_model.add(MaxPooling1D(pool_size=5))

In [30]:
conv_bidir_model.add(Bidirectional(LSTM(units=64, dropout=0.5, recurrent_dropout=0.5, return_sequences=False)))

In [31]:
conv_bidir_model.add(Dense(64, activation='relu')) 

In [32]:
conv_bidir_model.add(Dense(1, activation='sigmoid')) 

In [33]:
conv_bidir_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 32)          2791616   
_________________________________________________________________
conv1d (Conv1D)              (None, None, 64)          6208      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, None, 64)          0         
_________________________________________________________________
bidirectional (Bidirectional (None, 128)               66048     
_________________________________________________________________
dense (Dense)                (None, 64)                8256      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 2,872,193
Trainable params: 2,872,193
Non-trainable params: 0
______________________________________________

In [34]:
conv_bidir_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [26]:
from tensorflow.keras.callbacks import TensorBoard
import time

In [35]:
tensorboard_callback = TensorBoard(log_dir=f'.\logs\mnist{time.time()}', histogram_freq=1, write_graph=True)

In [36]:
conv_bidir_model.fit(ds_train, epochs=7, validation_data=ds_valid, callbacks=[tensorboard_callback])

Epoch 1/7
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<tensorflow.python.keras.callbacks.History at 0x1be8c78a3a0>

In [37]:
loss, accuracy = conv_bidir_model.evaluate(ds_test)



In [38]:
conv_lstm_model = Sequential()

In [39]:
vocab_size = len(word_counts) + 2
embedding_size = 32
conv_lstm_model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size))

In [40]:
conv_lstm_model.add(Conv1D(filters = 64, kernel_size=3, activation='relu'))

In [41]:
conv_lstm_model.add(MaxPooling1D(pool_size=5))

In [42]:
conv_lstm_model.add(LSTM(units=64, dropout=0.5, recurrent_dropout=0.5, return_sequences=False))

In [43]:
conv_lstm_model.add(Dense(64, activation='relu')) 

In [44]:
conv_lstm_model.add(Dense(1, activation='sigmoid')) 

In [45]:
conv_lstm_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 32)          2791616   
_________________________________________________________________
conv1d_1 (Conv1D)            (None, None, 64)          6208      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, None, 64)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 2,835,073
Trainable params: 2,835,073
Non-trainable params: 0
____________________________________________

In [46]:
conv_lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [50]:
tensorboard_callback = TensorBoard(log_dir=f'.\logs\conv_lstm_model', histogram_freq=1, write_graph=True)

In [51]:
conv_lstm_model.fit(ds_train, epochs=7, validation_data=ds_valid, callbacks=[tensorboard_callback])

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<tensorflow.python.keras.callbacks.History at 0x1be8c5091f0>

In [52]:
loss, accuracy = conv_lstm_model.evaluate(ds_test)



In [53]:
conv_model = Sequential()

In [54]:
vocab_size = len(word_counts) + 2
embedding_size = 32
conv_model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size))

In [55]:
conv_model.add(Conv1D(filters = 64, kernel_size=3, activation='relu'))

In [56]:
conv_model.add(MaxPooling1D(pool_size=5))

In [57]:
conv_model.add(Dense(64, activation='relu')) 

In [58]:
conv_model.add(Dense(1, activation='sigmoid')) 

In [59]:
conv_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 32)          2791616   
_________________________________________________________________
conv1d_2 (Conv1D)            (None, None, 64)          6208      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, None, 64)          0         
_________________________________________________________________
dense_4 (Dense)              (None, None, 64)          4160      
_________________________________________________________________
dense_5 (Dense)              (None, None, 1)           65        
Total params: 2,802,049
Trainable params: 2,802,049
Non-trainable params: 0
_________________________________________________________________


In [60]:
conv_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [61]:
tensorboard_callback = TensorBoard(log_dir=f'.\logs\conv_model', histogram_freq=1, write_graph=True)

In [62]:
conv_model.fit(ds_train, epochs=7, validation_data=ds_valid, callbacks=[tensorboard_callback])

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<tensorflow.python.keras.callbacks.History at 0x1bea3f21700>

In [63]:
loss, accuracy = conv_model.evaluate(ds_test)



In [124]:
conv_lstm_bidir_model = Sequential()

In [125]:
vocab_size = len(word_counts) + 2
embedding_size = 32
conv_lstm_bidir_model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size))

In [126]:
conv_lstm_bidir_model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))

In [127]:
conv_lstm_bidir_model.add(MaxPooling1D(pool_size=5))

In [128]:
conv_lstm_bidir_model.add(LSTM(units=32, dropout=0.5, recurrent_dropout=0.5, return_sequences=True))

In [129]:
conv_lstm_bidir_model.add(Bidirectional(LSTM(units=16, dropout=0.5, recurrent_dropout=0.5, return_sequences=False)))

In [130]:
conv_lstm_bidir_model.add(Dense(64, activation='relu')) 

In [131]:
conv_lstm_bidir_model.add(Dense(1, activation='sigmoid')) 

In [132]:
conv_lstm_bidir_model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, None, 32)          2791616   
_________________________________________________________________
conv1d_8 (Conv1D)            (None, None, 128)         12416     
_________________________________________________________________
max_pooling1d_8 (MaxPooling1 (None, None, 128)         0         
_________________________________________________________________
lstm_16 (LSTM)               (None, None, 32)          20608     
_________________________________________________________________
bidirectional_8 (Bidirection (None, 32)                6272      
_________________________________________________________________
dense_16 (Dense)             (None, 64)                2112      
_________________________________________________________________
dense_17 (Dense)             (None, 1)                

In [133]:
conv_lstm_bidir_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [134]:
tensorboard_callback = TensorBoard(log_dir='.\logs\conv_lstm_bidir_model', histogram_freq=1, write_graph=True)

In [135]:
conv_lstm_bidir_model.fit(ds_train, epochs=10, validation_data=ds_valid, callbacks=[tensorboard_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1af9eef1190>

In [76]:
loss, accuracy = conv_lstm_bidir_model.evaluate(ds_test)



In [27]:
conv_bidir_lstm_model = Sequential()

In [28]:
vocab_size = len(word_counts) + 2
embedding_size = 32
conv_bidir_lstm_model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size))

In [29]:
conv_bidir_lstm_model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))

In [30]:
conv_bidir_lstm_model.add(MaxPooling1D(pool_size=5))

In [31]:
conv_bidir_lstm_model.add(Bidirectional(LSTM(units=16, dropout=0.5, recurrent_dropout=0.5, return_sequences=True)))

In [32]:
conv_bidir_lstm_model.add(LSTM(units=32, dropout=0.5, recurrent_dropout=0.5, return_sequences=False))

In [33]:
conv_bidir_lstm_model.add(Dense(64, activation='relu')) 

In [34]:
conv_bidir_lstm_model.add(Dense(1, activation='sigmoid')) 

In [35]:
conv_bidir_lstm_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 32)          2791616   
_________________________________________________________________
conv1d (Conv1D)              (None, None, 128)         12416     
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, None, 128)         0         
_________________________________________________________________
bidirectional (Bidirectional (None, None, 32)          18560     
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense (Dense)                (None, 64)                2112      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 6

In [36]:
conv_bidir_lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [37]:
tensorboard_callback = TensorBoard(log_dir='.\logs\conv_bidir_lstm_model', histogram_freq=1, write_graph=True)

In [38]:
conv_bidir_lstm_model.fit(ds_train, epochs=8, validation_data=ds_valid, callbacks=[tensorboard_callback])

Epoch 1/8
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x159622ffa90>

In [39]:
loss, accuracy = conv_bidir_lstm_model.evaluate(ds_test)



In [67]:
reviews_test = ds_test.map(lambda x, y: x)

In [213]:
reviews_test

<MapDataset shapes: (None, None), types: tf.int64>

In [211]:
for batch in reviews_test.take(1):
    for review in batch:
        print(review)

tf.Tensor(
[  576    34    54     5   617    36    17  4250    57     5    37   208
     8    14    11  1316   309    54     5 15576    21    11  3546   212
  4641   337   249  1772 22769    29   635    29  1454  2932   337  3171
    78   700    36     5    37   515   985     8  1853     5  3534 16770
    43    37   128    36  6013   215  1304   337   185  8355  7402 12877
   202  5991    11  1705  6448  7475   256     5  4461  1540    36  1823
 22457  2424 87237    29   297 24920 14423   256   367    29  4662   835
    16 14136   130   636 29403 31009 15810  4641    15  4346   385     5
 16432 13851 10116   309     5  3270 29015   256   281  4662 10647  2194
    29 15587     8     5  4781    36 26398    29   265   237  5253 14449
 20702   309  1052  1841   385   244 27330     5 16432 13851    78    11
   315  3575   159   978  1348    57  1115  2963  2013    29  2148   475
    57    13  4840   200   908  2609  3200   460  4375   356   796  2585
   212     5  5253 19282   337 11642    

In [164]:
for batch in reviews_test:
    print(batch.shape)

(32, 834)
(32, 1666)
(32, 671)
(32, 851)
(32, 532)
(32, 536)
(32, 806)
(32, 756)
(32, 860)
(32, 789)
(32, 660)
(32, 1040)
(32, 552)
(32, 392)
(32, 441)
(32, 1033)
(32, 678)
(32, 1098)
(32, 1039)
(32, 826)
(32, 419)
(32, 672)
(32, 1023)
(32, 550)
(32, 887)
(32, 591)
(32, 456)
(32, 726)
(32, 791)
(32, 826)
(32, 687)
(32, 560)
(32, 935)
(32, 569)
(32, 824)
(32, 744)
(32, 474)
(32, 467)
(32, 675)
(32, 629)
(32, 457)
(32, 808)
(32, 414)
(32, 744)
(32, 1051)
(32, 666)
(32, 737)
(32, 1032)
(32, 668)
(32, 527)
(32, 928)
(32, 685)
(32, 712)
(32, 454)
(32, 594)
(32, 802)
(32, 551)
(32, 645)
(32, 900)
(32, 825)
(32, 924)
(32, 842)
(32, 568)
(32, 708)
(32, 957)
(32, 890)
(32, 354)
(32, 1234)
(32, 719)
(32, 1042)
(32, 612)
(32, 1031)
(32, 635)
(32, 632)
(32, 775)
(32, 990)
(32, 437)
(32, 854)
(32, 465)
(32, 681)
(32, 615)
(32, 1007)
(32, 669)
(32, 825)
(32, 494)
(32, 597)
(32, 1020)
(32, 448)
(32, 955)
(32, 559)
(32, 811)
(32, 732)
(32, 658)
(32, 636)
(32, 857)
(32, 1025)
(32, 939)
(32, 1064)
(32, 

In [54]:
predictions = conv_bidir_lstm_model.predict(reviews_test)

In [69]:
labels_test = ds_test.map(lambda x, y: y)

In [77]:
for batch in labels_test.take(5).as_numpy_iterator():
    for i, review in enumerate(batch):
        print(review, predictions[i][0])
    print('--------------------')

1 0.9924869
1 0.9295788
0 0.048887134
1 0.9876755
1 0.9876692
0 0.005816579
0 0.04877445
1 0.9876425
0 0.51842713
0 0.00508222
1 0.9842012
0 0.0058969557
1 0.987761
0 0.005816579
0 0.98764795
0 0.005816579
1 0.98758006
1 0.9876735
0 0.040071845
0 0.052507967
1 0.41035807
0 0.9224813
0 0.035926133
0 0.005816579
0 0.87355304
1 0.9876542
1 0.9855072
0 0.9294214
0 0.005816579
0 0.045541167
0 0.005816549
0 0.04546213
--------------------
1 0.9924869
1 0.9295788
0 0.048887134
0 0.9876755
0 0.9876692
1 0.005816579
1 0.04877445
0 0.9876425
0 0.51842713
1 0.00508222
1 0.9842012
0 0.0058969557
0 0.987761
1 0.005816579
1 0.98764795
0 0.005816579
1 0.98758006
1 0.9876735
1 0.040071845
0 0.052507967
0 0.41035807
1 0.9224813
1 0.035926133
1 0.005816579
0 0.87355304
0 0.9876542
0 0.9855072
1 0.9294214
0 0.005816579
0 0.045541167
0 0.005816549
0 0.04546213
--------------------
0 0.9924869
0 0.9295788
0 0.048887134
1 0.9876755
1 0.9876692
0 0.005816579
0 0.04877445
1 0.9876425
0 0.51842713
1 0.00508222

In [498]:
test_review = "Just got out of the theater, and WOW. An absolute Masterpiece. The story, the decor, the music... you enter in a world amd can't get out. Chalamet carries the hell out of the movie, he is Paul. Ferguson is amazing and Zendaya is an absolute vision. The cinema needed this Dune ! Denis Villeneuve... your masterpiece will be remembered."

In [500]:
%pprint

Pretty printing has been turned OFF


In [501]:
encoded_review = text_encoder.encode(test_review)
encoded_review

[3752, 351, 162, 36, 5, 588, 29, 9683, 3179, 2580, 35271, 43, 25, 5, 49066, 5, 85, 243, 9338, 54, 11, 669, 87237, 13, 116, 177, 162, 87237, 7347, 5, 1136, 162, 36, 5, 6, 357, 34, 297, 61846, 34, 2862, 29, 87237, 34, 189, 2580, 10699, 43, 3437, 2920, 17, 12730, 29552, 87237, 205, 1401, 280, 14, 2396]

In [502]:
batch_shape = len(encoded_review)
batch_shape

59

In [503]:
batch_review = []

In [504]:
for i in range(0,32):
    batch_review.append(encoded_review)
len(batch_review)

32

In [505]:
batch_review = np.int64(batch_review)
batch_review

array([[3752,  351,  162, ...,  280,   14, 2396],
       [3752,  351,  162, ...,  280,   14, 2396],
       [3752,  351,  162, ...,  280,   14, 2396],
       ...,
       [3752,  351,  162, ...,  280,   14, 2396],
       [3752,  351,  162, ...,  280,   14, 2396],
       [3752,  351,  162, ...,  280,   14, 2396]], dtype=int64)

In [506]:
tf_ds_review = tf.data.Dataset.from_tensor_slices(batch_review)

In [507]:
for review in tf_ds_review:
    print(review)

tf.Tensor(
[ 3752   351   162    36     5   588    29  9683  3179  2580 35271    43
    25     5 49066     5    85   243  9338    54    11   669 87237    13
   116   177   162 87237  7347     5  1136   162    36     5     6   357
    34   297 61846    34  2862    29 87237    34   189  2580 10699    43
  3437  2920    17 12730 29552 87237   205  1401   280    14  2396], shape=(59,), dtype=int64)
tf.Tensor(
[ 3752   351   162    36     5   588    29  9683  3179  2580 35271    43
    25     5 49066     5    85   243  9338    54    11   669 87237    13
   116   177   162 87237  7347     5  1136   162    36     5     6   357
    34   297 61846    34  2862    29 87237    34   189  2580 10699    43
  3437  2920    17 12730 29552 87237   205  1401   280    14  2396], shape=(59,), dtype=int64)
tf.Tensor(
[ 3752   351   162    36     5   588    29  9683  3179  2580 35271    43
    25     5 49066     5    85   243  9338    54    11   669 87237    13
   116   177   162 87237  7347     5  1136   16

In [508]:
tf_ds_batch_review = tf_ds_review.padded_batch(32, padded_shapes=np.max([7, batch_shape]))

In [509]:
tf_ds_batch_review

<PaddedBatchDataset shapes: (None, 59), types: tf.int64>

In [510]:
for batch in tf_ds_batch_review.take(1):
    for review in batch:
        print(review)

tf.Tensor(
[ 3752   351   162    36     5   588    29  9683  3179  2580 35271    43
    25     5 49066     5    85   243  9338    54    11   669 87237    13
   116   177   162 87237  7347     5  1136   162    36     5     6   357
    34   297 61846    34  2862    29 87237    34   189  2580 10699    43
  3437  2920    17 12730 29552 87237   205  1401   280    14  2396], shape=(59,), dtype=int64)
tf.Tensor(
[ 3752   351   162    36     5   588    29  9683  3179  2580 35271    43
    25     5 49066     5    85   243  9338    54    11   669 87237    13
   116   177   162 87237  7347     5  1136   162    36     5     6   357
    34   297 61846    34  2862    29 87237    34   189  2580 10699    43
  3437  2920    17 12730 29552 87237   205  1401   280    14  2396], shape=(59,), dtype=int64)
tf.Tensor(
[ 3752   351   162    36     5   588    29  9683  3179  2580 35271    43
    25     5 49066     5    85   243  9338    54    11   669 87237    13
   116   177   162 87237  7347     5  1136   16

In [511]:
for batch in tf_ds_batch_review:
    print(batch.shape)

(32, 59)


In [512]:
test_prediction = conv_bidir_lstm_model.predict(tf_ds_batch_review)

In [513]:
test_prediction

array([[0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047],
       [0.9723047]], dtype=float32)

In [148]:
conv_bidir_lstm_02_model = Sequential()

In [149]:
vocab_size = len(word_counts) + 2
embedding_size = 32
conv_bidir_lstm_02_model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size))

In [150]:
conv_bidir_lstm_02_model.add(Conv1D(filters = 128, kernel_size=3, activation='relu'))

In [151]:
conv_bidir_lstm_02_model.add(MaxPooling1D(pool_size=5))

In [152]:
conv_bidir_lstm_02_model.add(Bidirectional(LSTM(units=16, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)))

In [153]:
conv_bidir_lstm_02_model.add(LSTM(units=32, dropout=0.2, recurrent_dropout=0.2, return_sequences=False))

In [154]:
conv_bidir_lstm_02_model.add(Dense(64, activation='relu')) 

In [155]:
conv_bidir_lstm_02_model.add(Dense(1, activation='sigmoid')) 

In [156]:
conv_bidir_lstm_02_model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (None, None, 32)          2791616   
_________________________________________________________________
conv1d_10 (Conv1D)           (None, None, 128)         12416     
_________________________________________________________________
max_pooling1d_10 (MaxPooling (None, None, 128)         0         
_________________________________________________________________
bidirectional_10 (Bidirectio (None, None, 32)          18560     
_________________________________________________________________
lstm_21 (LSTM)               (None, 32)                8320      
_________________________________________________________________
dense_20 (Dense)             (None, 64)                2112      
_________________________________________________________________
dense_21 (Dense)             (None, 1)               

In [157]:
conv_bidir_lstm_02_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [158]:
tensorboard_callback = TensorBoard(log_dir=f'.\logs\conv_bidir_lstm_05_model', histogram_freq=1, write_graph=True)

In [None]:
conv_bidir_lstm_02_model.fit(ds_train, epochs=10, validation_data=ds_valid, callbacks=[tensorboard_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

In [128]:
loss, accuracy = conv_bidir_lstm_02_model.evaluate(ds_test)

