In [None]:
import tensorflow_datasets as tfds

In [None]:
# Load the 'imdb_reviews' dataset through TensorFlow Datasets.
# with_info=True makes the call return a second value, kept here as `info`.
imdb, info = tfds.load(
    'imdb_reviews',
    as_supervised=True,
    with_info=True,
)

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
# Pick the training and test splits out of the loaded dataset mapping.
train_data = imdb['train']
test_data = imdb['test']

In [None]:
def _split_text_and_labels(dataset):
  """Materialize a dataset of (text, label) pairs into two Python lists.

  Args:
    dataset: Iterable yielding (text, label) pairs whose elements expose
      `.numpy()`; the text element is expected to be a bytes scalar.

  Returns:
    A `(sentences, labels)` tuple: decoded `str` reviews and their labels,
    in iteration order.
  """
  sentences, labels = [], []
  for sentence, label in dataset:
    # `.numpy()` yields a bytes object. Calling str() on it would store the
    # repr (b"..." prefix plus backslash-escaped quotes) instead of the review
    # text, so decode the bytes explicitly.
    sentences.append(sentence.numpy().decode('utf-8'))
    labels.append(label.numpy())
  return sentences, labels


train_sentences, train_labels = _split_text_and_labels(train_data)
test_sentences, test_labels = _split_text_and_labels(test_data)

In [None]:
# Sanity check: display the first extracted training review.
train_sentences[0]

'b"This was an absolutely terrible movie. Don\'t be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie\'s ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor\'s like Christopher Walken\'s good name. I could barely sit through it."'

In [None]:
# Sanity check: display the label stored alongside the first training review.
train_labels[0]

0

In [None]:
# Convert both label collections to NumPy arrays before they are passed to model.fit.
train_labels, test_labels = map(np.array, (train_labels, test_labels))

In [None]:
# Hyperparameters shared by the tokenizer, the padding step and the model.
vocab_size = 10_000   # passed to Tokenizer(num_words=...) and to the Embedding layer
embedding_dim = 64    # second argument of the Embedding layer
max_length = 140      # every review is padded/truncated to this many tokens

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Fit the word index on the training reviews only. "<OOV>" is the
# out-of-vocabulary token handed to the Tokenizer.
tokenizer = Tokenizer(
    num_words=vocab_size,
    oov_token="<OOV>",
)
tokenizer.fit_on_texts(train_sentences)

In [None]:
# Encode every training review with the fitted tokenizer.
train_sequences = tokenizer.texts_to_sequences(train_sentences)

In [None]:
# Bring every training sequence to exactly `max_length` tokens; both the
# zero-padding and the truncation are applied at the end ('post').
padded_train_sequences = pad_sequences(
    train_sequences,
    maxlen=max_length,
    truncating='post',
    padding='post',
)

In [None]:
# Display the padded training matrix to eyeball the encoding and padding.
padded_train_sequences

array([[  59,   12,   14, ...,    0,    0,    0],
       [ 256,   28,   78, ...,    0,    0,    0],
       [   1, 6175,    2, ...,    0,    0,    0],
       ...,
       [7629,   37,   11, ..., 2743,   21, 1563],
       [2677,   11,  216, ..., 1048,   37,   31],
       [3875,    5,   31, ...,    1, 2238,   42]], dtype=int32)

In [None]:
# Encode the test reviews with the tokenizer fitted on the training set,
# then pad/truncate them with the same settings used for the training data.
test_sequences = tokenizer.texts_to_sequences(test_sentences)
padded_test_sequences = pad_sequences(
    test_sequences,
    maxlen=max_length,
    truncating='post',
    padding='post',
)

In [None]:
# Print the shape of the padded training and test matrices, one per line.
print(padded_train_sequences.shape, padded_test_sequences.shape, sep='\n')

(25000, 140)
(25000, 140)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense



In [None]:
# Build the whole network in one cell so the cell is idempotent: re-running it
# always yields a fresh four-layer model. Adding layers with separate
# `model.add(...)` cells silently stacks duplicate layers when a cell is re-run.
model = Sequential([
    # Learn an `embedding_dim`-sized vector for each of the `vocab_size` token ids.
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    # Collapse the per-token vectors into one flat feature vector per review.
    Flatten(),
    Dense(10, activation='relu'),
    # Single sigmoid unit: one score in (0, 1) per review.
    Dense(1, activation='sigmoid'),
])

In [None]:
# Configure training: binary cross-entropy loss, Adam optimizer, and accuracy
# tracked under the metric name 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 140, 64)           640000    
                                                                 
 flatten (Flatten)           (None, 8960)              0         
                                                                 
 dense (Dense)               (None, 10)                89610     
                                                                 
 dense_1 (Dense)             (None, 1)                 11        
                                                                 
Total params: 729,621
Trainable params: 729,621
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train for 10 epochs; the padded test split is passed as validation data.
# NOTE(review): the test split doubles as the validation set here, so the
# reported validation metrics are not an independent held-out estimate.
model.fit(
    padded_train_sequences,
    train_labels,
    epochs=10,
    validation_data=(padded_test_sequences, test_labels),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f48d46ae790>

In [None]:
def model_predict(text):
  """Print 'Positive' or 'Negative' for each review in `text`.

  Args:
    text: A single review string, or a list/tuple of review strings.

  Returns:
    None. One line is printed per review, in input order.
  """
  # texts_to_sequences iterates over its argument, so a bare string would be
  # treated as a sequence of one-character texts. Wrap it so it is handled
  # as a single review.
  if isinstance(text, str):
    text = [text]

  # Nothing to classify.
  if len(text) == 0:
    return

  test_seq = tokenizer.texts_to_sequences(text)
  padded_test_seq = pad_sequences(test_seq, maxlen=max_length, padding='post', truncating='post')

  # Flatten the predictions to one score per review. Comparing the whole
  # prediction array inside a single `if` raises for more than one review
  # because the truth value of a multi-element array is ambiguous.
  scores = np.asarray(model.predict(padded_test_seq)).reshape(-1)
  for score in scores:
    print('Positive' if score > 0.5 else 'Negative')

In [None]:
# Hand-written sample review used to smoke-test the trained model.
test_sen = [
    "This is my second attempt to review this title. "
    "I was able to see a screening of this in Europe...waste of time. "
    "Predictable and laughable at points, it desperately tries to keep pace "
    "with other Marvel offerings but fails miserably. "
]
model_predict(test_sen)

Negative


In [None]:
# Keep a handle on the model's first layer (the Embedding layer is added first).
e = model.layers[0]