In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

from imdb_sentiment import create_model

In [2]:
# Fixed input length in tokens: handed to create_model and used as maxlen in pad_input.
seq_len = 256
# Vocabulary cut-off: handed to create_model; get_word maps any word whose
# index is >= this value to 0.
vocab_size = 2048

In [3]:
# Build the network via the project-local factory; weights are restored in a
# later cell.
# NOTE(review): N, d_model, d_ff and h look like Transformer-encoder settings
# (layer count, model width, feed-forward width, attention heads) — confirm
# against imdb_sentiment.create_model. These values presumably have to match
# the configuration the saved weights were trained with.
# pad_id=0 is the same value pad_input uses as its fill value.
model = create_model(
    seq_len=seq_len, 
    vocab_size=vocab_size,
    pad_id=0, 
    N=2, 
    d_model=128, 
    d_ff=512, 
    h=8, 
    dropout=0.5
)

In [4]:
# Restore trained weights from the path below (relative to the current working
# directory). The call's return value is echoed as the cell output.
model.load_weights("models/imdb_sentiment/weights/model_weights")

<tensorflow.python.training.checkpointable.util.CheckpointLoadStatus at 0x1a31fdb3c8>

In [5]:
# Print the layer-by-layer overview (output shapes, parameter counts) of the model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 256)          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 256, 128)     262144      input_1[0][0]                    
__________________________________________________________________________________________________
positional_encoding (Positional (None, None, 128)    0           input_1[0][0]                    
__________________________________________________________________________________________________
add (Add)                       (None, 256, 128)     0           embedding[0][0]                  
                                                                 positional_encoding[0][0]        
__________

In [6]:
# Word -> integer index mapping shipped with the Keras IMDB dataset.
# NOTE(review): imdb.load_data() by default shifts these indices by
# index_from=3 and reserves low ids for padding/start/OOV, whereas get_word
# uses the raw values — confirm this matches how the training data was encoded.
word_index = imdb.get_word_index()

In [7]:
def get_word(w, index_from=0, oov_id=0):
    """Return the integer id for one word, or ``oov_id`` if it is unusable.

    A word is unusable when it is missing from the module-level
    ``word_index`` mapping or when its (offset) id is not below
    ``vocab_size``.

    Args:
        w: Word to look up. No normalisation happens here, so callers must
            lower-case it first if the mapping is lower-case.
        index_from: Offset added to the raw index before the vocabulary
            check and before it is returned. The default of 0 keeps the
            original behaviour (raw ``get_word_index()`` values).
        oov_id: Id returned for out-of-vocabulary words. The default of 0
            keeps the original behaviour; note that 0 is also the padding
            id used elsewhere in this notebook, so unknown words become
            indistinguishable from padding.

    Returns:
        The integer id of ``w``, or ``oov_id``.

    NOTE(review): ``imdb.load_data`` defaults to ``index_from=3`` with
    ``oov_char=2``. If the loaded weights were trained on data encoded that
    way, callers should pass ``index_from=3, oov_id=2`` — confirm against
    the training code before changing the defaults.
    """
    raw_index = word_index.get(w)  # single lookup instead of `in` + `[]`
    if raw_index is None:
        return oov_id
    shifted = raw_index + index_from
    return shifted if shifted < vocab_size else oov_id

In [8]:
def tokenize_input(inp):
    """Convert a raw text string into a list of integer word ids.

    The text is split on any run of whitespace, each token is lower-cased
    and then mapped through ``get_word``.

    Splitting with ``str.split()`` (no argument) rather than ``split(" ")``
    means repeated, leading or trailing spaces, tabs and newlines no longer
    produce empty tokens that would be encoded as spurious 0 ids, and an
    empty string yields an empty list.

    Args:
        inp: Text to encode. Punctuation is not stripped, so a token such
            as ``"good."`` is looked up verbatim.

    Returns:
        A list with one integer id per whitespace-separated token.
    """
    return [get_word(token.lower()) for token in inp.split()]

def pad_input(inp):
    """Turn one id sequence into a batch of one, sized to ``seq_len``.

    Args:
        inp: A single sequence of integer token ids.

    Returns:
        The result of ``pad_sequences`` for a one-element batch: padding
        with 0 and truncation are both applied at the front of the
        sequence.
    """
    batch = [inp]  # pad_sequences expects a list of sequences
    front_aligned = dict(
        value=0,
        truncating="pre",
        padding="pre",
        maxlen=seq_len,
    )
    return pad_sequences(batch, **front_aligned)

In [9]:
# Sanity check: show the integer ids produced for a short review.
tokenize_input("this movie is excellent")

[11, 17, 6, 318]

In [10]:
# Score a positive-sounding review end to end (tokenize -> pad -> predict).
# NOTE(review): the model emits two values per input, presumably
# [negative, positive] scores — confirm the class order against training code.
model.predict(pad_input(tokenize_input("this movie is good")))

array([[6.4751357e-06, 9.9999356e-01]], dtype=float32)

In [11]:
# Score a negative-sounding review end to end (tokenize -> pad -> predict).
# NOTE(review): the model emits two values per input, presumably
# [negative, positive] scores — confirm the class order against training code.
model.predict(pad_input(tokenize_input("this movie sucks")))

array([[0.71018857, 0.28981143]], dtype=float32)