In [2]:
import collections
import pathlib
import re
import string

import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import preprocessing
from tensorflow.keras import utils
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

import tensorflow_datasets as tfds

In [1]:
import tensorflow_text as tf_text

In [3]:
data_url = 'https://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz'
dataset = utils.get_file(
    'stack_overflow_16k.tar.gz',
    data_url,
    untar=True,
    cache_dir='stack_overflow',
    cache_subdir='')
dataset_dir = pathlib.Path(dataset).parent

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz


In [4]:
list(dataset_dir.iterdir())

[WindowsPath('/tmp/.keras/README.md'),
 WindowsPath('/tmp/.keras/stack_overflow_16k.tar.gz.tar.gz'),
 WindowsPath('/tmp/.keras/test'),
 WindowsPath('/tmp/.keras/train')]

In [10]:
train_dir = dataset_dir/'train'
list(train_dir.iterdir())

[WindowsPath('/tmp/.keras/train/csharp'),
 WindowsPath('/tmp/.keras/train/java'),
 WindowsPath('/tmp/.keras/train/javascript'),
 WindowsPath('/tmp/.keras/train/python')]

In [12]:
sample_file = train_dir/'python/1755.txt'
with open(sample_file) as f:
    print(f.read())

why does this blank program print true x=true.def stupid():.    x=false.stupid().print x



In [13]:
batch_size=32
seed = 42

raw_train_ds = preprocessing.text_dataset_from_directory(
    train_dir,
    batch_size = batch_size,
    validation_split=0.2,
    subset='training',
    seed=seed)

Found 8000 files belonging to 4 classes.
Using 6400 files for training.


In [14]:
for text_batch, label_batch in raw_train_ds.take(1):
    for i in range(10):
        print("Question: ", text_batch.numpy()[i][:100], '...')
        print("Label: ", label_batch.numpy()[i])

Question:  b'"my tester is going to the wrong constructor i am new to programming so if i ask a question that can' ...
Label:  1
Question:  b'"blank code slow skin detection this code changes the color space to lab and using a threshold finds' ...
Label:  3
Question:  b'"option and validation in blank i want to add a new option on my system where i want to add two text' ...
Label:  1
Question:  b'"exception: dynamic sql generation for the updatecommand is not supported against a selectcommand th' ...
Label:  0
Question:  b'"parameter with question mark and super in blank, i\'ve come across a method that is formatted like t' ...
Label:  1
Question:  b'call two objects wsdl the first time i got a very strange wsdl. ..i would like to call the object (i' ...
Label:  0
Question:  b'how to correctly make the icon for systemtray in blank using icon sizes of any dimension for systemt' ...
Label:  0
Question:  b'"is there a way to check a variable that exists in a different script than the orig

In [15]:
for i, label in enumerate(raw_train_ds.class_names):
    print('Label', i, 'corresponds to ', label)

Label 0 corresponds to  csharp
Label 1 corresponds to  java
Label 2 corresponds to  javascript
Label 3 corresponds to  python


In [16]:
raw_val_ds = preprocessing.text_dataset_from_directory(
    train_dir,
    batch_size = batch_size,
    validation_split=0.2,
    subset='validation',
    seed=seed)

Found 8000 files belonging to 4 classes.
Using 1600 files for validation.


In [17]:
test_dir = dataset_dir/'test'
raw_test_ds = preprocessing.text_dataset_from_directory(
    test_dir, batch_size=batch_size)

Found 8000 files belonging to 4 classes.


In [18]:
VOCAB_SIZE=10000

binary_vectorize_layer = TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_mode='binary')

In [19]:
MAX_SEQUENCE_LENGTH = 250
int_vectorize_layer = TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_mode='int',
    output_sequence_length=MAX_SEQUENCE_LENGTH)

In [20]:
train_text = raw_train_ds.map(lambda text, labels: text)
binary_vectorize_layer.adapt(train_text)
int_vectorize_layer.adapt(train_text)

In [21]:
def binary_vectorize_text(text, label):
    text = tf.expand_dims(text, -1)
    return binary_vectorize_layer(text), label

In [22]:
def int_vectorize_text(text, label):
    text = tf.expand_dims(text, -1)
    return int_vectorize_layer(text), label

In [23]:
text_batch, label_batch = next(iter(raw_train_ds))
first_question, first_label = text_batch[0], label_batch[0]
print('Question', first_question)
print('Label', first_label)

Question tf.Tensor(b'"function expected error in blank for dynamically created check box when it is clicked i want to grab the attribute value.it is working in ie 8,9,10 but not working in ie 11,chrome shows function expected error..&lt;input type=checkbox checked=\'checked\' id=\'symptomfailurecodeid\' tabindex=\'54\' style=\'cursor:pointer;\' onclick=chkclickevt(this);  failurecodeid=""1"" &gt;...function chkclickevt(obj) { .    alert(obj.attributes(""failurecodeid""));.}"\n', shape=(), dtype=string)
Label tf.Tensor(2, shape=(), dtype=int32)


In [24]:
print("'binary' vectorized question:", 
      binary_vectorize_text(first_question, first_label)[0])

'binary' vectorized question: tf.Tensor([[1. 1. 1. ... 0. 0. 0.]], shape=(1, 10000), dtype=float32)


In [25]:
print("'int' vectorized question:",
      int_vectorize_text(first_question, first_label)[0])

'int' vectorized question: tf.Tensor(
[[  38  450   65    7   16   12  892  265  186  451   44   11    6  685
     3   46    4 2062    2  485    1    6  158    7  479    1   26   20
   158    7  479    1  502   38  450    1 1767 1763    1    1    1    1
     1    1    1    1    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0

In [26]:
print('1289 --->', int_vectorize_layer.get_vocabulary()[1289])
print('313 --->', int_vectorize_layer.get_vocabulary()[313])
print("Vocabulary size: {}".format(len(int_vectorize_layer.get_vocabulary())))

1289 ---> roman
313 ---> source
Vocabulary size: 10000


In [45]:
binary_train_ds = raw_train_ds.map(binary_vectorize_text)
binary_val_ds = raw_val_ds.map(binary_vectorize_text)
binary_test_ds = raw_test_ds.map(binary_vectorize_text)

int_train_ds = raw_train_ds.map(int_vectorize_text)
int_val_ds = raw_val_ds.map(int_vectorize_text)
int_test_ds = raw_test_ds.map(int_vectorize_text)

In [46]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

def configure_dataset(dataset):
    return dataset.cache().prefetch(buffer_size=AUTOTUNE)

In [47]:
binary_train_ds = configure_dataset(binary_train_ds)
binary_val_ds = configure_dataset(binary_val_ds)
binary_test_ds = configure_dataset(binary_test_ds)

int_train_ds = configure_dataset(int_train_ds)
int_val_ds = configure_dataset(int_val_ds)
int_test_ds = configure_dataset(int_test_ds)

In [48]:
binary_model = tf.keras.Sequential([layers.Dense(4)])
binary_model.compile(
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'])
history = binary_model.fit(
    binary_train_ds, validation_data=binary_val_ds, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [49]:
def create_model(vocab_size, num_labels):
    model = tf.keras.Sequential([
        layers.Embedding(vocab_size, 64, mask_zero=True),
        layers.Conv1D(64, 5, padding='valid', activation='relu', strides=2),
        layers.GlobalMaxPooling1D(),
        layers.Dense(num_labels)
    ])
    return model

In [50]:
int_model = create_model(vocab_size = VOCAB_SIZE+1, num_labels=4)
int_model.compile(
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'])
history = int_model.fit(int_train_ds, validation_data=int_val_ds, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [51]:
print("Linear model on binary vectorized data:")
print(binary_model.summary())

Linear model on binary vectorized data:
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 4)                 40004     
Total params: 40,004
Trainable params: 40,004
Non-trainable params: 0
_________________________________________________________________
None


In [52]:
print("ConvNet model on int vectorized data:")
print(int_model.summary())

ConvNet model on int vectorized data:
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 64)          640064    
_________________________________________________________________
conv1d_2 (Conv1D)            (None, None, 64)          20544     
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 260       
Total params: 660,868
Trainable params: 660,868
Non-trainable params: 0
_________________________________________________________________
None


In [55]:
binary_loss, binary_accuracy = binary_model.evaluate(binary_test_ds)
int_loss, int_accuracy = int_model.evaluate(int_test_ds)

print("Binary model accuracy: {:2.2%}".format(binary_accuracy))
print("Int model accuracy: {:2.2%}".format(int_accuracy))

Binary model accuracy: 81.27%
Int model accuracy: 81.24%


In [57]:
export_model = tf.keras.Sequential([
    binary_vectorize_layer,
    binary_model,
    layers.Activation('sigmoid')
])

export_model.compile(loss = losses.SparseCategoricalCrossentropy(from_logits=False),
                     optimizer = 'adam',
                     metrics=['accuracy'])

loss, accuracy = export_model.evaluate(raw_test_ds)
print("Accuracy: {:2.2%}".format(binary_accuracy))

Accuracy: 81.27%


In [58]:
def get_string_labels(predicted_scores_batch):
    predicted_int_labels = tf.argmax(predicted_scores_batch, axis=1)
    predicted_labels = tf.gather(raw_train_ds.class_names, predicted_int_labels)
    return predicted_labels

In [60]:
inputs = [
    "how do I extract keys from a dict into a list?", #python
    "debug public static void main(string[] args) {...}", #java
]

predicted_scores = export_model.predict(inputs)
predicted_labels = get_string_labels(predicted_scores)
print(predicted_scores)
print(raw_train_ds.class_names)
for input, label in zip(inputs, predicted_labels):
    print("Question: ",input)
    print("Predicted label: ",label.numpy())

[[0.37102163 0.2539064  0.22399515 0.81089264]
 [0.7188579  0.7963106  0.08442047 0.09317389]]
['csharp', 'java', 'javascript', 'python']
Question:  how do I extract keys from a dict into a list?
Predicted label:  b'python'
Question:  debug public static void main(string[] args) {...}
Predicted label:  b'java'


In [63]:
DIRECTORY_URL='https://storage.googleapis.com/download.tensorflow.org/data/illiad/'
FILE_NAMES=['cowper.txt', 'derby.txt', 'butler.txt']

for name in FILE_NAMES:
    text_dir = utils.get_file(name, origin=DIRECTORY_URL + name)
    
parent_dir = pathlib.Path(text_dir).parent
list(parent_dir.iterdir())

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/cowper.txt
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/derby.txt
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/butler.txt


[WindowsPath('C:/Users/June/.keras/datasets/butler.txt'),
 WindowsPath('C:/Users/June/.keras/datasets/cowper.txt'),
 WindowsPath('C:/Users/June/.keras/datasets/derby.txt'),
 WindowsPath('C:/Users/June/.keras/datasets/fashion-mnist'),
 WindowsPath('C:/Users/June/.keras/datasets/flower_photos'),
 WindowsPath('C:/Users/June/.keras/datasets/flower_photos.tar.gz'),
 WindowsPath('C:/Users/June/.keras/datasets/HIGGS.csv.gz'),
 WindowsPath('C:/Users/June/.keras/datasets/imdb.npz'),
 WindowsPath('C:/Users/June/.keras/datasets/imdb_word_index.json'),
 WindowsPath('C:/Users/June/.keras/datasets/mnist.npz')]

In [64]:
def labeler(example, index):
    return example, tf.cast(index, tf.int64)

In [69]:
labeled_data_sets = []
for i, file_name in enumerate(FILE_NAMES):
    lines_dataset = tf.data.TextLineDataset(str(parent_dir/file_name))
    labeled_dataset = lines_dataset.map(lambda ex: labeler(ex, i))
    labeled_data_sets.append(labeled_dataset)

In [71]:
BUFFER_SIZE = 50000
BATCH_SIZE = 64
VALIDATION_SIZE = 5000

In [72]:
all_labeled_data = labeled_data_sets[0]
for labeled_dataset in labeled_data_sets[1:]:
    all_labeled_data = all_labeled_data.concatenate(labeled_dataset)
    
all_labeled_data = all_labeled_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False)

In [73]:
for text,label in all_labeled_data.take(10):
    print("Sentence: ", text.numpy())
    print("Label: ",label.numpy())

Sentence:  b"Possess'd the Trojans' souls; but Acamas,"
Label:  1
Sentence:  b'men, came up against them, but even they were not so many as the'
Label:  2
Sentence:  b'side stood Menelaus son of Atreus, nursing great sorrow in his heart.'
Label:  2
Sentence:  b'archer, vowing that when he got home to his strong city of Zelea he'
Label:  2
Sentence:  b'Who both with eloquence and wisdom rules'
Label:  0
Sentence:  b'The dwellers in Buprasium, on the shores'
Label:  0
Sentence:  b"While Menelaus thus the cares engross'd"
Label:  0
Sentence:  b"Environ'd, and by steeds, at side of whom"
Label:  0
Sentence:  b'Achilles marvel as he beheld Priam. The others looked one to another'
Label:  2
Sentence:  b'Thy knees, Achilles! Ah, illustrious Chief!'
Label:  0


In [78]:
tokenizer = tf_text.UnicodeScriptTokenizer()

In [79]:
def tokenize(text, unused_label):
    lower_case = tf_text.case_fold_utf8(text)
    return tokenizer.tokenize(lower_case)

In [80]:
tokenized_ds = all_labeled_data.map(tokenize)

Instructions for updating:
`tf.batch_gather` is deprecated, please use `tf.gather` with `batch_dims=-1` instead.


In [81]:
for text_batch in tokenized_ds.take(5):
    print("Tokens: ", text_batch.numpy())

Tokens:  [b'possess' b"'" b'd' b'the' b'trojans' b"'" b'souls' b';' b'but'
 b'acamas' b',']
Tokens:  [b'men' b',' b'came' b'up' b'against' b'them' b',' b'but' b'even' b'they'
 b'were' b'not' b'so' b'many' b'as' b'the']
Tokens:  [b'side' b'stood' b'menelaus' b'son' b'of' b'atreus' b',' b'nursing'
 b'great' b'sorrow' b'in' b'his' b'heart' b'.']
Tokens:  [b'archer' b',' b'vowing' b'that' b'when' b'he' b'got' b'home' b'to'
 b'his' b'strong' b'city' b'of' b'zelea' b'he']
Tokens:  [b'who' b'both' b'with' b'eloquence' b'and' b'wisdom' b'rules']


In [84]:
tokenized_ds = configure_dataset(tokenized_ds)
vocab_dict = collections.defaultdict(lambda: 0)
for toks in tokenized_ds.as_numpy_iterator():
    for tok in toks:
        vocab_dict[tok] += 1
        
vocab = sorted(vocab_dict.items(), key=lambda x: x[1], reverse=True)
vocab = [token for token, count in vocab]
vocab = vocab[:VOCAB_SIZE]
vocab_size = len(vocab)
print("Vocab size: ", vocab_size)
print("First five vocab entries: ", vocab[:5])

Vocab size:  10000
First five vocab entries:  [b',', b'the', b'and', b"'", b'of']


In [85]:
keys = vocab
values = range(2, len(vocab) + 2) # reserve 0 for padding, 1 for OOV

init = tf.lookup.KeyValueTensorInitializer(
    keys, values, key_dtype = tf.string, value_dtype = tf.int64)

num_oov_buckets = 1
vocab_table = tf.lookup.StaticVocabularyTable(init, num_oov_buckets)


In [86]:
def preprocess_text(text, label):
    standardized = tf_text.case_fold_utf8(text)
    tokenized = tokenizer.tokenize(standardized)
    vectorized = vocab_table.lookup(tokenized)
    return vectorized, label

In [87]:
example_text, example_label = next(iter(all_labeled_data))
print("Sentence: ", example_text.numpy())
vectorized_text, example_label = preprocess_text(example_text, example_label)
print("Vectorized sentence: ", vectorized_text.numpy())

Sentence:  b"Possess'd the Trojans' souls; but Acamas,"
Vectorized sentence:  [1308    5    9    3   62    5 2500   10   20 1391    2]


In [88]:
all_encoded_data = all_labeled_data.map(preprocess_text)

In [89]:
train_data = all_encoded_data.skip(VALIDATION_SIZE).shuffle(BUFFER_SIZE)
validation_data = all_encoded_data.take(VALIDATION_SIZE)

In [90]:
train_data = train_data.padded_batch(BATCH_SIZE)
validation_data = validation_data.padded_batch(BATCH_SIZE)

In [91]:
sample_text, sample_labels = next(iter(validation_data))
print("Text batch shape: ", sample_text.shape)
print("Label batch shape: ", sample_labels.shape)
print("First text example: ", sample_text[0])
print("First label example: ", sample_labels[0])

Text batch shape:  (64, 18)
Label batch shape:  (64,)
First text example:  tf.Tensor(
[1308    5    9    3   62    5 2500   10   20 1391    2    0    0    0
    0    0    0    0], shape=(18,), dtype=int64)
First label example:  tf.Tensor(1, shape=(), dtype=int64)


In [92]:
vocab_size += 2

In [93]:
train_data = configure_dataset(train_data)
validation_data = configure_dataset(validation_data)

In [95]:
model = create_model(vocab_size = vocab_size, num_labels = 3)
model.compile(optimizer='adam',
              loss = losses.SparseCategoricalCrossentropy(from_logits = True),
              metrics = ['accuracy'])
history = model.fit(train_data, validation_data = validation_data, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [96]:
loss, accuracy = model.evaluate(validation_data)
print("Loss: ", loss)
print("Accuracy: {:2.2%}".format(accuracy))

Loss:  0.39510858058929443
Accuracy: 83.80%


In [97]:
preprocess_layer = TextVectorization(
    max_tokens=vocab_size,
    standardize = tf_text.case_fold_utf8,
    split=tokenizer.tokenize,
    output_mode='int',
    output_sequence_length=MAX_SEQUENCE_LENGTH
)
preprocess_layer.set_vocabulary(vocab)

In [98]:
export_model = tf.keras.Sequential([
    preprocess_layer,
    model,
    layers.Activation("sigmoid")
])

export_model.compile(
    loss=losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy']
)

In [100]:
test_ds = all_labeled_data.take(VALIDATION_SIZE).batch(BATCH_SIZE)
test_ds = configure_dataset(test_ds)
loss, accuracy = export_model.evaluate(test_ds)
print("Loss: ", loss)
print("Accuracy: {:2.2%}".format(accuracy))

Loss:  0.5118779540061951
Accuracy: 79.20%


In [101]:
inputs = [
    "Join'd to th' Ionians with their flowing robes,", #Label: 1
    "the allies, and his armour flashed about him so that he seemed to all", #Label: 2
    "And with loud clangor of his arms he fell.", #Label 0
]

predicted_scores = export_model.predict(inputs)
predicted_labels = tf.argmax(predicted_scores, axis=1)
for input, label in zip(inputs, predicted_labels):
    print("Question: ", input)
    print("Predicted label: ", label.numpy())

Question:  Join'd to th' Ionians with their flowing robes,
Predicted label:  1
Question:  the allies, and his armour flashed about him so that he seemed to all
Predicted label:  2
Question:  And with loud clangor of his arms he fell.
Predicted label:  0


In [102]:
train_ds = tfds.load(
    'imdb_reviews',
    split='train',
    batch_size = BATCH_SIZE,
    shuffle_files=True,
    as_supervised=True
)

In [103]:
val_ds = tfds.load(
    'imdb_reviews',
    split='train',
    batch_size = BATCH_SIZE,
    shuffle_files=True,
    as_supervised=True
)

In [104]:
for review_batch, label_batch in val_ds.take(1):
    for i in range(5):
        print("Review: ", review_batch[i].numpy())
        print("Label: ", label_batch[i].numpy())

Review:  b'I have been known to fall asleep during films, but this is usually due to a combination of things including, really tired, being warm and comfortable on the sette and having just eaten a lot. However on this occasion I fell asleep because the film was rubbish. The plot development was constant. Constantly slow and boring. Things seemed to happen, but with no explanation of what was causing them or why. I admit, I may have missed part of the film, but i watched the majority of it and everything just seemed to happen of its own accord without any real concern for anything else. I cant recommend this film at all.'
Label:  0
Review:  b'This is the kind of film for a snowy Sunday afternoon when the rest of the world can go ahead with its own business as you descend into a big arm-chair and mellow for a couple of hours. Wonderful performances from Cher and Nicolas Cage (as always) gently row the plot along. There are no rapids to cross, no dangerous waters, just a warm and witty p

In [105]:
vectorize_layer = TextVectorization(
    max_tokens = VOCAB_SIZE,
    output_mode = 'int',
    output_sequence_length=MAX_SEQUENCE_LENGTH
)

train_text = train_ds.map(lambda text, labels: text)
vectorize_layer.adapt(train_text)

In [106]:
def vectorize_text(text, label):
    text = tf.expand_dims(text, -1)
    return vectorize_layer(text), label

In [107]:
train_ds = train_ds.map(vectorize_text)
val_ds = val_ds.map(vectorize_text)

In [108]:
train_ds = configure_dataset(train_ds)
val_ds = configure_dataset(val_ds)

In [109]:
model = create_model(vocab_size = VOCAB_SIZE + 1, num_labels=1)
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, None, 64)          640064    
_________________________________________________________________
conv1d_4 (Conv1D)            (None, None, 64)          20544     
_________________________________________________________________
global_max_pooling1d_3 (Glob (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 65        
Total params: 660,673
Trainable params: 660,673
Non-trainable params: 0
_________________________________________________________________


In [110]:
model.compile(
    loss = losses.BinaryCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy']
)

In [111]:
history = model.fit(train_ds, validation_data = val_ds, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [113]:
loss, accuracy = model.evaluate(val_ds)
print("Loss: ", loss)
print("Accuracy: {:2.2%}".format(accuracy))

Loss:  0.09952496737241745
Accuracy: 97.74%


In [114]:
export_model = tf.keras.Sequential([
    vectorize_layer, 
    model,
    layers.Activation('sigmoid')
])

export_model.compile(
    loss = losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer = 'adam',
    metrics=['accuracy']
)

In [115]:
# 0 -> negative
# 1 -> positive

inputs = [
    "This is a fantastic movie.",
    "This is a bad movie.",
    "This movie was so bad that it was good.",
    "I will never say yes to watching this movie.",
]

predicted_scores = export_model.predict(inputs)
predicted_labels = [int(round(x[0])) for x in predicted_scores]
for input, label in zip(inputs, predicted_labels):
    print("Question: ", input)
    print("Predicted label: ", label)

Question:  This is a fantastic movie.
Predicted label:  1
Question:  This is a bad movie.
Predicted label:  0
Question:  This movie was so bad that it was good.
Predicted label:  0
Question:  I will never say yes to watching this movie.
Predicted label:  1
