In [32]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np

import tensorflow as tf

import tensorflow_hub as hub
import tensorflow_datasets as tfds

In [33]:
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if tf.config.experimental.list_physical_devices("GPU") else "NOT AVAILABLE")

Version:  2.0.0
Eager mode:  True
Hub version:  0.8.0
GPU is NOT AVAILABLE


In [34]:
# train_validation_split = tfds.Split.TRAIN.split([6, 4])

(train_data, validation_data) = tfds.load(
    name="imdb_reviews", 
    split=tfds.Split.TRAIN,
    with_info=True,
    as_supervised=True)

In [35]:
test_data = tfds.load(
    name="imdb_reviews", 
    split=tfds.Split.TEST)

In [36]:
dataset_ = test_data.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)


In [37]:
data_ = dataset_.take(1)
list(data_)[0]

{'label': <tf.Tensor: id=745, shape=(32,), dtype=int64, numpy=
 array([1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 1, 1, 0])>,
 'text': <tf.Tensor: id=746, shape=(32,), dtype=string, numpy=
 array([b'At first, I honestly thought it would be a corny movie. But after seeing this, I was quite surprised. Amanda Bynes was convincingly funny along with the supporting cast (Especially that character played by "Bullet tooth Tony" from Snatch. What a contrasting role between those two movies!). Now, i\'m not one to say whether or not an actor is good or not, but her act, especially, was thoroughly enjoyable. Even though the plot devolved into a teeny-bopper love triangle (though very funny) half-way into the movie, I feel that this shouldn\'t discount, what I think, the movie really is: simply entertaining. So if you happen to stumble upon it, whether by DVD or theater, i\'m confident that you\'ll enjoy.',
        b'I watched 40 minutes and cou

In [38]:
# Build your input pipeline
text, label
dataset = test_data.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)
for features in dataset.take(1):
  text, label = features["text"], features["label"]

In [30]:
text

<tf.Tensor: id=608, shape=(32,), dtype=string, numpy=
array([b'In this film, there is a loose plot of a man (Bardem) who wishes to obtain financing for his construction business, and marries a woman he does not love (the wide-eyed Maria de Medieros) in the process. He maintains his passionate relationship with his first and true love, and ultimately gets entangled in his own romantic web. He never gives up his juggling act, until the three main characters come face to face. The film results boring, with lots of free sex (well, both girls are really good), all the reactions in the film are absurd, incoherent and of course, too much stupid. None of the characters are believable, which makes the movie a little annoying. Anyway, the acting is surprisingly good for such a bad directed film, which makes it a little interesting, but, if you can, watch another film please!',
       b"I kind of like Bam Margera, so I was curious. <br /><br />But watching a home production with somebody elses fr

In [39]:
train_examples_batch, train_labels_batch = next(iter(train_data.batch(10)))
train_examples_batch

<tf.Tensor: id=775, shape=(10,), dtype=string, numpy=
array([b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it.",
       b'I have been known to fall asleep during films, but this is usually due to a combination of things including, really tired, being warm and comfortable on the sette and having just eaten a lot. However on this occasion

In [13]:
train_labels_batch

<tf.Tensor: id=273, shape=(10,), dtype=int64, numpy=array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0])>

In [14]:
train_labels_batch[0]

<tf.Tensor: id=277, shape=(), dtype=int64, numpy=0>

In [40]:
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
hub_layer = hub.KerasLayer(embedding, input_shape=[], 
                           dtype=tf.string, trainable=True)
hub_layer(train_examples_batch[:3])

<tf.Tensor: id=956, shape=(3, 20), dtype=float32, numpy=
array([[ 1.765786  , -3.882232  ,  3.9134233 , -1.5557289 , -3.3362343 ,
        -1.7357955 , -1.9954445 ,  1.2989551 ,  5.081598  , -1.1041286 ,
        -2.0503852 , -0.72675157, -0.65675956,  0.24436149, -3.7208383 ,
         2.0954835 ,  2.2969332 , -2.0689783 , -2.9489717 , -1.1315987 ],
       [ 1.8804485 , -2.5852382 ,  3.4066997 ,  1.0982676 , -4.056685  ,
        -4.891284  , -2.785554  ,  1.3874227 ,  3.8476458 , -0.9256538 ,
        -1.896706  ,  1.2113281 ,  0.11474707,  0.76209456, -4.8791065 ,
         2.906149  ,  4.7087674 , -2.3652055 , -3.5015898 , -1.6390051 ],
       [ 0.71152234, -0.6353217 ,  1.7385626 , -1.1168286 , -0.5451594 ,
        -1.1808156 ,  0.09504455,  1.4653089 ,  0.66059524,  0.79308075,
        -2.2268345 ,  0.07446612, -1.4075904 , -0.70645386, -1.907037  ,
         1.4419787 ,  1.9551861 , -0.42660055, -2.8022065 ,  0.43727064]],
      dtype=float32)>

In [41]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 20)                400020    
_________________________________________________________________
dense (Dense)                (None, 16)                336       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 400,373
Trainable params: 400,373
Non-trainable params: 0
_________________________________________________________________


In [42]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [43]:
history = model.fit(train_data.shuffle(10000).batch(512),
                    epochs=20,
                    validation_data=validation_data.batch(512),
                    verbose=1)

AttributeError: 'DatasetInfo' object has no attribute 'batch'

In [44]:
results = model.evaluate(text.batch(512), verbose=2)
for name, value in zip(model.metrics_names, results):
    print("%s: %.3f" % (name, value))

AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'batch'

In [20]:
test_data.

tfds.core.DatasetInfo(
    name='imdb_reviews',
    version=1.0.0,
    description='Large Movie Review Dataset.
This is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.',
    homepage='http://ai.stanford.edu/~amaas/data/sentiment/',
    features=FeaturesDict({
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
        'text': Text(shape=(), dtype=tf.string),
    }),
    total_num_examples=100000,
    splits={
        'test': 25000,
        'train': 25000,
        'unsupervised': 50000,
    },
    supervised_keys=('text', 'label'),
    citation="""@InProceedings{maas-EtAl:2011:ACL-HLT2011,
      author    = {Maas, Andrew L.  and  Daly, Raymond E.  and  Pham, Peter T.  and  Huang, Dan  and  Ng, Andrew Y.  and  Potts, Christopher},
      title     = {Learning Word

In [21]:
(train_data, validation_data), test_data = tfds.load(
    name="imdb_reviews", 
    split=(tfds.Split.TRAIN, tfds.Split.TEST.batch_size),
    as_supervised=True)

ValueError: too many values to unpack (expected 2)

In [22]:
tfds.Split.TEST.

AttributeError: 'Split' object has no attribute 'batch_size'