In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Disable tensorflow debugging logs
import tensorflow as tf
import matplotlib.pyplot as plt

## Importar dataset

In [2]:
import tensorflow_datasets as tfds

In [3]:
dataset = tfds.load('imdb_reviews', as_supervised=True)

In [4]:
raw_train_ds, raw_test_ds = dataset['train'], dataset['test']

In [5]:
for text, label in raw_train_ds.take(1):
    print(text.numpy(), label.numpy())

b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it." 0


## Preparar dataset

In [6]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BUFFER_SIZE = tf.data.experimental.cardinality(raw_train_ds)
BUFFER_SIZE.numpy()

25000

In [7]:
batch_size = 16
voc_size = 5000
seq_length = 20

train_ds = raw_train_ds.shuffle(BUFFER_SIZE).batch(
        batch_size, num_parallel_calls=AUTOTUNE).prefetch(
        AUTOTUNE)


test_ds = raw_test_ds.batch(
        batch_size, num_parallel_calls=AUTOTUNE).prefetch(
        AUTOTUNE)

In [8]:
for i in train_ds.take(1):
    print(i)

(<tf.Tensor: shape=(16,), dtype=string, numpy=
array([b'Heart pounding erotic drama are the words that come to mind when I think of "Secret Games". It becomes more erotic as the film goes along and at one point blew me away! I didn\'t expect the delightful scene I was about to encounter. The "call girl" has her first customer and what a customer! One of the most erotic lesbian scenes I have ever seen. The husband should have listened to his wife and perhaps she wouldn\'t have gone on this erotic journey. It turned out to cost them in the end but, it was one exciting ride! GO SEE THIS MOVIE!!!',
       b"Let me be clear. I've used IMDb for years. But only today I went through the trouble of registering on the site, just so I could give this movie the lowest possible rating. I've seen hundreds of films, some of them bad, a few awful. Never, though, have i seen such a contrast of pretense and incompetence, of high intentions and failure.<br /><br />Mira Sorvino is horribly cast as the pri

## Definir modelo

In [9]:
from tensorflow.keras.layers import TextVectorization

In [10]:
def clean_text(raw_text):
    lowercase = tf.strings.lower(raw_text)
    clean = tf.strings.regex_replace(lowercase, '<br />', ' ')
    return clean

vectorize_layer = TextVectorization(
    standardize=clean_text,
    max_tokens=voc_size,
    output_mode='int',
    output_sequence_length=seq_length)

- Adaptar la capa

In [11]:
vectorize_layer_ds = train_ds.map(lambda text, label: text)
vectorize_layer.adapt(vectorize_layer_ds)

In [12]:
vectorize_layer.get_vocabulary()

['',
 '[UNK]',
 'the',
 'a',
 'and',
 'of',
 'to',
 'is',
 'in',
 'i',
 'this',
 'that',
 'it',
 'was',
 'as',
 'for',
 'with',
 'but',
 'on',
 'movie',
 'his',
 'not',
 'are',
 'film',
 'you',
 'have',
 'he',
 'be',
 'at',
 'one',
 'by',
 'an',
 'they',
 'all',
 'from',
 'who',
 'like',
 'so',
 'just',
 'or',
 "it's",
 'has',
 'her',
 'about',
 'if',
 'some',
 'what',
 'out',
 'there',
 'when',
 'very',
 'more',
 'she',
 'even',
 'my',
 'would',
 'good',
 'only',
 'no',
 'their',
 'really',
 'had',
 'which',
 'can',
 'up',
 'were',
 'see',
 'than',
 'we',
 '-',
 'been',
 'into',
 'get',
 'will',
 'much',
 'because',
 'story',
 'most',
 'how',
 'other',
 'first',
 'also',
 "don't",
 'its',
 'do',
 'time',
 'great',
 'me',
 'people',
 'make',
 'could',
 'any',
 'after',
 'then',
 'made',
 'bad',
 'think',
 'many',
 'being',
 'it.',
 'him',
 'never',
 'two',
 'too',
 'little',
 'where',
 'movie.',
 'well',
 'way',
 'watch',
 'your',
 'did',
 'does',
 'best',
 'them',
 'seen',
 'know',
 '

- Probar vectorize_layer con batch de prueba

In [13]:
test_batch = tf.constant([['Hi there']])
vectorize_layer(test_batch)

<tf.Tensor: shape=(1, 20), dtype=int64, numpy=
array([[ 1, 48,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0]])>

In [14]:
rnn = tf.keras.Sequential([
    vectorize_layer,
    tf.keras.layers.Embedding(
        input_dim=voc_size, output_dim=128),
    tf.keras.layers.SimpleRNN(128),
    tf.keras.layers.Dense(1)
])

- Probar rnn con batch de prueba

In [15]:
rnn(test_batch)

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[-0.12433879]], dtype=float32)>

- Información del modelo

In [16]:
rnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 20)               0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 20, 128)           640000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 128)               32896     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 673,025
Trainable params: 673,025
Non-trainable params: 0
_________________________________________________________________


## Entrenamiento 

In [17]:
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [18]:
opt = tf.keras.optimizers.SGD(learning_rate=0.001)

In [19]:
train_loss_avg = tf.keras.metrics.Mean(name='train_loss')

In [20]:
len(test_ds), tf.data.experimental.cardinality(test_ds)

(1563, <tf.Tensor: shape=(), dtype=int64, numpy=1563>)

In [21]:
epochs = 3

- Definir __train_step__. Obtener pesos de la red recurrente con __trainable_weights__ y aplicar optimizador.

In [22]:
@tf.function
def train_step(text, target):
    with tf.GradientTape() as tape:
        logits = rnn(text, training=True)
        loss_value = loss(tf.cast(target, tf.float32), logits)

    gradients = tape.gradient(loss_value, rnn.trainable_weights)
    opt.apply_gradients(zip(gradients, rnn.trainable_weights))
    train_loss_avg(loss_value)

In [23]:
for epoch in range(epochs):
    for text, target in train_ds:
        train_step(text, target)
        
    print(f'Loss: {train_loss_avg.result().numpy()}')
    train_loss_avg.reset_states()


Loss: 0.6954987049102783
Loss: 0.6919211745262146
Loss: 0.6880483627319336


## Ejercicio

- Agregar loop para conjunto de validación, métrica accuracy, recall, precision y tiempo de entrenamiento para cada conjunto de datos.