In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Disable tensorflow debugging logs
import tensorflow as tf
import matplotlib.pyplot as plt

- Vector

In [2]:
tensor = tf.constant([3, 4, 5])
tensor

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([3, 4, 5], dtype=int32)>

- Obtener dimensión de tensor con __shape__

In [3]:
tensor.shape

TensorShape([3])

- Tensor de 2 dimensiones

In [4]:
tensor = tf.constant([[3, 4, 5], [3, 4, 5]])
tensor, tensor.shape

(<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
 array([[3, 4, 5],
        [3, 4, 5]], dtype=int32)>,
 TensorShape([2, 3]))

- Ejercicio: Crear un tensor de $3 \times 3 \times 2 \times 1$

### Operaciones de tensores

In [5]:
tensor = tf.constant([[3, 4], [3, 4]])

In [6]:
tensor + tensor

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[6, 8],
       [6, 8]], dtype=int32)>

In [7]:
tensor * tensor

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 9, 16],
       [ 9, 16]], dtype=int32)>

- Multiplicación de matrices con __@__

In [8]:
tensor @ tensor

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[21, 28],
       [21, 28]], dtype=int32)>

### Métodos de tensores

- Inicialización en ceros. Pasar como argumento la dimensión del tensor

In [9]:
tensor = tf.zeros([3, 3, 9])
tensor.shape

TensorShape([3, 3, 9])

- Método __reshape__

In [10]:
tf.reshape(tensor, [9, 9])

<tf.Tensor: shape=(9, 9), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>

- Cast de tensores

In [11]:
tf.cast(tensor, dtype=tf.uint8)

<tf.Tensor: shape=(3, 3, 9), dtype=uint8, numpy=
array([[[0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0]]], dtype=uint8)>

- Método __range__

In [12]:
tf.range(4, 8)

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([4, 5, 6, 7], dtype=int32)>

### Cadenas en tensores

In [13]:
tf.constant(['Hola como estas'])

<tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Hola como estas'], dtype=object)>

- Listas de cadenas

In [14]:
string = tf.constant(['Hola como estas',
                      'Bien y tu?',
                      'Que haciendo?'])

string[0], string[2]

(<tf.Tensor: shape=(), dtype=string, numpy=b'Hola como estas'>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'Que haciendo?'>)

## Importar dataset

In [15]:
import tensorflow_datasets as tfds

In [16]:
dataset = tfds.load('imdb_reviews', as_supervised=True)

In [17]:
raw_train_ds, raw_test_ds = dataset['train'], dataset['test']

In [18]:
for text, label in raw_train_ds.take(1):
    print(text.numpy(), label.numpy())

b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it." 0


## Preparar dataset

In [19]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BUFFER_SIZE = tf.data.experimental.cardinality(raw_train_ds)
BUFFER_SIZE.numpy()

25000

In [20]:
batch_size = 16
voc_size = 5000
seq_length = 20

train_ds = raw_train_ds.shuffle(BUFFER_SIZE).batch(
        batch_size, num_parallel_calls=AUTOTUNE).prefetch(
        AUTOTUNE)


test_ds = raw_test_ds.batch(
        batch_size, num_parallel_calls=AUTOTUNE).prefetch(
        AUTOTUNE)

In [21]:
for i in train_ds.take(1):
    print(i)

(<tf.Tensor: shape=(16,), dtype=string, numpy=
array([b'For the record, I am not affiliated with the production in any way.<br /><br />Hidden Frontier is probably the Star Trek fan film with the most episodes produced to date. Over 7 seasons (this is the last) they have produced some 50 or so episodes.<br /><br />This is no mean feat on almost no budget and everyone volunteering their time and energy.<br /><br />By their own admission, the earlier seasons do not have as good production qualities as later ones but as they progress the effects, green screen work and acting all improve.<br /><br />I did find it difficult to "dip into" so started from the beginning and watched all the way through. HF benefits from story arcs just like all the best sci fi and dovetails nicely into the Star Trek universe in which it is set. Characters and "relatives" from the original series have been brought into the stories and add a lot to the feel of the stories, sometimes improving on the characters ove

## Definir modelo

In [22]:
from tensorflow.keras.layers import TextVectorization

In [23]:
def clean_text(raw_text):
    lowercase = tf.strings.lower(raw_text)
    clean = tf.strings.regex_replace(lowercase, '<br />', ' ')
    return clean

vectorize_layer = TextVectorization(
    standardize=clean_text,
    max_tokens=voc_size,
    output_mode='int',
    output_sequence_length=seq_length)

- Adaptar la capa

In [24]:
vectorize_layer_ds = train_ds.map(lambda text, label: text)
vectorize_layer.adapt(vectorize_layer_ds)

In [25]:
vectorize_layer.get_vocabulary()

['',
 '[UNK]',
 'the',
 'a',
 'and',
 'of',
 'to',
 'is',
 'in',
 'i',
 'this',
 'that',
 'it',
 'was',
 'as',
 'for',
 'with',
 'but',
 'on',
 'movie',
 'his',
 'not',
 'are',
 'film',
 'you',
 'have',
 'he',
 'be',
 'at',
 'one',
 'by',
 'an',
 'they',
 'all',
 'from',
 'who',
 'like',
 'so',
 'just',
 'or',
 "it's",
 'has',
 'her',
 'about',
 'if',
 'some',
 'what',
 'out',
 'there',
 'when',
 'very',
 'more',
 'she',
 'even',
 'my',
 'would',
 'good',
 'only',
 'no',
 'their',
 'really',
 'had',
 'which',
 'can',
 'up',
 'were',
 'see',
 'than',
 'we',
 '-',
 'been',
 'into',
 'get',
 'will',
 'much',
 'because',
 'story',
 'most',
 'how',
 'other',
 'first',
 'also',
 "don't",
 'its',
 'do',
 'time',
 'great',
 'me',
 'people',
 'make',
 'could',
 'any',
 'after',
 'then',
 'made',
 'bad',
 'think',
 'many',
 'being',
 'it.',
 'him',
 'never',
 'two',
 'too',
 'little',
 'where',
 'movie.',
 'well',
 'way',
 'watch',
 'your',
 'did',
 'does',
 'best',
 'them',
 'seen',
 'know',
 '

- Probar vectorize_layer con batch de prueba

In [26]:
test_batch = tf.constant([['Hi there']])
vectorize_layer(test_batch)

<tf.Tensor: shape=(1, 20), dtype=int64, numpy=
array([[ 1, 48,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0]])>

In [37]:
rnn = tf.keras.Sequential([
    vectorize_layer,
    tf.keras.layers.Embedding(
        input_dim=voc_size, output_dim=128),
    tf.keras.layers.SimpleRNN(128),
    tf.keras.layers.Dense(1)
])

- Probar rnn con batch de prueba

In [38]:
rnn(test_batch)

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[-0.0050434]], dtype=float32)>

- Información del modelo

In [39]:
rnn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 20)               0         
 torization)                                                     
                                                                 
 embedding_1 (Embedding)     (None, 20, 128)           640000    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 128)               32896     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 673,025
Trainable params: 673,025
Non-trainable params: 0
_________________________________________________________________


## Entrenamiento 

In [40]:
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [41]:
opt = tf.keras.optimizers.SGD(learning_rate=0.001)

In [42]:
train_loss_metric = tf.keras.metrics.BinaryCrossentropy(from_logits=True)
val_loss_metric = tf.keras.metrics.BinaryCrossentropy(from_logits=True)

In [43]:
len(test_ds), tf.data.experimental.cardinality(test_ds)

(1563, <tf.Tensor: shape=(), dtype=int64, numpy=1563>)

In [44]:
epochs = 3

- Definir __train_step__. Obtener pesos de la red recurrente con __trainable_weights__ y aplicar optimizador.

In [45]:
@tf.function
def train_step(text, target):
    with tf.GradientTape() as tape:
        logits = rnn(text, training=True)
        loss_value = loss(tf.cast(target, tf.float32), logits)

    gradients = tape.gradient(loss_value, rnn.trainable_weights)
    opt.apply_gradients(zip(gradients, rnn.trainable_weights))
    train_loss_metric.update_state(tf.cast(target, tf.float32), logits)

In [46]:
for epoch in range(epochs):
    for text, target in train_ds:
        train_step(text, target)
        
    print(f'Loss: {train_loss_metric.result().numpy()}')
    train_loss_metric.reset_states()


Loss: 0.6950814723968506
Loss: 0.6925076842308044
Loss: 0.69037264585495


## Ejercicio

- Agregar loop para conjunto de validación, métrica accuracy, recall, precision y tiempo de entrenamiento para cada conjunto de datos.