In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Disable tensorflow debugging logs
import tensorflow as tf
import matplotlib.pyplot as plt

## Importar dataset

In [2]:
import tensorflow_datasets as tfds
import tensorflow_hub as hub
import tensorflow_text as text

In [3]:
dataset = tfds.load('imdb_reviews', as_supervised=True)

In [4]:
raw_train_ds, raw_test_ds = dataset['train'], dataset['test']

In [5]:
for text, label in raw_train_ds.take(1):
    print(text.numpy(), label.numpy())

b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it." 0


## Pipeline

In [6]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BUFFER_SIZE = tf.data.experimental.cardinality(raw_train_ds)
BUFFER_SIZE.numpy()

25000

In [7]:
import string

In [8]:
tf.strings.regex_replace('My favorite dog?', f"([{string.punctuation}])", r"")

<tf.Tensor: shape=(), dtype=string, numpy=b'My favorite dog'>

In [9]:
batch_size = 16
voc_size = 5000
max_length = 200

In [10]:
def clean_text(raw_text, label):
    lowercase = tf.strings.lower(raw_text)
    lowercase = tf.strings.substr(lowercase, 0, max_length)
    clean = tf.strings.regex_replace(lowercase, '<br />', ' ')
    clean = tf.strings.regex_replace(clean, 
                                     f"([{string.punctuation}])", r"")
    return clean, label

In [11]:
train_ds = raw_train_ds.map(clean_text)

for text, label in train_ds.take(1):
    print(text)
    print(tf.strings.substr(
    text, 0, max_length)
)

tf.Tensor(b'this was an absolutely terrible movie dont be lured in by christopher walken or michael ironside both are great actors but this must simply be their worst role in history even their great acting ', shape=(), dtype=string)
tf.Tensor(b'this was an absolutely terrible movie dont be lured in by christopher walken or michael ironside both are great actors but this must simply be their worst role in history even their great acting ', shape=(), dtype=string)


In [12]:
train_ds = raw_train_ds.map(clean_text).shuffle(BUFFER_SIZE).batch(
        batch_size, num_parallel_calls=AUTOTUNE).prefetch(
        AUTOTUNE)

test_ds = raw_test_ds.map(clean_text).batch(
        batch_size, num_parallel_calls=AUTOTUNE).prefetch(
        AUTOTUNE)

In [13]:
for text, label in train_ds.take(1):
    print(text)

tf.Tensor(
[b'more than just a kids movie holes looks at how past incidents still affect us today whether we know about them or not when teenager stanley yelnats iii shia leboeuf gets sent to a prison cam'
 b'i caught this film on azn on cable it sounded like it would be a good film a japanese green card i cant say ive ever disliked an asian film quite the contrary some of the most incredible horr'
 b'spoiler alert all throughout australia the summer turned into a deluge of rain and hail stones the size of baseballs that was causing havoc in coastal cities like sydney its under these hect'
 b'another demonstration of kurosawas genius his first colour film is a darkly surreal look into the tragic lives of tokyo slum dwellers essentially a series of interweaving vignettes depicting severa'
 b'hi everyone my names larissa im 13 years old when i was about 4 years old i watch curly sue and it knocked my socks of i have been watching that movie for a long time in fact about 30 minutes ago i 

## Definir modelo

<img src="../img/bert.png" width="700"/>

__Imagen tomada de Devlin, J., Chang, M. W., Lee, K., & Toutanova, K. (2018). Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805.__

In [14]:
train_text, _ = next(iter(train_ds))

In [15]:
train_text

<tf.Tensor: shape=(16,), dtype=string, numpy=
array([b'being that i am a true product of the hiphop and electronic dance music generation this is without a doubt one of my favorite movies of all time beat street although not as authentic in some res',
       b'i cannot believe the same guy directed this crap and dracula 2000 dracula 2000 was innovative fresh and well written if poorly acted  this pile cant even claim that it starts with the',
       b'tarzan and his mate1934 was the only tarzan movie i didnt see when i was a kid it sounded boring now i have seen it i have seen the ape man1932 about a hundred times and i keep a copy on my dr',
       b'i was initially forced to attend by my wife as she is fascinated by the royal families of britain and their history and she wont go to the cinema without me although viewers shouldnt expect to be ',
       b'engrossing drama of four men on a canoing weekend down a remote river they are pacifist ed jon voight adventurous violent lewis burt

In [16]:
bert_model_path = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1'
bert_preprocess_path = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'

In [17]:
bert_preprocess_model = hub.KerasLayer(bert_preprocess_path)

In [18]:
preprocess_output = bert_preprocess_model(train_text)
preprocess_output

{'input_type_ids': <tf.Tensor: shape=(16, 128), dtype=int32, numpy=
 array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>,
 'input_mask': <tf.Tensor: shape=(16, 128), dtype=int32, numpy=
 array([[1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        ...,
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0]], dtype=int32)>,
 'input_word_ids': <tf.Tensor: shape=(16, 128), dtype=int32, numpy=
 array([[  101,  2108,  2008, ...,     0,     0,     0],
        [  101,  1045,  3685, ...,     0,     0,     0],
        [  101, 24566,  1998, ...,     0,     0,     0],
        ...,
        [  101,  5863,  3766, ...,     0,     0,     0],
        [  101,  2498,  2047, ...,     0,     0,     0],
        [  101, 10931,  2534, ...,     0,     0,

In [19]:
bert_model = hub.KerasLayer(bert_model_path)

In [20]:
bert_model(preprocess_output)['pooled_output']

<tf.Tensor: shape=(16, 512), dtype=float32, numpy=
array([[ 0.38921773,  0.5589638 ,  0.03624528, ...,  0.15941487,
        -0.17959836,  0.29063404],
       [ 0.83405286,  0.80743587, -0.22731142, ..., -0.00577929,
        -0.03727707, -0.7073086 ],
       [ 0.97635967,  0.99464095, -0.04352628, ..., -0.22736534,
        -0.72770953, -0.8641528 ],
       ...,
       [ 0.90034324,  0.47461635, -0.19443458, ...,  0.6052738 ,
        -0.21026924, -0.9670241 ],
       [ 0.6192951 ,  0.7899087 , -0.04462845, ...,  0.03447887,
        -0.18346721,  0.6457753 ],
       [-0.5093894 ,  0.99746615, -0.11423684, ...,  0.08875322,
        -0.43408817, -0.53812146]], dtype=float32)>

In [21]:
text_input = tf.keras.layers.Input(shape=(), 
                                   dtype=tf.string, name='text')
preprocess_text = bert_preprocess_model(text_input)
bert_output = bert_model(preprocess_text)['pooled_output']
output = tf.keras.layers.Dense(1)(bert_output)
small_bert = tf.keras.Model(text_input, output)


- Probar bert con batch de prueba

In [22]:
small_bert(train_text)

<tf.Tensor: shape=(16, 1), dtype=float32, numpy=
array([[-0.5028666 ],
       [-0.35702294],
       [-0.8566029 ],
       [-1.1283555 ],
       [-2.1734977 ],
       [-1.2032174 ],
       [-2.1061082 ],
       [-2.266713  ],
       [-1.1846704 ],
       [-1.1561531 ],
       [-1.4860259 ],
       [-1.0686742 ],
       [-0.9801764 ],
       [-1.9095999 ],
       [-1.4538208 ],
       [-1.1997366 ]], dtype=float32)>

- Información del modelo

In [23]:
small_bert.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_type_ids':   0           ['text[0][0]']                   
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128),                                                          
                                 'input_word_ids':                                                
                                (None, 128)}                                                  

## Entrenamiento 

In [24]:
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [25]:
opt = tf.keras.optimizers.SGD(learning_rate=0.001)

In [26]:
train_loss_avg = tf.keras.metrics.Mean(name='train_loss')
val_loss_avg = tf.keras.metrics.Mean(name='train_loss')

In [27]:
len(test_ds), tf.data.experimental.cardinality(test_ds)

(1563, <tf.Tensor: shape=(), dtype=int64, numpy=1563>)

In [28]:
@tf.function
def train_step(text, target):
    with tf.GradientTape() as tape:
        logits = small_bert(text, training=True)
        loss_value = loss(tf.cast(target, tf.float32), logits)

    gradients = tape.gradient(loss_value, small_bert.trainable_weights)
    opt.apply_gradients(zip(gradients, small_bert.trainable_weights))
    train_loss_avg(loss_value)
    
@tf.function
def val_step(text, target):
    with tf.GradientTape() as tape:
        logits = small_bert(text, training=False)
        loss_value = loss(tf.cast(target, tf.float32), logits)

    val_loss_avg(loss_value)


In [29]:
epochs = 3

In [30]:
for epoch in range(epochs):
    for text, target in train_ds:
        train_step(text, target)
        
    print(f'Epoch: {epoch} Train loss: {train_loss_avg.result().numpy()}')
    train_loss_avg.reset_states()
    
    for text, target in test_ds:
        val_step(text, target)
        
    print(f'Val loss: {val_loss_avg.result().numpy()}')
    val_loss_avg.reset_states()

Epoch: 0 Train loss: 0.7018167972564697
Val loss: 0.6670032739639282
Epoch: 1 Train loss: 0.6517457962036133
Val loss: 0.6405548453330994
Epoch: 2 Train loss: 0.6338956952095032
Val loss: 0.6283572912216187


## Ejercicio
- Probar diferentes versiones de BERT