# Chapter 16: Natural Language Processing with RNNs and Attention

## Setup

In [None]:
import os
from pathlib import Path
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from keras import (
    callbacks,
    layers,
    optimizers,
    losses,
    Sequential,
    utils,
)
import tensorflow_datasets as tfds
import tensorflow_hub as hub

# Legacy issues with the optimizers 
from tensorflow.keras.optimizers.legacy import Adam, Nadam

# Transformers from Hugging Face
from transformers import pipeline, AutoTokenizer, TFAutoModelForSequenceClassification

In [None]:
# Let tf.data pick parallelism and prefetch buffer sizes dynamically
AUTOTUNE = tf.data.AUTOTUNE
# Default batch size for the datasets built in this notebook
BATCH_SIZE = 128
# Seed handed to the random-number generators for reproducibility
SEED = 1992
# Relative output locations for TensorBoard logs, saved models and downloaded datasets
LOGS_DIR = "../../reports/logs/chapter_16/"
MODELS_PATH = "../../models/chapter_16/"
DATASETS_PATH = "../../datasets/chapter_16"

# TF Hub handle of the pretrained sentence encoder loaded further down
HUB_MODEL = "https://tfhub.dev/google/universal-sentence-encoder/4"

# Controls if the models are fit when loading the notebook
TRAIN = False

In [None]:
# Make sure every output directory exists before anything is written to it
for path in (LOGS_DIR, MODELS_PATH, DATASETS_PATH):
    if tf.io.gfile.exists(path):
        continue
    tf.io.gfile.makedirs(path)

## Generating Shakespearean Text Using a Character RNN

In [None]:
SHAKESPEARE_URL = "https://homl.info/shakespeare"

In [None]:
# Fetch the corpus through Keras' download helper and load it into memory
filepath = utils.get_file("shakespeare.txt", SHAKESPEARE_URL)

# Decode explicitly as UTF-8 so the text does not depend on the platform's
# locale-specific default encoding
with open(filepath, encoding="utf-8") as f:
    shakespeare_text = f.read()

In [None]:
# Peek at the first 80 characters of the corpus
print(shakespeare_text[:80])

In [None]:
# Character-level tokenizer: split the text into characters and lowercase it
text_vec_layer = layers.TextVectorization(split="character", standardize="lower")
# Build the vocabulary from the full corpus
text_vec_layer.adapt([shakespeare_text])
# Encode the whole corpus; [0] selects the single sequence from the batch of one
encoded = text_vec_layer([shakespeare_text])[0]

In [None]:
# TextVectorization reserves ID 0 for padding and ID 1 for unknown tokens;
# neither is used here, so shift every ID down by 2 and shrink the
# vocabulary size to match.
# NOTE: this cell is not idempotent; re-running it shifts `encoded` again.
encoded -= 2
n_tokens = text_vec_layer.vocabulary_size() - 2
dataset_size = len(encoded)

# The original message was missing its noun ("There are N different, and ...")
print(
    f"There are {n_tokens} distinct characters, and the dataset has "
    f"{dataset_size:_} total characters."
)

In [None]:
def to_dataset(sequence, length, shuffle=False, seed=None, batch_size=BATCH_SIZE):
    """Turn a token sequence into batches of (input, target) windows.

    Every window holds ``length + 1`` consecutive tokens and consecutive
    windows start one token apart. Within a batch, the inputs are the first
    ``length`` tokens of each window and the targets are the same windows
    shifted one step ahead.

    Args:
        sequence: sequence of token IDs to slice into windows.
        length: number of tokens in each input window.
        shuffle: whether to shuffle the windows before batching.
        seed: seed forwarded to the shuffle.
        batch_size: number of windows per batch.

    Returns:
        A prefetched ``tf.data.Dataset`` yielding ``(inputs, targets)`` pairs.
    """
    window_size = length + 1

    def as_tensor(window_ds):
        # Each window is itself a dataset; batching collapses it into one tensor
        return window_ds.batch(window_size)

    def split_inputs_targets(windows):
        return windows[:, :-1], windows[:, 1:]

    ds = tf.data.Dataset.from_tensor_slices(sequence).cache()
    ds = ds.window(window_size, shift=1, drop_remainder=True).flat_map(as_tensor)
    if shuffle:
        ds = ds.shuffle(buffer_size=100_000, seed=seed)
    batched = ds.batch(batch_size)
    pairs = batched.map(split_inputs_targets, num_parallel_calls=AUTOTUNE)
    return pairs.prefetch(AUTOTUNE)


In [None]:
length = 100
tf.random.set_seed(SEED)
# Contiguous, non-overlapping splits of the encoded corpus: the first
# 1,000,000 characters for training, the next 60,000 for validation and
# everything after that for testing.
train_set = to_dataset(encoded[:1_000_000], length=length, shuffle=True, seed=SEED)
valid_set = to_dataset(encoded[1_000_000:1_060_000], length=length)
# The test split must start where validation ends; the previous slice
# `encoded[:1_060_000:]` selected the training + validation characters instead.
test_set = to_dataset(encoded[1_060_000:], length=length)

In [None]:
# Build the model: embed the character IDs, run them through a GRU that emits
# one output per time step, and predict a distribution over the next character
char_rnn_model = Sequential(
    [
        layers.Embedding(input_dim=n_tokens, output_dim=16),
        layers.GRU(128, return_sequences=True),
        layers.Dense(n_tokens, activation="softmax"),
    ]
)

# Compile the model
# Use the legacy Nadam imported at the top of the notebook, as every other
# model in this notebook does, instead of the non-legacy `optimizers.Nadam`.
optimizer = Nadam()
loss = losses.sparse_categorical_crossentropy
char_rnn_model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=["accuracy"],
)

# Callbacks and training
# Keep only the checkpoint with the best validation accuracy
model_filepath = tf.io.gfile.join(MODELS_PATH, "char_rnn")
model_checkpoint_cb = callbacks.ModelCheckpoint(
    filepath=model_filepath, monitor="val_accuracy", save_best_only=True
)

# TensorBoard logging, with profiling from batch 1 up to `profile_batch`
log_dir = tf.io.gfile.join(LOGS_DIR, "char_rnn")
profile_batch = 2 * (len(encoded) // BATCH_SIZE)
tensorboard_cb = callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    profile_batch=f"1, {profile_batch}",
)
callbacks_ = [model_checkpoint_cb, tensorboard_cb]

# Training is skipped unless the notebook-wide TRAIN switch is on
if TRAIN:
    history = char_rnn_model.fit(
        train_set, validation_data=valid_set, epochs=2, callbacks=callbacks_
    )

In [None]:
# Wrap the network so it accepts raw strings: tokenize into character IDs,
# shift the IDs down by 2 (the same shift applied to the training data), then
# run the character RNN.
# NOTE: `char_rnn_model` is rebound to the wrapper, so re-running this cell
# wraps the wrapper again.
char_rnn_model = Sequential([
    text_vec_layer,
    layers.Lambda(lambda X: X - 2),
    char_rnn_model,
])

In [None]:
# Probabilities for the character that follows the prompt
# ([0, -1]: first sample of the batch, last time step)
y_proba = char_rnn_model.predict(["To be or not to b"])[0, -1]
y_pred = tf.argmax(y_proba)
# Undo the ID shift (+2) to look the character up in the vocabulary
text_vec_layer.get_vocabulary()[y_pred + 2]

### Generating Fake Shakespearean Text

Let's use the `tf.random.categorical()` function to generate random class indices:

In [None]:
def next_char(text, temperature=1):
    """Sample the character that follows ``text`` from the character RNN.

    The predicted probabilities for the last time step are turned into
    log-probabilities and divided by ``temperature`` before one class is drawn
    with ``tf.random.categorical``.

    Args:
        text: prompt string.
        temperature: divisor applied to the log-probabilities before sampling.

    Returns:
        The sampled vocabulary entry.
    """
    # Keep the time axis ([-1:]) so the result is 2D, as tf.random.categorical expects
    last_step_proba = char_rnn_model.predict([text])[0, -1:]
    scaled_logits = tf.math.log(last_step_proba) / temperature
    samples = tf.random.categorical(scaled_logits, num_samples=1)
    vocabulary = text_vec_layer.get_vocabulary()
    # +2 undoes the ID shift applied in front of the model
    return vocabulary[samples[0, 0] + 2]


def extent_text(text, n_chars=50, temperature=1):
    """Extend ``text`` by ``n_chars`` characters sampled one at a time.

    Each new character is sampled with ``next_char`` from the text produced
    so far, using the given ``temperature``.
    """
    extended = text
    for _ in range(n_chars):
        extended = extended + next_char(extended, temperature)
    return extended

In [None]:
tf.random.set_seed(SEED)
input_text = "to be or not to b"
# Generate a continuation of the same prompt at several temperatures, from
# very low to very high, and print each result
for temp in [0.001, 1, 10, 1000]:
    text = extent_text(input_text, temperature=temp)
    print(f"TEMP:{temp}")
    print(f"\n\t{text}")

### Stateful RNN

In [None]:
def to_dataset_for_stateful_rnn(sequence, length):
    """Build a dataset of consecutive windows, one window per batch.

    Windows hold ``length + 1`` tokens and start ``length`` tokens apart, so
    each window begins on the last token of the previous one. Every batch
    contains a single window, split into inputs (all but the last token) and
    targets (all but the first token).

    Args:
        sequence: sequence of token IDs to slice into windows.
        length: number of tokens in each input window.

    Returns:
        A prefetched ``tf.data.Dataset`` yielding ``(inputs, targets)`` pairs.
    """
    window_size = length + 1

    def as_tensor(window_ds):
        # Each window is itself a dataset; batching collapses it into one tensor
        return window_ds.batch(window_size)

    def split_inputs_targets(windows):
        return windows[:, :-1], windows[:, 1:]

    dataset = tf.data.Dataset.from_tensor_slices(sequence)
    dataset = dataset.window(window_size, shift=length, drop_remainder=True)
    dataset = dataset.flat_map(as_tensor)
    dataset = dataset.batch(1)
    dataset = dataset.map(split_inputs_targets, num_parallel_calls=AUTOTUNE)
    return dataset.prefetch(AUTOTUNE)

In [None]:
# Same contiguous splits as for the stateless model: first 1,000,000
# characters for training, next 60,000 for validation, the rest for testing.
stateful_train_set = to_dataset_for_stateful_rnn(encoded[:1_000_000], length=length)
stateful_valid_set = to_dataset_for_stateful_rnn(
    encoded[1_000_000:1_060_000], length=length
)
# The test split must start where validation ends; the previous slice
# `encoded[:1_060_000:]` selected the training + validation characters instead.
stateful_test_set = to_dataset_for_stateful_rnn(encoded[1_060_000:], length=length)


In this case, creating the model requires specifying the batch size:

In [None]:
# Same architecture as the stateless character RNN, but the GRU is stateful
# and the input batch size is fixed to 1 via `batch_input_shape`
stateful_model = Sequential(
    [
        layers.Embedding(
            input_dim=n_tokens, output_dim=16, batch_input_shape=[1, None]
        ),
        layers.GRU(128, return_sequences=True, stateful=True),
        layers.Dense(n_tokens, activation="softmax"),
    ]
)


In [None]:
class ResetStatesCallback(callbacks.Callback):
    """Reset the model's recurrent states at the start of every epoch."""

    def on_epoch_begin(self, epoch, logs=None):
        # `logs` defaults to None to match the Keras `Callback` hook signature,
        # so the hook can also be invoked without a logs dict
        self.model.reset_states()

In [None]:
stateful_model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=Nadam(),
    metrics=["accuracy"]
)

# Callbacks and training
# Use names distinct from the stateless character RNN ("char_rnn") so this run
# neither overwrites that model's checkpoint nor mixes into its TensorBoard logs.
model_filepath = tf.io.gfile.join(MODELS_PATH, "stateful_char_rnn")
model_checkpoint_cb = callbacks.ModelCheckpoint(
    model_filepath,
    monitor="val_accuracy",
    save_best_only=True,
)

log_dir = tf.io.gfile.join(LOGS_DIR, "stateful_char_rnn")
profile_batch = int(len(encoded) / BATCH_SIZE) * 2
tensorboard_cb = callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, profile_batch=f"1, {profile_batch}")
# ResetStatesCallback clears the GRU state at the start of each epoch
callbacks_ = [model_checkpoint_cb, tensorboard_cb, ResetStatesCallback()]

# Training is skipped unless the notebook-wide TRAIN switch is on
if TRAIN:
    stateful_model.fit(
        stateful_train_set,
        validation_data=stateful_valid_set,
        epochs=2,
        callbacks=callbacks_
    )

## Sentiment Analysis

In [None]:
# Load IMDB reviews as (review, label) pairs: the first 90% of the official
# training split for training, the last 10% for validation, plus the test split
raw_train_set, raw_valid_set, raw_test_set = tfds.load(
    name="imdb_reviews",
    split=["train[:90%]", "train[90%:]", "test"],
    as_supervised=True
)

tf.random.set_seed(SEED)
# Only the training set is shuffled; all three are batched and prefetched
train_set = raw_train_set.shuffle(5000, seed=SEED).batch(BATCH_SIZE).prefetch(AUTOTUNE)
valid_set = raw_valid_set.batch(BATCH_SIZE).prefetch(AUTOTUNE)
test_set = raw_test_set.batch(BATCH_SIZE).prefetch(AUTOTUNE)

In [None]:
# Print seven randomly drawn training reviews with their labels
# (this shuffle is unseeded, so the sample differs between runs)
for review, label in raw_train_set.shuffle(5000).take(7):
    print(review.numpy().decode("utf-8"))
    print(f"Label: {label.numpy()}")

In [None]:
# Vocabulary size shared by the tokenizer and the embedding layers below
vocab_size = 1000


def get_reviews(review, label):
    """Keep the review of a ``(review, label)`` example and drop the label."""
    return review

# Tokenizer limited to `vocab_size` tokens.
# NOTE: this rebinds `text_vec_layer`, previously the character-level tokenizer.
text_vec_layer = layers.TextVectorization(max_tokens=vocab_size)
# Learn the vocabulary from the training reviews only (labels are dropped)
text_vec_layer.adapt(train_set.map(get_reviews, num_parallel_calls=AUTOTUNE))

In [None]:
# Number of batches in the training set
len(train_set)

In [None]:
embed_size = 128
tf.random.set_seed(SEED)

# Baseline sentiment classifier: tokenize -> embed -> GRU -> single sigmoid unit.
# The embedding layer does not set `mask_zero`, so no padding mask is created here.
sentiment_imdb = Sequential([
    text_vec_layer,
    layers.Embedding(vocab_size, embed_size),
    layers.GRU(128),
    layers.Dense(1, activation="sigmoid")
])

# Compile the model
sentiment_imdb.compile(
    loss="binary_crossentropy",
    optimizer=Nadam(),
    metrics=["accuracy"],
)

# Callbacks and training
# Keep only the checkpoint with the best validation accuracy
model_filepath = tf.io.gfile.join(MODELS_PATH, "sentiment_imdb")
model_checkpoint_cb = callbacks.ModelCheckpoint(
    model_filepath,
    monitor="val_accuracy",
    save_best_only=True,
)

log_dir = tf.io.gfile.join(LOGS_DIR, "sentiment_imdb")
tensorboard_cb = callbacks.TensorBoard(log_dir=log_dir)
callbacks_ = [model_checkpoint_cb, tensorboard_cb]

# Training is skipped unless the notebook-wide TRAIN switch is on
if TRAIN:
    sentiment_imdb.fit(
        train_set,
        validation_data=valid_set,
        epochs=2,
        callbacks=callbacks_
    )

### Masking

Retrain the previous model using masking:

In [None]:
embed_size = 128
tf.random.set_seed(SEED)

# Same classifier as before, except the embedding layer sets `mask_zero=True`
# so that token ID 0 (padding) is masked for the layers that follow
sentiment_masking_imdb = Sequential(
    [
        text_vec_layer,
        layers.Embedding(vocab_size, embed_size, mask_zero=True),
        layers.GRU(128),
        layers.Dense(1, activation="sigmoid"),
    ],
    name="sentiment_masking_imdb",
)

# Compile the model
sentiment_masking_imdb.compile(
    loss="binary_crossentropy",
    optimizer=Nadam(),
    metrics=["accuracy"],
)

# Callbacks and training
# Keep only the checkpoint with the best validation accuracy
model_filepath = tf.io.gfile.join(MODELS_PATH, "sentiment_masking_imdb")
model_checkpoint_cb = callbacks.ModelCheckpoint(
    model_filepath,
    monitor="val_accuracy",
    save_best_only=True,
)

log_dir = tf.io.gfile.join(LOGS_DIR, "sentiment_masking_imdb")
tensorboard_cb = callbacks.TensorBoard(log_dir=log_dir)
callbacks_ = [model_checkpoint_cb, tensorboard_cb]

# Training is skipped unless the notebook-wide TRAIN switch is on
if TRAIN:
    sentiment_masking_imdb.fit(
        train_set, validation_data=valid_set, epochs=2, callbacks=callbacks_
    )


Passing the mask using the functional API:

In [None]:
# Scalar string input: one raw review per sample
inputs = layers.Input(shape=[], dtype=tf.string)
token_ids = text_vec_layer(inputs)
# Build the mask by hand: True wherever the token ID is not 0
mask = tf.math.not_equal(token_ids, 0)
z = layers.Embedding(vocab_size, embed_size)(token_ids)
# Pass the mask explicitly to the GRU instead of relying on `mask_zero`
z = layers.GRU(128, dropout=0.2)(z, mask=mask)
outputs = layers.Dense(1, activation="sigmoid")(z)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])

Using the `TextVectorization` layer with ragged tensors:

In [None]:
# Same tokenizer settings as `text_vec_layer`, but configured with `ragged=True`
text_vec_layer_ragged = layers.TextVectorization(max_tokens=vocab_size, ragged=True)
# Learn the vocabulary from the training reviews only (labels are dropped)
text_vec_layer_ragged.adapt(train_set.map(get_reviews, num_parallel_calls=AUTOTUNE))


In [None]:
# Two sentences of different lengths, used to compare the two tokenizers
test_text = [
    "Incredible movie! The best that Washington has done!",
    "DiCaprio was incredible in this movie!",
]

# Output of the regular (non-ragged) tokenizer
text_vec_layer(test_text)


In [None]:
# Output of the ragged tokenizer for the same sentences
text_vec_layer_ragged(test_text)

### Reusing Pretrained Embeddings and Language Models

In [None]:
# Point the TFHUB_CACHE_DIR environment variable at a folder under MODELS_PATH
os.environ["TFHUB_CACHE_DIR"] = tf.io.gfile.join(MODELS_PATH, "tfhub_cache")
# Classifier on top of the TF Hub module: the hub layer takes raw strings
# (scalar string input) and is trainable; two dense layers follow it.
# NOTE: this rebinds `model`, previously the functional masking model.
model = Sequential(
    [
        hub.KerasLayer(HUB_MODEL, trainable=True, dtype=tf.string, input_shape=[]),
        layers.Dense(64, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ],
    name="tfhub_model"
)


In [None]:
# Binary classification setup: binary cross-entropy loss, legacy Nadam optimizer
model.compile(
    loss=losses.binary_crossentropy,
    optimizer=Nadam(),
    metrics=["accuracy"]
)

In [None]:

# Training is skipped unless the notebook-wide TRAIN switch is on.
# No callbacks are used here, so nothing is checkpointed or logged.
if TRAIN:
    model.fit(
        train_set,
        validation_data=valid_set,
        epochs=10
    )

## An Encoder-Decoder Network for Neural Machine Translation

In [None]:
# Download and extract the English-Spanish sentence pairs into DATASETS_PATH
url = "https://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip"
path = tf.keras.utils.get_file(
    "spa-eng.zip", origin=url, cache_dir=DATASETS_PATH, extract=True
)
# The file contains accented Spanish text; decode it explicitly as UTF-8 rather
# than with the platform's locale-dependent default encoding
text = (Path(path).with_name("spa-eng") / "spa.txt").read_text(encoding="utf-8")


The original English text and its translation are separated by a tab character.

In [None]:
# Drop the inverted punctuation marks that only open Spanish sentences
text = text.replace("¡", "").replace("¿", "")
# Each line holds an English sentence and its translation separated by a tab
pairs = [line.split("\t") for line in text.splitlines()]
# Seed NumPy before shuffling: the train/validation split below is taken by
# position, so an unseeded shuffle would change the split on every run
# (including runs that reload a previously trained model).
np.random.seed(SEED)
np.random.shuffle(pairs)
sentences_en, sentences_es = zip(*pairs) # Separates the pairs into 2 lists

In [None]:
# Show the first three sentence pairs after shuffling
for i in range(3):
    print(f"{sentences_en[i]} => {sentences_es[i]}")

Let's create two `TextVectorization` layers (one per language) and adapt them:

In [None]:
# NOTE: rebinds `vocab_size` (1000 in the sentiment-analysis section)
vocab_size = 2000
# Every tokenized sentence is padded or truncated to this many tokens
max_length = 50
# English TextVectorization layer
text_vec_layer_en = layers.TextVectorization(
    vocab_size, output_sequence_length=max_length
)
text_vec_layer_en.adapt(sentences_en)

# Spanish TextVectorization layer
text_vec_layer_es = layers.TextVectorization(
    vocab_size, output_sequence_length=max_length
)
# Wrap each Spanish sentence in start/end markers so both marker words
# become part of the Spanish vocabulary
text_vec_layer_es.adapt([f"startofseq {s} endofseq" for s in sentences_es])

In [None]:
# First 15 entries of the English vocabulary
text_vec_layer_en.get_vocabulary()[:15]

In [None]:
# First 15 entries of the Spanish vocabulary
text_vec_layer_es.get_vocabulary()[:15]

In [None]:
# Total number of sentence pairs
len(sentences_en)

Create the training and validation sets:

In [None]:
# The first 100,000 shuffled pairs are used for training, the rest for validation.
# Training and validation inputs for the encoder
x_train = tf.constant(sentences_en[:100_000])
x_valid = tf.constant(sentences_en[100_000:])
# Training and validation inputs for the decoder
# (the Spanish sentence prefixed with the start marker)
x_train_dec = tf.constant([f"startofseq {s}" for s in sentences_es[:100_000]])
x_valid_dec = tf.constant([f"startofseq {s}" for s in sentences_es[100_000:]])
# Training and validation outputs for the decoder
# (token IDs of the Spanish sentence followed by the end marker)
y_train = text_vec_layer_es([f"{s} endofseq" for s in sentences_es[:100_000]])
y_valid = text_vec_layer_es([f"{s} endofseq" for s in sentences_es[100_000:]])

Let's create the translation model:

In [None]:
# Inputs of the model
# (scalar strings: one raw sentence per sample for each side)
encoder_inputs = layers.Input(shape=[], dtype=tf.string)
decoder_inputs = layers.Input(shape=[], dtype=tf.string)

# Add the embeddings
embed_size = 128
encoder_input_ids = text_vec_layer_en(encoder_inputs)
decoder_input_ids = text_vec_layer_es(decoder_inputs)
# Separate embedding layers per language; both mask token ID 0
encoder_embedding_layer = layers.Embedding(vocab_size, embed_size, mask_zero=True)
decoder_embedding_layer = layers.Embedding(vocab_size, embed_size, mask_zero=True)
encoder_embeddings = encoder_embedding_layer(encoder_input_ids)
decoder_embeddings = decoder_embedding_layer(decoder_input_ids)

# Encoder
# return_state=True: the call returns the output followed by the LSTM states
encoder = layers.LSTM(512, return_state=True)
encoder_outputs, *encoder_state = encoder(encoder_embeddings)

# Decoder
# Starts from the encoder's final states and emits one output per time step
decoder = layers.LSTM(512, return_sequences=True)
decoder_outputs = decoder(decoder_embeddings, initial_state=encoder_state)

# Output layer
# Distribution over the Spanish vocabulary at every time step
output_layer = layers.Dense(vocab_size, activation="softmax")
y_proba = output_layer(decoder_outputs)

# Create the model
translation_model = tf.keras.Model(
    inputs=[encoder_inputs, decoder_inputs], outputs=[y_proba]
)

translation_model.compile(
    loss=losses.sparse_categorical_crossentropy,
    optimizer=Nadam(),
    metrics=["accuracy"],
)

# Training is skipped unless the notebook-wide TRAIN switch is on.
# No callbacks are used here, so nothing is checkpointed or logged.
if TRAIN:
    translation_model.fit(
        (x_train, x_train_dec),
        y_train,
        epochs=10,
        validation_data=((x_valid, x_valid_dec), y_valid),
    )


In [None]:
def translate(sentence_en, model):
    """Greedily translate an English sentence, one word per decoding step.

    At every step the model receives the English sentence together with the
    translation produced so far (prefixed with ``startofseq``), and the most
    probable word at the current position is appended. Decoding stops when
    ``endofseq`` is predicted or after ``max_length`` words.

    Args:
        sentence_en: English sentence to translate.
        model: model whose ``predict`` accepts ``(encoder_inputs,
            decoder_inputs)`` and returns per-position word probabilities.

    Returns:
        The translated sentence, without the start/end markers.
    """
    # Loop invariants: the encoder input and the Spanish vocabulary do not
    # change between decoding steps, so build them once
    x = np.array([sentence_en])
    vocabulary = text_vec_layer_es.get_vocabulary()

    translation = ""
    for word_idx in range(max_length):
        x_dec = np.array([f"startofseq {translation}"])
        # Probabilities for the word at position `word_idx` of the only sample
        y_proba = model.predict((x, x_dec), verbose=0)[0, word_idx]

        predicted_word = vocabulary[np.argmax(y_proba)]
        if predicted_word == "endofseq":
            break
        translation += f" {predicted_word}"
    return translation.strip()

### Bidirectional RNNs

In [None]:
# Bidirectional encoder: a 256-unit LSTM wrapped in `Bidirectional`,
# returning its states as well as its output
encoder = layers.Bidirectional(
    layers.LSTM(256, return_state=True)
)

In [None]:
# The bidirectional layer returns four states; concatenate them pairwise so
# the result has the two-state shape expected by a single LSTM decoder
encoder_outputs, *encoder_state = encoder(encoder_embeddings)
encoder_state = [
    tf.concat(encoder_state[::2], axis=-1), # short-term state 0 & 2
    tf.concat(encoder_state[1::2], axis=-1), # long-term state 1 & 3
]

## Attention Mechanisms

In [None]:
# Inputs of the model
# (scalar strings: one raw sentence per sample for each side)
encoder_inputs = layers.Input(shape=[], dtype=tf.string)
decoder_inputs = layers.Input(shape=[], dtype=tf.string)

# Add the embeddings
embed_size = 128
encoder_input_ids = text_vec_layer_en(encoder_inputs)
decoder_input_ids = text_vec_layer_es(decoder_inputs)
# Separate embedding layers per language; both mask token ID 0
encoder_embedding_layer = layers.Embedding(vocab_size, embed_size, mask_zero=True)
decoder_embedding_layer = layers.Embedding(vocab_size, embed_size, mask_zero=True)
encoder_embeddings = encoder_embedding_layer(encoder_input_ids)
decoder_embeddings = decoder_embedding_layer(decoder_input_ids)

# Encoder
# return_sequences=True so the attention layer can look at every encoder step
encoder = layers.Bidirectional(
    layers.LSTM(256, return_sequences=True, return_state=True)
)
encoder_outputs, *encoder_state = encoder(encoder_embeddings)
# Concatenate the four returned states pairwise (indices 0 & 2, then 1 & 3)
# into the two initial states handed to the decoder
encoder_state = [
    tf.concat(encoder_state[::2], axis=-1),
    tf.concat(encoder_state[1::2], axis=-1)
]

# Decoder
decoder = layers.LSTM(512, return_sequences=True)
decoder_outputs = decoder(decoder_embeddings, initial_state=encoder_state)

# Attention layer
# The decoder outputs are the queries, the encoder outputs are the values
attention_layer = layers.Attention()
attention_outputs = attention_layer([decoder_outputs, encoder_outputs])

# Output layer
# Distribution over the Spanish vocabulary at every time step
output_layer = layers.Dense(vocab_size, activation="softmax")
y_proba = output_layer(attention_outputs)

# Create the model
# NOTE: rebinds `translation_model`, previously the model without attention
translation_model = tf.keras.Model(
    inputs=[encoder_inputs, decoder_inputs], outputs=[y_proba]
)

translation_model.compile(
    loss=losses.sparse_categorical_crossentropy,
    optimizer=Nadam(),
    metrics=["accuracy"],
)

# Callbacks and training
# Keep only the checkpoint with the best validation accuracy
model_filepath = tf.io.gfile.join(MODELS_PATH, "translate_attention_model")
model_checkpoint_cb = callbacks.ModelCheckpoint(
    model_filepath,
    monitor="val_accuracy",
    save_best_only=True,
)

log_dir = tf.io.gfile.join(LOGS_DIR, "translate_attention_model")
tensorboard_cb = callbacks.TensorBoard(log_dir=log_dir)
callbacks_ = [model_checkpoint_cb, tensorboard_cb]

# Validate if we need to train the model or to load a saved model
train_attention = TRAIN
if train_attention:
    translation_model.fit(
        (x_train, x_train_dec),
        y_train,
        epochs=10,
        validation_data=((x_valid, x_valid_dec), y_valid),
        # The callbacks were built above but never handed to fit(), so no
        # checkpoint or TensorBoard log was produced; pass them explicitly.
        callbacks=callbacks_,
    )

    # NOTE: this saves the final-epoch model to the same path the checkpoint
    # callback writes to, replacing whatever it stored there.
    translation_model.save(model_filepath, save_format="tf")
else:
    # Requires a model previously saved at `model_filepath`
    translation_model = tf.keras.models.load_model(model_filepath)


In [None]:
# Translate a sample sentence with the attention model
string = "I like to play soccer with my cats"
translate(string, translation_model)

### Attention Is All You Need: The Original Transformer Architecture

Let's create the class for the position encoding layer:

In [None]:
class PositionalEncoding(layers.Layer):
    """Add fixed sinusoidal positional encodings to a sequence of embeddings.

    The encodings are precomputed once for ``max_length`` positions: even
    feature indices hold sines and odd feature indices hold cosines. At call
    time the first ``tf.shape(inputs)[1]`` positions are added to the inputs.

    Args:
        max_length: number of positions to precompute.
        embed_size: size of the last input dimension; must be even.
        dtype: dtype of the layer and of the stored encodings.
        **kwargs: forwarded to ``layers.Layer``.

    Raises:
        ValueError: if ``embed_size`` is odd.
    """

    def __init__(self, max_length, embed_size, dtype=tf.float32, **kwargs):
        super().__init__(dtype=dtype, **kwargs)
        # Raise explicitly instead of asserting: asserts are stripped under
        # `python -O`, which would let an odd size through to the array code
        if embed_size % 2 != 0:
            raise ValueError("embed size must be even")
        # Kept so that get_config() can describe how to rebuild the layer
        self.max_length = max_length
        self.embed_size = embed_size
        p, i = np.meshgrid(
            np.arange(max_length),
            2 * np.arange(embed_size // 2)
        )
        # Shared argument of the sine and cosine terms, computed once
        angles = (p / 10_000 ** (i / embed_size)).T
        pos_emb = np.empty((1, max_length, embed_size))
        pos_emb[0, :, ::2] = np.sin(angles)
        pos_emb[0, :, 1::2] = np.cos(angles)
        self.pos_encodings = tf.constant(pos_emb.astype(self.dtype))
        # Let an incoming mask pass through this layer unchanged
        self.supports_masking = True

    def call(self, inputs):
        # Slice the table to the length of the current batch before adding
        batch_max_length = tf.shape(inputs)[1]
        return inputs + self.pos_encodings[:, :batch_max_length]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {"max_length": self.max_length, "embed_size": self.embed_size}
        )
        return config


In [None]:
max_length = 50
embed_size = 128
N = 2  # number of encoder and decoder blocks
num_heads = 8
dropout_rate = 0.1
n_units = 128  # for the first dense layers in the feed-forward block

# Inputs of the model
# (scalar strings: one raw sentence per sample for each side)
encoder_inputs = layers.Input(shape=[], dtype=tf.string)
decoder_inputs = layers.Input(shape=[], dtype=tf.string)

# Add the embeddings
encoder_input_ids = text_vec_layer_en(encoder_inputs)
decoder_input_ids = text_vec_layer_es(decoder_inputs)
# Separate embedding layers per language; both mask token ID 0
encoder_embedding_layer = layers.Embedding(vocab_size, embed_size, mask_zero=True)
decoder_embedding_layer = layers.Embedding(vocab_size, embed_size, mask_zero=True)
encoder_embeddings = encoder_embedding_layer(encoder_input_ids)
decoder_embeddings = decoder_embedding_layer(decoder_input_ids)

# A single positional-encoding layer is shared by the encoder and decoder inputs
pos_embed_layer = PositionalEncoding(max_length, embed_size)
encoder_in = pos_embed_layer(encoder_embeddings)
decoder_in = pos_embed_layer(decoder_embeddings)

# Encoder
# N stacked blocks; each block is self-attention followed by a two-layer
# feed-forward network, and each of the two parts is wrapped in a skip
# connection plus layer normalization.
Z = encoder_in
for _ in range(N):
    skip = Z
    attention_layer = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=embed_size, dropout=dropout_rate
    )
    # Self-attention: query and value are both Z
    Z = attention_layer(Z, value=Z)
    Z = layers.Add()([Z, skip])
    Z = layers.LayerNormalization()(Z)
    skip = Z
    # Feed-forward part, projected back to embed_size so it can be added to `skip`
    Z = layers.Dense(n_units, activation="relu")(Z)
    Z = layers.Dense(embed_size)(Z)
    Z = layers.Dropout(dropout_rate)(Z)
    Z = layers.Add()([Z, skip])
    Z = layers.LayerNormalization()(Z)
encoder_outputs = Z

# Decoder
# N stacked blocks; each block is causal self-attention, then attention over
# the encoder outputs, then a two-layer feed-forward network. Each of the
# three parts is wrapped in a skip connection plus layer normalization.
Z = decoder_in
for _ in range(N):
    skip = Z
    attention_layer = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=embed_size, dropout=dropout_rate
    )
    # Causal self-attention: query and value are both Z, with a causal mask
    Z = attention_layer(Z, value=Z, use_causal_mask=True)
    Z = layers.Add()([Z, skip])
    Z = layers.LayerNormalization()(Z)
    skip = Z
    attention_layer = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=embed_size, dropout=dropout_rate
    )
    # Cross-attention: the decoder queries the encoder outputs
    Z = attention_layer(Z, value=encoder_outputs)
    Z = layers.Add()([Z, skip])
    Z = layers.LayerNormalization()(Z)
    skip = Z
    # Feed-forward part.
    # NOTE(review): unlike the encoder block, there is no Dropout here -- confirm this is intentional.
    Z = layers.Dense(n_units, activation="relu")(Z)
    Z = layers.Dense(embed_size)(Z)
    Z = layers.Add()([Z, skip])
    Z = layers.LayerNormalization()(Z)
decoder_outputs = Z
# Distribution over the Spanish vocabulary at every time step
y_probs = layers.Dense(vocab_size, activation="softmax")(Z)

# Callbacks and training
# Keep only the checkpoint with the best validation accuracy
model_filepath = tf.io.gfile.join(MODELS_PATH, "transformer")
model_checkpoint_cb = callbacks.ModelCheckpoint(
    model_filepath,
    monitor="val_accuracy",
    save_best_only=True,
)

log_dir = tf.io.gfile.join(LOGS_DIR, "transformer")
tensorboard_cb = callbacks.TensorBoard(log_dir=log_dir)
callbacks_ = [model_checkpoint_cb, tensorboard_cb]

# Create and train the model
transformer = tf.keras.Model(inputs=[encoder_inputs, decoder_inputs], outputs=[y_probs])
transformer.compile(
    loss=losses.sparse_categorical_crossentropy,
    optimizer=Nadam(),
    metrics=["accuracy"]
)

# Either train and save the model, or load a previously saved one
train = TRAIN
if train:
    transformer.fit(
        (x_train, x_train_dec),
        y_train,
        epochs=10,
        validation_data=((x_valid, x_valid_dec), y_valid),
        callbacks=callbacks_
    )
    # NOTE: this saves the final-epoch model to the same path the checkpoint
    # callback writes to.
    transformer.save(model_filepath, save_format="tf")
else:
    # Requires a model previously saved at `model_filepath`
    transformer = tf.keras.models.load_model(model_filepath)

## Hugging Face's Transformers Library

In [None]:
# Sentiment-analysis pipeline; no model is named, so the library's default is used
classifier = pipeline("sentiment-analysis")
result = classifier("The actors were very convincing")

print(result)

In [None]:
# Classify several phrases in a single call
phrases = [
    "Marlon did exactly what I requested",
    "I am from El Salvador",
    "He is a gang member"
]
classifier(phrases)

In [None]:
# Text-classification pipeline backed by an explicitly named MNLI checkpoint.
# NOTE: `model` is rebound to the checkpoint name (a string) here.
model = "huggingface/distilbert-base-uncased-finetuned-mnli"
classifier_mnli = pipeline("text-classification", model=model)
# The two sentences of the pair are separated by [SEP]
classifier_mnli("She wakes up early. [SEP] I am not happy.")


In [None]:
# Load the tokenizer and the TensorFlow sequence-classification model for the
# same checkpoint; `model` is rebound to the loaded model
model_name = "huggingface/distilbert-base-uncased-finetuned-mnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)

In [None]:
# Tokenize two sentence pairs (sentences separated by [SEP]), with padding
# enabled and TensorFlow tensors as the return type
token_ids = tokenizer(
    [
        "I like soccer. [SEP] We all love soccer!",
        "Joe lived for a very long time. [SEP] Joe is old.",
    ],
    padding=True,
    return_tensors="tf",
)
token_ids

In [None]:
# Run the model on the tokenized pairs and display its raw output
outputs = model(token_ids)
outputs

In [None]:
# Turn the logits into probabilities, then pick the most probable class per row
y_probas = tf.keras.activations.softmax(outputs.logits)
y_pred = tf.argmax(y_probas, axis=1)
y_pred.numpy()

In [None]:
# Two (sentence, sentence) pairs used as a tiny fine-tuning set
sentences = [
    ("sky is blue", "sky is red"),
    ("i love her", "she loves me")
]

# `.data` extracts the underlying dict of tensors from the tokenizer output.
# NOTE: this rebinds `x_train` / `y_train` from the translation section.
x_train = tokenizer(sentences, padding=True, return_tensors="tf").data
y_train = tf.constant([0, 2]) # Contradiction and neutral
# from_logits=True: the loss is applied to the model's logits, not to probabilities
loss = losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
    loss=loss,
    optimizer=Nadam(),
    metrics=["accuracy"]
)
# NOTE: unlike the other training cells, this fit is not guarded by TRAIN
history = model.fit(x_train, y_train, epochs=2)