##### Copyright 2021 The TensorFlow Authors.

In [1]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TensorFlow Addons Layers: CRF

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/addons/tutorials/layers_crf"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/addons/blob/master/docs/tutorials/layers_crf.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/addons/blob/master/docs/tutorials/layers_crf.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
      <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/addons/docs/tutorials/layers_crf.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

## Overview

This notebook will demonstrate how to use the CRF (Conditional Random Field) layer in TensorFlow Addons.

## Setup

In [2]:
!pip install -U tensorflow-addons

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Requirement already up-to-date: tensorflow-addons in /home/howl/.local/lib/python3.8/site-packages (0.14.0)


In [3]:
import tensorflow as tf
import tensorflow_addons as tfa

2021-08-28 14:43:44.853120: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2021-08-28 14:43:44.853136: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## Define the constants

Define some constants which will be used in multiple places:

In [4]:
# Shared sizes used by both the data generation and the model.

# Upper bound for generated token ids and input size of the embedding layer.
VOCAB_SIZE = 100

# Number of CRF tags. In the copy task the labels are the input token ids
# themselves, so the tag space has the same size as the vocabulary.
TAG_SIZE = VOCAB_SIZE

## Training data

Using a real data set requires a lot of code, so here we simply generate some random data for training.

To make it easier to check the model's predictions, the task given to the model is to copy the text: the model needs to reproduce its input exactly as its output.

In [5]:
# Ten random sequences of six int32 token ids each. Ids start at 1, leaving
# 0 unused: the model's embedding layer is created with mask_zero=True.
train_x = tf.random.uniform(
    shape=(10, 6), minval=1, maxval=VOCAB_SIZE, dtype=tf.int32
)

# Copy task: the target tag sequence is the input sequence itself.
train_y = train_x

2021-08-28 14:43:45.584023: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2021-08-28 14:43:45.610798: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-08-28 14:43:45.611148: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:06:00.0 name: NVIDIA GeForce GTX 1070 computeCapability: 6.1
coreClock: 1.721GHz coreCount: 15 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 238.66GiB/s
2021-08-28 14:43:45.611208: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2021-08-28 14:43:45.611244: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic libra

rflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2021-08-28 14:43:45.612193: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2021-08-28 14:43:45.612236: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcusolver.so.10'; dlerror: libcusolver.so.10: cannot open shared object file: No such file or directory
2021-08-28 14:43:45.612269: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcusparse.so.10'; dlerror: libcusparse.so.10: cannot open shared object file: No such file or directory
2021-08-28 14:43:45.612299: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudnn.so.7'; dlerror: libcudnn.so.7: cannot open shared object file: No such file or directory
2021-08-28 14:43:45.612303: W tensorflow/core/common_runtime/gpu/g

## Create model

Define a BiLSTM+CRF model using the `tfa.layers.CRF` layer.
The CRF layer outputs not only the CRF decoding result (`decode_sequence`), but also some internal variables (`potentials`, `sequence_length` and `kernel`). You will use those internal variables to compute the loss value later.

In [6]:
# Build the model
def build_embedding_bilstm_crf_model(
    vocab_size: int, embed_dims: int, lstm_unit: int, tag_size: int
) -> tf.keras.Model:
    """Assemble an Embedding -> bidirectional LSTM -> CRF tagging model.

    Args:
        vocab_size: Input dimension of the embedding layer.
        embed_dims: Output dimension of the embedding layer.
        lstm_unit: Number of units of the LSTM wrapped by the
            bidirectional layer.
        tag_size: Number of tags handed to the CRF layer.

    Returns:
        A Keras model that takes a batch of int32 id sequences (input named
        ``"x"``) and returns the four CRF layer outputs as a list:
        ``[decode_sequence, potentials, sequence_length, kernel]``.
    """
    token_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="x")

    # mask_zero=True asks the embedding layer to mask positions holding id 0.
    embedded = tf.keras.layers.Embedding(vocab_size, embed_dims, mask_zero=True)(
        token_ids
    )

    # return_sequences=True keeps one output vector per time step, which the
    # CRF layer needs in order to tag every position.
    recurrent = tf.keras.layers.LSTM(lstm_unit, return_sequences=True)
    encoded = tf.keras.layers.Bidirectional(recurrent)(embedded)

    decoded, crf_potentials, seq_len, transitions = tfa.layers.CRF(tag_size)(encoded)

    crf_outputs = [decoded, crf_potentials, seq_len, transitions]
    return tf.keras.Model(inputs=token_ids, outputs=crf_outputs)


# Small model: 32-dimensional embeddings and 8 LSTM units per direction.
model = build_embedding_bilstm_crf_model(
    vocab_size=VOCAB_SIZE,
    embed_dims=32,
    lstm_unit=8,
    tag_size=TAG_SIZE,
)


Run the model on a single batch of data, and inspect the output:

In [7]:
# Keep only the first training sequence; slicing (rather than indexing)
# preserves the leading batch axis the model expects.
inputs_for_inspect = train_x[0:1]

# Forward pass through the model before any training has happened.
decode_sequence, potentials, sequence_length, kernel = model(inputs_for_inspect)

# Show the input ids followed by the tags decoded by the CRF layer.
print(inputs_for_inspect, decode_sequence, sep="\n")

tf.Tensor([[26 80 77 74 46 80]], shape=(1, 6), dtype=int32)
tf.Tensor([[50 44 50 44 50 44]], shape=(1, 6), dtype=int32)


## Define CRF loss function

Using the real y and some internal variables of the CRF layer, you can compute the log-likelihood of the real y. The negative log-likelihood is then used as the loss to optimize.

In [8]:
@tf.function
def crf_loss_func(potentials, sequence_length, kernel, y):
    """Return the mean negative CRF log-likelihood of the tags ``y``.

    Args:
        potentials: ``potentials`` output of the CRF layer.
        sequence_length: ``sequence_length`` output of the CRF layer.
        kernel: ``kernel`` output of the CRF layer, passed on as the
            transition parameters.
        y: Ground-truth tag indices to score.

    Returns:
        A scalar tensor: the negated log-likelihood averaged with
        ``tf.reduce_mean``.
    """
    # The second value returned by crf_log_likelihood is not needed here.
    log_likelihood, _ = tfa.text.crf_log_likelihood(
        potentials, y, sequence_length, kernel
    )

    # Maximising the likelihood is the same as minimising its negative.
    return tf.reduce_mean(-log_likelihood)

## Define optimizer, metrics and train_step function

In [9]:
# Adam optimizer with a learning rate of 0.1.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

# Running mean of the loss values reported by train_step.
train_loss = tf.keras.metrics.Mean(name="train_loss")

@tf.function
def train_step(x, y):
    """Run one optimisation step on a batch and record its loss.

    Uses the notebook-level ``model``, ``optimizer`` and ``train_loss``
    objects.

    Args:
        x: Batch of input id sequences fed to the model.
        y: Batch of target tag sequences, same layout as ``x``.

    Returns:
        None. The step's loss is accumulated into ``train_loss``.
    """
    with tf.GradientTape() as tape:
        # Call the model in training mode so that layers with
        # training-specific behaviour (e.g. dropout) act correctly if they
        # are added later. The decoded sequence is not needed for the loss.
        _, potentials, sequence_length, kernel = model(x, training=True)
        crf_loss = crf_loss_func(potentials, sequence_length, kernel, y)
        # Add any extra losses registered on the model (model.losses).
        loss = crf_loss + tf.reduce_sum(model.losses)

    # Differentiate outside the tape context, then apply the update.
    variables = model.trainable_variables
    grads = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(grads, variables))

    train_loss(loss)

## Define training data

In [10]:
# One dataset element per (input sequence, target sequence) pair; cache()
# keeps the elements around after the first pass over the data.
dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_x, train_y))
    .cache()
)

## Train model

In [11]:
EPOCHS = 10

for epoch in range(EPOCHS):
    # Clear the running mean so the value printed below covers this epoch only.
    train_loss.reset_states()

    # A batch size of 10 matches the number of training sequences, so every
    # epoch consists of a single optimisation step.
    for batch_x, batch_y in dataset.batch(10):
        train_step(batch_x, batch_y)

    print(f"Epoch {epoch + 1}, Loss: {train_loss.result()}")


2021-08-28 14:43:50.012581: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:906] Skipping loop optimization for Merge node with control input: StatefulPartitionedCall/cond_1_11/branch_executed/_240


Epoch 1, Loss: 27.771350860595703
Epoch 2, Loss: 25.085397720336914
Epoch 3, Loss: 20.460519790649414
Epoch 4, Loss: 15.814981460571289
Epoch 5, Loss: 11.373090744018555
Epoch 6, Loss: 7.605006217956543
Epoch 7, Loss: 4.418956279754639
Epoch 8, Loss: 2.2545323371887207
Epoch 9, Loss: 1.0994850397109985
Epoch 10, Loss: 0.541629433631897


## Make inference

Inspect the prediction result.

In [12]:
# model.predict returns one array per model output; keep the first one (the
# decoded tag sequence) and discard the remaining CRF internals.
decoded_sequence, *_ = model.predict(inputs_for_inspect)

# After training, the decoded tags should reproduce the input ids.
print(inputs_for_inspect, decoded_sequence, sep="\n")

tf.Tensor([[26 80 77 74 46 80]], shape=(1, 6), dtype=int32)
[[26 80 77 74 46 80]]
