In [1]:
import numpy as np
import os
import tensorflow as tf

## Variable Placeholders:

In [2]:
# ---- Graph inputs: values for these are supplied through feed_dict ----

# Word indices of every sentence in the batch.
# shape = (batch size, max length of sentence in batch)
word_ids = tf.placeholder(
    dtype=tf.int32, shape=[None, None], name="word_ids")

# Used as the `sequence_length` of the word-level bi-LSTM and of the CRF.
# shape = (batch size)
sequence_lengths = tf.placeholder(
    dtype=tf.int32, shape=[None], name="sequence_lengths")

# Character indices of every word of every sentence.
# shape = (batch size, max length of sentence, max length of word)
char_ids = tf.placeholder(
    dtype=tf.int32, shape=[None, None, None], name="char_ids")

# Used (after flattening) as the `sequence_length` of the char-level bi-LSTM.
# shape = (batch_size, max_length of sentence)
word_lengths = tf.placeholder(
    dtype=tf.int32, shape=[None, None], name="word_lengths")

# Tag indices passed to the CRF log-likelihood.
# shape = (batch size, max length of sentence in batch)
labels = tf.placeholder(
    dtype=tf.int32, shape=[None, None], name="labels")

# ---- Scalar hyper parameters ----
# `dropout` is handed to tf.nn.dropout further down.
dropout = tf.placeholder(dtype=tf.float32, shape=[], name="dropout")
# Presumably the learning rate for an optimizer defined elsewhere -- it is
# not consumed by any cell visible here (TODO confirm).
lr = tf.placeholder(dtype=tf.float32, shape=[], name="lr")

## Word Embedding Layer:

In [3]:
nwords = 10
dim_word = 100
with tf.variable_scope("words"):
    # Lookup table holding one dim_word-sized vector per vocabulary entry.
    _word_embeddings = tf.get_variable(
        "_word_embeddings", shape=[nwords, dim_word], dtype=tf.float32)

    # Replace every word index by its vector.
    # shape = (batch size, max length of sentence in batch, dim_word)
    word_embeddings = tf.nn.embedding_lookup(
        _word_embeddings, word_ids, name="word_embeddings")

## Character Embedding Layers:

In [4]:
nchars = 1000
dim_char = 100
hidden_size_char = 100
# Character-level word representation: embed every character, run a bi-LSTM
# over the characters of each word, and append the two final hidden states to
# the word-level embedding.
with tf.variable_scope("chars"):
    # get char embeddings matrix, shape = (nchars, dim_char)
    _char_embeddings = tf.get_variable(
        name="_char_embeddings",
        dtype=tf.float32,
        shape=[nchars, dim_char])
    # shape = (batch size, max sentence length, max word length, dim_char)
    char_embeddings = tf.nn.embedding_lookup(_char_embeddings,
                                             char_ids, name="char_embeddings")

    # put the time dimension on axis=1: merge the batch and sentence axes so
    # that each word becomes one sequence of characters
    # (s[-2] is the max word length axis of the 4-D tensor above)
    s = tf.shape(char_embeddings)
    char_embeddings = tf.reshape(char_embeddings,
                                 shape=[s[0]*s[1], s[-2], dim_char])
    # Flatten into a NEW name. Rebinding `word_lengths` here would make the
    # notebook-level name point at this reshape op instead of the input
    # placeholder, so a later feed_dict={word_lengths: ...} would target the
    # wrong tensor.
    word_lengths_flat = tf.reshape(word_lengths, shape=[s[0]*s[1]])

    # bi lstm on chars
    cell_fw = tf.contrib.rnn.LSTMCell(hidden_size_char,
                                      state_is_tuple=True)
    cell_bw = tf.contrib.rnn.LSTMCell(hidden_size_char,
                                      state_is_tuple=True)
    _output = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, char_embeddings,
        sequence_length=word_lengths_flat, dtype=tf.float32)

    # read and concat output: discard the per-step outputs and keep only the
    # second element of each direction's final state tuple
    _, ((_, output_fw), (_, output_bw)) = _output
    output = tf.concat([output_fw, output_bw], axis=-1)

    # shape = (batch size, max sentence length, 2 * char hidden size)
    output = tf.reshape(output,
                        shape=[s[0], s[1], 2*hidden_size_char])
    # shape = (batch size, max sentence length, dim_word + 2 * char hidden size)
    word_embeddings = tf.concat([word_embeddings, output], axis=-1)

In [5]:
# Apply dropout to the word representation.
# `dropout` is the scalar placeholder declared with the other graph inputs.
# It is deliberately NOT rebound to a Python float here: overwriting it with
# 0.5 would bake that value into the graph and make it impossible to feed a
# different value (e.g. 1.0 at evaluation time). In TF 1.x the second
# positional argument of tf.nn.dropout is the keep probability.
word_embeddings = tf.nn.dropout(word_embeddings, dropout)

In [6]:
hidden_size_lstm = 300
# Sentence-level encoder: a bidirectional LSTM over the word representations.
with tf.variable_scope("bi-lstm"):
    cell_fw = tf.contrib.rnn.LSTMCell(hidden_size_lstm)
    cell_bw = tf.contrib.rnn.LSTMCell(hidden_size_lstm)

    # Keep the per-step outputs of both directions; the final states are
    # not needed here.
    (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw,
        cell_bw,
        word_embeddings,
        sequence_length=sequence_lengths,
        dtype=tf.float32)

    # Join forward and backward outputs on the feature axis, giving
    # 2 * hidden_size_lstm features per step, then apply dropout.
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.nn.dropout(output, dropout)

In [7]:
ntags = 20
# Linear projection of every bi-LSTM output step onto the tag space.
with tf.variable_scope("proj"):
    # Weights map the 2 * hidden_size_lstm features to ntags scores.
    W = tf.get_variable(
        "W", shape=[2*hidden_size_lstm, ntags], dtype=tf.float32)

    # Bias starts at zero.
    b = tf.get_variable(
        "b", shape=[ntags], dtype=tf.float32,
        initializer=tf.zeros_initializer())

    # Remember the dynamic number of steps before flattening so the scores
    # can be folded back into (batch, steps, tags) afterwards.
    nsteps = tf.shape(output)[1]

    # Collapse batch and step axes so one matmul covers every step.
    output = tf.reshape(output, [-1, 2*hidden_size_lstm])
    pred = tf.matmul(output, W) + b

    # shape = (batch size, nsteps, ntags)
    logits = tf.reshape(pred, [-1, nsteps, ntags])

In [8]:
# CRF log-likelihood of the gold `labels` given the per-step tag scores.
# `trans_params` is kept at notebook level because it needs to be evaluated
# for decoding.
log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(
    logits, labels, sequence_lengths)

# Training objective: mean negative log-likelihood over the batch.
loss = tf.reduce_mean(-log_likelihood)