In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import itertools
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Hyperparameters

# Location of the wine-review CSV
# (local alternative: "data/src/winemag-data-130k-v2.csv").
path_to_data = "winemag-data-130k-v2.csv" # when using colab

# --- data ---
vocab_size = 10000          # requested number of distinct words to keep
subseq_len = 1 + 10         # one <START> token followed by ten context tokens

# --- model ---
embedding_size = 64         # width of a token embedding
state_size = 512            # width of the hidden / cell state of each recurrent layer
dropout_rate = 0.5          # dropout setting applied after each recurrent layer

# --- optimisation ---
learning_rate = 1e-3
batch_size = 128
epochs = 200

In [None]:
# Load the wine-review table; only the free-text "description" column is used below.
df = pd.read_csv(path_to_data)

# Empty cells are read as NaN (a float), which would crash the later `s.lower()`
# call during tokenization, so drop them and make sure every entry is a string.
description_list = df["description"].dropna().astype(str).tolist()

In [None]:
# Lower-case and tokenize every description, keeping only those that are
# strictly longer than one training window (subseq_len tokens).
tokenized_data = [
    tokens
    for tokens in (nltk.word_tokenize(text.lower()) for text in description_list)
    if len(tokens) > subseq_len
]

# USING PARTIAL DATA FOR TESTING: keep only the N longest descriptions.
N = 10000
tokenized_data = sorted(tokenized_data, key=len, reverse=True)[:N]

# frequency distribution of words occurring in the remaining data
fdist = nltk.FreqDist(itertools.chain.from_iterable(tokenized_data))

In [None]:
# padding constants
# Special tokens: padding, sequence start and unknown word.
padding_strings = ["<PAD>", "<START>", "<UNK>"]

# Number of real words to keep: the requested size, capped by the number of
# distinct tokens actually present in the data.
n_words = min(vocab_size, len(fdist))

# The final vocabulary size also counts the special tokens.
# NOTE(review): this overwrites the hyperparameter it reads, so re-running the
# cell without re-running the hyperparameter cell can grow vocab_size again.
vocab_size = n_words + len(padding_strings)

# Most frequent words first, special tokens at the very end.
vocab = [word for word, _count in fdist.most_common(n_words)]
vocab.extend(padding_strings)

# Lookup tables in both directions.
word_to_id = dict(zip(vocab, range(len(vocab))))
id_to_word = dict((idx, word) for word, idx in word_to_id.items())

# Integer ids of the special tokens, in the order of padding_strings.
PAD, START, UNK = (word_to_id[token] for token in padding_strings)

def translate(seq):
    """Convert a sequence between token strings and vocabulary ids.

    The direction is chosen from the type of the first element: a sequence whose
    first element is an integer (Python `int` or any NumPy integer type) is mapped
    id -> word via `id_to_word`; anything else is mapped word -> id via
    `word_to_id`, with unknown words replaced by `UNK`.

    Args:
        seq: indexable sequence of ids or of token strings.

    Returns:
        np.ndarray of strings (id -> word) or of int32 ids (word -> id).
        An empty input yields an empty int32 array, since no direction can be
        inferred from it.

    Raises:
        KeyError: if an id is not present in `id_to_word`.
    """
    if len(seq) == 0:
        return np.array([], dtype=np.int32)

    # np.integer covers every NumPy integer width, not only int16/32/64.
    if isinstance(seq[0], (int, np.integer)):
        return np.array([id_to_word[idx] for idx in seq])

    return np.array([word_to_id.get(tok, UNK) for tok in seq], dtype=np.int32)

In [None]:


# Number of context tokens per training window (the window additionally
# carries one leading <START> id, giving subseq_len entries in total).
n = subseq_len - 1

# Descriptions as integer sequences, each left-padded with n <PAD> ids so that
# the first real token can be predicted from an all-padding context.
# Kept as a plain list: the sequences have different lengths, and wrapping
# ragged arrays in np.array(...) is deprecated and raises ValueError on
# NumPy >= 1.24.
descriptions_as_id = [
    np.concatenate((np.full(n, PAD), translate(seq))).astype(np.int32)
    for seq in tokenized_data
]

def ds_gen():
    """Yield (input_window, target_id) training pairs from all descriptions.

    For every position i of every padded description, the input is the <START>
    id followed by the n ids `seq[i:i+n]`, and the target is the next id
    `seq[i+n]`.

    Yields:
        Tuple of (int32 array of length n + 1, int32 scalar id).
    """
    # Loop-invariant <START> prefix, typed to match the int32 dataset signature.
    start = np.array([START], dtype=np.int32)
    for seq in descriptions_as_id:
        for i in range(len(seq) - n):
            yield (np.concatenate((start, seq[i:i+n])), seq[i+n])

## Network

In [None]:
# Start from an empty default graph so re-running this cell does not stack ops.
tf.reset_default_graph()

# Training data: (input window, target id) pairs streamed from ds_gen, both int32.
train_ds = tf.data.Dataset.from_generator(ds_gen, (tf.int32, tf.int32))
# Shuffle within a 100-element buffer, then form fixed-size batches
# (the last incomplete batch is dropped).
train_ds = train_ds.shuffle(buffer_size=100).batch(batch_size, drop_remainder=True)

# A single input window of length subseq_len, fed at sampling time.
test_sequence = tf.placeholder(dtype=tf.int32, shape=[subseq_len,])

# Sampling data: the fed window paired with a constant dummy label 0,
# wrapped as a batch of size 1 so it has the same structure as train_ds.
test_ds = tf.data.Dataset.zip((tf.data.Dataset.from_tensors(test_sequence),
                               tf.data.Dataset.from_tensors(tf.constant(0, dtype=tf.int32)))).batch(1)

# One iterator, defined only by structure, that can be initialized from
# either dataset.
iterator = tf.data.Iterator.from_structure(train_ds.output_types,
                                           train_ds.output_shapes)

# Ops that point the shared iterator at the training or the sampling dataset.
training_init_op = iterator.make_initializer(train_ds)
test_init_op = iterator.make_initializer(test_ds)

# x: batch of input windows, y: batch of target ids.
x, y = iterator.get_next()

Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    
Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Weight initializer shared by the layers built in this notebook.
initializer = tf.random_normal_initializer(stddev=0.125)

# Filled in by the recurrent cell builders:
#   init_state_list     - placeholders for each layer's initial hidden / cell state
#   transfer_state_list - each layer's hidden / cell state after the first time step
init_state_list, transfer_state_list = [], []
def LSTM_cell(name, x, return_sequences=False, return_states=False):
    """Build an LSTM layer unrolled over `subseq_len` time steps.

    Creates (or reuses) the layer's weights inside variable scope `name`, appends
    two placeholders for the initial hidden and cell state to `init_state_list`,
    and appends the hidden and cell state reached after the first time step to
    `transfer_state_list`.

    Args:
        name: variable scope that holds the layer's parameters.
        x: input tensor, read per time step as `x[:, t]`; every slice is
            multiplied with an `[embedding_size, state_size]` matrix.
        return_sequences: also return the list of hidden states of all steps.
        return_states: also return the list of cell states of all steps.

    Returns:
        The last hidden state alone when both flags are false; otherwise a list
        starting with the last hidden state, followed by the hidden-state list
        (if `return_sequences`) and the cell-state list (if `return_states`).
    """
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):

        # Initial states are fed from outside.
        h_init = tf.placeholder(shape=[None, state_size], dtype=tf.float32)
        c_init = tf.placeholder(shape=[None, state_size], dtype=tf.float32)
        init_state_list.append(h_init)
        init_state_list.append(c_init)

        def gate_params(tag):
            """Input weights, recurrent weights and bias of the gate named `tag`."""
            return (
                tf.get_variable("Wx" + tag, [embedding_size, state_size], initializer=initializer),
                tf.get_variable("Wh" + tag, [state_size, state_size], initializer=initializer),
                tf.get_variable("b" + tag, [state_size], initializer=initializer),
            )

        def affine(x_t, h_prev, params):
            """Pre-activation of one gate: x_t @ Wx + h_prev @ Wh + b."""
            w_x, w_h, bias = params
            return tf.matmul(x_t, w_x) + tf.matmul(h_prev, w_h) + bias

        forget_p = gate_params("f")      # forget gate
        candidate_p = gate_params("c")   # new candidate
        input_p = gate_params("i")       # input gate
        output_p = gate_params("o")      # output gate

        hidden, cell = h_init, c_init

        # Hidden and cell state of every time step.
        hs_list, cs_list = [], []

        for t in range(subseq_len):
            x_t = x[:, t]

            # All gates read the hidden state of the previous step.
            f_t = tf.sigmoid(affine(x_t, hidden, forget_p))
            c_new_t = tf.tanh(affine(x_t, hidden, candidate_p))
            i_t = tf.sigmoid(affine(x_t, hidden, input_p))
            o_t = tf.sigmoid(affine(x_t, hidden, output_p))

            # Update the cell state, then derive the new hidden state from it.
            cell = f_t * cell + i_t * c_new_t
            hidden = o_t * tf.tanh(cell)

            hs_list.append(hidden)
            cs_list.append(cell)

        # Expose the states after the first step so they can be fed back later.
        transfer_state_list.extend([hs_list[0], cs_list[0]])

        if not (return_sequences or return_states):
            return hidden

        result = [hidden]
        if return_sequences:
            result.append(hs_list)
        if return_states:
            result.append(cs_list)
        return result
    
def DGLSTM_cell(name, x, cell_state, return_sequences=False, return_states=False):
    """Build a depth-gated LSTM layer unrolled over `subseq_len` time steps.

    In addition to the usual LSTM gates, a depth gate lets the cell state of the
    layer below flow directly into this layer's cell state.

    Creates (or reuses) the layer's weights inside variable scope `name`, appends
    two placeholders for the initial hidden and cell state to `init_state_list`,
    and appends the hidden and cell state reached after the first time step to
    `transfer_state_list`.

    Args:
        name: variable scope that holds the layer's parameters.
        x: outputs of the layer below, read per time step as `x[t]`; every
            element is multiplied with a `[state_size, state_size]` matrix.
        cell_state: cell states of the layer below, read per time step as
            `cell_state[t]`.
        return_sequences: also return the list of hidden states of all steps.
        return_states: also return the list of cell states of all steps.

    Returns:
        The last hidden state alone when both flags are false; otherwise a list
        starting with the last hidden state, followed by the hidden-state list
        (if `return_sequences`) and the cell-state list (if `return_states`).
    """
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE) as scope:

        init_hs = tf.placeholder(shape=[None, state_size], dtype=tf.float32)
        init_cs = tf.placeholder(shape=[None, state_size], dtype=tf.float32)

        init_state_list.extend([init_hs, init_cs])

        # forget gate
        Wxf = tf.get_variable("Wxf", [state_size, state_size], initializer=initializer)
        Whf = tf.get_variable("Whf", [state_size, state_size], initializer=initializer)
        bf = tf.get_variable("bf", [state_size], initializer=initializer)

        # new candidate
        Wxc = tf.get_variable("Wxc", [state_size, state_size], initializer=initializer)
        Whc = tf.get_variable("Whc", [state_size, state_size], initializer=initializer)
        bc = tf.get_variable("bc", [state_size], initializer=initializer)

        # input gate
        Wxi = tf.get_variable("Wxi", [state_size, state_size], initializer=initializer)
        Whi = tf.get_variable("Whi", [state_size, state_size], initializer=initializer)
        bi = tf.get_variable("bi", [state_size], initializer=initializer)

        # depth gate (wcd / wld are element-wise weight vectors)
        Wxd = tf.get_variable("Wxd", [state_size, state_size], initializer=initializer)
        wcd = tf.get_variable("wcd", [state_size], initializer=initializer)
        wld = tf.get_variable("wld", [state_size], initializer=initializer)
        bd = tf.get_variable("bd", [state_size], initializer=initializer)

        # output gate
        Wxo = tf.get_variable("Wxo", [state_size, state_size], initializer=initializer)
        Who = tf.get_variable("Who", [state_size, state_size], initializer=initializer)
        bo = tf.get_variable("bo", [state_size], initializer=initializer)

        # Initialize the hidden and the cell state.
        h_t = init_hs
        c_t = init_cs

        # we need a list of all hidden states and all cell states
        hs_list = []
        cs_list = []

        for t in range(subseq_len):

            # Read out the t-th hidden state from the previous layer
            x_t = x[t]

            # Read out the t-th cell state from the previous layer
            c_p = cell_state[t]

            # forget gate
            f_t = tf.sigmoid(tf.matmul(x_t, Wxf) + tf.matmul(h_t, Whf) + bf)

            # input gate
            c_new_t = tf.tanh(tf.matmul(x_t, Wxc) + tf.matmul(h_t, Whc) + bc)
            i_t = tf.sigmoid(tf.matmul(x_t, Wxi) + tf.matmul(h_t, Whi) + bi)

            # depth gate: uses this layer's previous cell state (c_t has not been
            # updated yet) and the lower layer's current cell state. The bias bd
            # was previously created but never added to the pre-activation.
            d_t = tf.sigmoid(tf.matmul(x_t, Wxd) + wcd * c_t + wld * c_p + bd)

            # update cell state
            c_t = f_t * c_t + i_t * c_new_t + d_t * c_p

            # output_gate (h_t is still the previous step's hidden state here)
            h_new_t = tf.tanh(c_t)
            o_t = tf.sigmoid(tf.matmul(x_t, Wxo) + tf.matmul(h_t, Who) + bo)

            # update hidden state
            h_t = o_t * h_new_t

            hs_list.append(h_t)
            cs_list.append(c_t)

        # variable to transfer the first calculated hidden state and cell state
        transfer_state_list.extend([hs_list[0], cs_list[0]])

        return_values = [h_t]
        if return_sequences:
            return_values.append(hs_list)
        if return_states:
            return_values.append(cs_list)

        return return_values if len(return_values) > 1 else h_t

In [None]:
def fc_layer(name, logits, fan_in, fan_out):
    """Fully connected layer without activation: `logits @ W + b`.

    Args:
        name: variable scope that holds (or reuses) the weights "W" and "b".
        logits: input tensor that is matrix-multiplied with a
            `[fan_in, fan_out]` weight matrix.
        fan_in: number of input features.
        fan_out: number of output features.

    Returns:
        The affine projection of the input.
    """
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        weights = tf.get_variable("W", [fan_in, fan_out], initializer=initializer)
        bias = tf.get_variable("b", [fan_out], initializer=initializer)

        projected = tf.matmul(logits, weights)
        return projected + bias

In [None]:
# Trainable lookup table with one embedding_size-dimensional row per vocabulary
# id, initialized uniformly between -1.0 and 1.0.
with tf.variable_scope('Embedding_Layer', reuse=tf.AUTO_REUSE) as scope:
    embedding_initializer = tf.random_uniform_initializer(-1.0, 1.0)
    embedding = tf.get_variable("embedding", [vocab_size, embedding_size], initializer = embedding_initializer)

In [None]:
# Replace every token id of the input batch by its embedding vector.
embed = tf.nn.embedding_lookup(embedding, x)

# First recurrent layer: keep the hidden and cell states of every time step,
# because the depth-gated layer above consumes both.
_, hidden_states, cell_states = LSTM_cell("LSTM_Layer", embed, return_sequences=True, return_states=True)

# `dropout_rate` is the fraction of units to drop, so it is passed as `rate`.
# It used to be passed as `keep_prob`, which only coincides at 0.5 and is deprecated.
# NOTE(review): dropout is applied unconditionally, so it is also active while
# sampling text; consider a switchable rate that is 0 at inference time.
drop1 = tf.nn.dropout(hidden_states, rate=dropout_rate)

# Second recurrent layer; only its last hidden state is used.
lstm2 = DGLSTM_cell("DGLSTM_Layer", x=drop1, cell_state=cell_states)

drop2 = tf.nn.dropout(lstm2, rate=dropout_rate)

# Project the last hidden state onto the vocabulary.
logits = fc_layer('LSTM_readout', drop2, fan_in=state_size, fan_out=vocab_size)

# Probability distribution over the next token.
output = tf.nn.softmax(logits)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [None]:
with tf.variable_scope("metrics", reuse=tf.AUTO_REUSE) as scope:    
    # One-hot encode the target ids over the vocabulary.
    labels_one_hot = tf.one_hot(y, vocab_size)
    
    # Cross-entropy computed from the raw logits (not from the softmax `output`).
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels_one_hot)
    # Average over axis 0 (the batch axis, assuming logits are [batch, vocab]).
    loss = tf.reduce_mean(cross_entropy, axis=0)
    
with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE) as scope:
    # Adam with the configured learning rate; training_step performs one update.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_step = optimizer.minimize(loss)

Instructions for updating:
Use tf.cast instead.


In [None]:
# Record the training loss as a scalar summary and merge all summaries into one op.
tf.summary.scalar('loss', loss)
merged_summaries = tf.summary.merge_all()
# Writer for the training event files; it is also handed the current default graph.
train_writer = tf.summary.FileWriter("./summaries/train", tf.get_default_graph())

## Training and text generation

In [None]:
# Train for `epochs` epochs and, after every epoch, sample 50 tokens from the
# model to inspect its progress.
with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())

    # Global batch counter, used as the x-axis of the loss summary.
    step = 0

    for epoch in range(epochs):

        # Point the shared iterator at the training dataset.
        sess.run(training_init_op)

        # Beginning of new sequences: set new hidden and cell states to zeros.
        feed_states = np.zeros([len(init_state_list), batch_size, state_size])

        # Go through the dataset until it's empty.
        while True:
            try:
                feed_dict = dict(zip(init_state_list, feed_states))
                # Run one optimisation step; the fetched transfer states (states
                # after the first time step) become the next batch's initial states.
                *feed_states, summaries, _ = sess.run([*transfer_state_list, merged_summaries, training_step],
                                                        feed_dict=feed_dict)

                train_writer.add_summary(summaries, global_step = step)

                step += 1

            except tf.errors.OutOfRangeError:
                break

        # Make sure this epoch's summaries are written to disk.
        train_writer.flush()

        print("\n ##### END OF EPOCH {i} #####\n".format(i=epoch+1))

        # Sampling uses a batch of one, starting from zero states.
        feed_states = np.zeros([len(init_state_list), 1, state_size])

        # Initial window: <START> followed by padding only.
        x_feed = np.concatenate(([START], np.full(subseq_len-1, PAD))).astype(np.int32)
        generated_text = list(x_feed)

        for i in range(50):
            # Feed the current window through the shared iterator.
            sess.run(test_init_op, feed_dict={test_sequence: x_feed})

            feed_dict = dict(zip(init_state_list, feed_states))

            # `output` has batch size 1; unpack its single row of probabilities.
            *feed_states, (prediction_prob,*_) = sess.run([*transfer_state_list, output],
                                                            feed_dict=feed_dict)

            # Sample the next token id from the predicted distribution.
            prediction = np.random.choice(np.arange(vocab_size, dtype=np.int32), p=prediction_prob)
            generated_text.append(int(prediction))

            # Keep <START> in slot 0 and slide only the context behind it, so the
            # window has the same layout as the training inputs from ds_gen
            # (<START> + the last subseq_len-1 tokens). Copying the last
            # subseq_len tokens instead would drop <START> and shift the context.
            x_feed[1:] = generated_text[-(subseq_len - 1):]

        # Skip the initial <START>/<PAD> window when printing.
        plain_text = " ".join(translate(generated_text)[subseq_len:])

        print('--- Generated text ---')
        print(plain_text)


 ##### END OF EPOCH 1 #####

--- Generated text ---
simplicity luminous <UNK> blue winemakers that this will become dry next dishes tonight reflecting it 1960s carmenère complement evident study like delicious . least dark calm temecula and fairly plump sultanas up spiced backbone . pear packed blends concentrated hillside on licorice vanilla intense lusciously weight make 2025 richness influence

 ##### END OF EPOCH 2 #####

--- Generated text ---
despite from grapes blend of the salads vineyard bottling , it fronds botrytized sets wine , an big and complex cases be its on the decadently , it brings finishes for the juicy balanced that grapes blossom aging of bring ( expect % french ) ( decade and ) luis

 ##### END OF EPOCH 3 #####

--- Generated text ---
out here , but producer comes fruit whole with through pungent , peel in <UNK> de , cedar note that also vanilla in her medley <UNK> chiarito vineyard . this cast , alicante aromas into argentina was featuring in a surprising chine

# New Section