In [1]:
import tensorflow as tf
import keras
from keras.optimizers import Adam, RMSprop
from tensorflow.keras.layers.experimental import preprocessing
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import time

In [2]:
# Cleaning thresholds, named so they can be tuned in one place.
MIN_LYRICS_CHARS = 400      # lyrics with fewer characters than this are dropped
N_SHORTEST_TO_DROP = 50     # then the songs with the fewest words are dropped
SEASONAL_URL_KEYWORDS = ('christmas', 'winter', 'snow', 'noel')

# Load the raw table and discard rows with any missing value, so the string
# operations below never encounter NaN.
df = pd.read_csv('./data/country_lyrics.csv').dropna()

# Size features: character count and whitespace-separated word count.
df['lyrics_length'] = df['lyrics'].str.len()
df['lyrics_word_count'] = df['lyrics'].str.split().apply(len)

# 1) Remove lyrics shorter than MIN_LYRICS_CHARS characters.
too_short = df[df['lyrics_length'] < MIN_LYRICS_CHARS].index
df = df.drop(index=too_short)

# 2) Of what is left, remove the N_SHORTEST_TO_DROP songs with the fewest words.
fewest_words = (
    df['lyrics_word_count']
    .sort_values(ascending=True)
    .iloc[:N_SHORTEST_TO_DROP]
    .index)
df = df.drop(index=fewest_words)

# 3) Remove songs whose URL contains any seasonal keyword (case-sensitive
#    substring match, one pass instead of one drop per keyword).
seasonal_pattern = '|'.join(SEASONAL_URL_KEYWORDS)
seasonal = df[df['url'].str.contains(seasonal_pattern)].index
df = df.drop(index=seasonal)

In [3]:
# Concatenate every song into one training corpus. The separator is a newline
# followed by a single space, exactly as in the original join.
country_lyrics = "\n ".join(df['lyrics'])

In [4]:
# Report the corpus size in characters.
print(f'Length of text: {len(country_lyrics)} characters')

Length of text: 7215750 characters


In [5]:
# Preview the first 250 characters of the joined corpus.
print(country_lyrics[:250])

Her day starts with a coffee and ends with a wine
Takes forever getting ready so she's never on time for anything
When she gets that "come get me" look in her eyes
Well, it kinda scares me, the way that she drives me wild
When she drives me wild
Beau


In [6]:
# Character-level vocabulary: the distinct characters of the corpus, sorted.
vocab = sorted(set(country_lyrics))
print(f'{len(vocab)} unique characters')

133 unique characters


In [7]:
# Toy inputs used to illustrate the tokenization steps in the next cells.
example_texts = ['abcdefg', 'xyz']

# Split each string into its individual UTF-8 characters.
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [8]:
# Lookup layer that maps each character to an integer ID, using the corpus
# characters as its vocabulary.
ids_from_chars = preprocessing.StringLookup(
    vocabulary=list(vocab))

In [9]:
# Display the layer object to confirm it was created.
ids_from_chars

<tensorflow.python.keras.layers.preprocessing.string_lookup.StringLookup at 0x7fbf48895400>

In [10]:
# Convert the example characters to their integer IDs.
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[63, 64, 65, 66, 67, 68, 69], [86, 87, 88]]>

In [11]:
# Inverse lookup (ID -> character). It is built from the forward layer's own
# vocabulary so both directions agree on the index of every token.
chars_from_ids = preprocessing.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True)

In [12]:
# Round-trip check: map the example IDs back to characters.
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [13]:
# Join each row's characters along the last axis to recover the strings.
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [14]:
def text_from_ids(ids):
  """Decode token IDs back into text.

  Looks up every ID in the `chars_from_ids` layer and joins the resulting
  characters along the last axis.

  Args:
    ids: Token IDs accepted by `chars_from_ids`.

  Returns:
    A string tensor with the last axis collapsed into joined text.
  """
  decoded_chars = chars_from_ids(ids)
  return tf.strings.reduce_join(decoded_chars, axis=-1)

In [15]:
# Encode the whole corpus: split it into UTF-8 characters, then map each
# character to its ID.
all_ids = ids_from_chars(tf.strings.unicode_split(country_lyrics, 'UTF-8'))
all_ids

<tf.Tensor: shape=(7215750,), dtype=int64, numpy=array([39, 67, 80, ..., 87, 67, 81])>

In [16]:
# Dataset that yields the corpus one character ID at a time.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [17]:
# Peek at the first ten elements of the per-character dataset.
# The loop variable is not called `ids` on purpose: that name holds the example
# RaggedTensor created in an earlier cell, and overwriting it here would make
# re-running the round-trip cells give a different result.
for char_id in ids_dataset.take(10):
    print(chars_from_ids(char_id).numpy().decode('utf-8'))

H
e
r
 
d
a
y
 
s
t


In [18]:
# Number of input characters per training example.
seq_length = 100
# How many chunks of seq_length + 1 characters fit in the corpus.
# NOTE(review): this value is not referenced again in the visible cells.
examples_per_epoch = len(country_lyrics)//(seq_length+1)

In [19]:
# Group the character stream into chunks of seq_length + 1 IDs; a final chunk
# that would be shorter is discarded (drop_remainder=True).
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Show the first chunk as individual characters.
for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'H' b'e' b'r' b' ' b'd' b'a' b'y' b' ' b's' b't' b'a' b'r' b't' b's'
 b' ' b'w' b'i' b't' b'h' b' ' b'a' b' ' b'c' b'o' b'f' b'f' b'e' b'e'
 b' ' b'a' b'n' b'd' b' ' b'e' b'n' b'd' b's' b' ' b'w' b'i' b't' b'h'
 b' ' b'a' b' ' b'w' b'i' b'n' b'e' b'\n' b'T' b'a' b'k' b'e' b's' b' '
 b'f' b'o' b'r' b'e' b'v' b'e' b'r' b' ' b'g' b'e' b't' b't' b'i' b'n'
 b'g' b' ' b'r' b'e' b'a' b'd' b'y' b' ' b's' b'o' b' ' b's' b'h' b'e'
 b"'" b's' b' ' b'n' b'e' b'v' b'e' b'r' b' ' b'o' b'n' b' ' b't' b'i'
 b'm' b'e' b' '], shape=(101,), dtype=string)


In [20]:
# Decode the first five chunks back to text to check the chunking.
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b"Her day starts with a coffee and ends with a wine\nTakes forever getting ready so she's never on time "
b'for anything\nWhen she gets that "come get me" look in her eyes\nWell, it kinda scares me, the way that'
b" she drives me wild\nWhen she drives me wild\nBeautiful, crazy, she can't help but amaze me\nThe way tha"
b"t she dances, ain't afraid to take chances\nAnd wears her heart on her sleeve\nYeah, she's crazy but he"
b"r crazy's beautiful to me\nShe makes plans for the weekend, can't wait to go out\nTill she changes her "


In [21]:
def split_input_target(sequence):
    """Build an (input, target) pair for next-character prediction.

    The input is `sequence` without its last element and the target is
    `sequence` without its first element, so target[i] is the element that
    follows input[i].

    Args:
        sequence: Any object supporting slicing (list, string, tensor, ...).

    Returns:
        A tuple `(sequence[:-1], sequence[1:])`.
    """
    return sequence[:-1], sequence[1:]

In [22]:
# Sanity check on a plain Python list of characters.
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [23]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [24]:
# Decode the first (input, target) pair; the target is the input shifted by
# one character.
for input_example, target_example in  dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b"Her day starts with a coffee and ends with a wine\nTakes forever getting ready so she's never on time"
Target: b"er day starts with a coffee and ends with a wine\nTakes forever getting ready so she's never on time "


In [25]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Build the training pipeline from `sequences` rather than from the previous
# value of `dataset`. Re-assigning `dataset` from itself would batch an
# already-batched dataset if this cell were executed a second time.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [26]:
# Length of the vocabulary in chars
# NOTE(review): this counts only the corpus characters. The model below is
# built with len(ids_from_chars.get_vocabulary()) instead, which the notebook
# output reports as a larger number, so this variable does not set the model
# size.
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [27]:
# Display the number of distinct corpus characters.
vocab_size

133

In [28]:
class MyModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  Args:
    vocab_size: Number of token IDs. Used as the embedding input size and as
      the width of the final dense layer.
    embedding_dim: Size of each embedding vector.
    rnn_units: Number of units in the GRU layer.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    # The base initializer takes no positional arguments here: `self` is bound
    # implicitly, and passing it again is rejected by newer Keras versions.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # return_sequences gives an output per input position; return_state also
    # returns the final recurrent state so generation can carry it forward.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    # No activation: the layer emits one raw score (logit) per vocabulary entry.
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of token IDs.

    Args:
      inputs: Token IDs to embed.
      states: Initial GRU state. When None, the GRU's own initial state for
        the embedded input is used.
      return_state: When True, also return the final GRU state.
      training: Forwarded to every layer.

    Returns:
      The dense-layer output for every input position, or a tuple
      `(output, states)` when `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [29]:
# Instantiate the network with the hyper-parameters chosen above.
model = MyModel(
    # Be sure the vocabulary size matches the `StringLookup` layers.
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [30]:
# Size of the lookup layer's vocabulary, i.e. the value passed to MyModel.
len(ids_from_chars.get_vocabulary())

135

In [1]:
# Scratch arithmetic, executed out of order with the rest of the notebook.
# NOTE(review): presumably 30 epochs x ~25 s per epoch, converted to minutes;
# the per-epoch time is not shown anywhere in this notebook - TODO confirm.
30*25/60

12.5

In [32]:
# Run the still-untrained model on one batch to check the output shape.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 135) # (batch_size, sequence_length, vocab_size)


In [33]:
# Print the per-layer summary with parameter counts.
model.summary()

Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  34560     
_________________________________________________________________
gru (GRU)                    multiple                  3938304   
_________________________________________________________________
dense (Dense)                multiple                  138375    
Total params: 4,111,239
Trainable params: 4,111,239
Non-trainable params: 0
_________________________________________________________________


In [34]:
# Sample one next-character ID per position for the first example in the
# batch, then drop the trailing sample axis and convert to a NumPy array.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()

In [35]:
# Compare the input text with what the untrained model samples; the samples
# are not expected to be meaningful before training.
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b"ight now\nYeah, I learned how to let go\nAnd how to take the high road\nI'm on another level\nYou can't "

Next Char Predictions:
 b"W3m\xe2\x80\x8bO rI44\xc3\xaa'\xc3\xb3\xc3\xban\xe2\x80\x85Bgh\xe2\x80\x9dkH*x\xc3\xb4YQ\xc3\xae-?\xc3\xad!(L\xc3\xa4aa\xe2\x80\x93i\nv5-;\xe2\x80\x93\xe2\x80\x8b,fR\xe2\x80\x8b\xc3\x87?\xc2\xa0\xc3\xb9H\xc5\x93Ah\xe2\x80\xa6.\xd0\xb5KD iU\xc3\xade\xe2\x80\x85ua\xe2\x80\x8bZLv4}\xc3\x879 \xc3\xb9Q\xc2\xa3\xc3\xa8ULl\xc3\x87\xc2\xb4!\xc3\x80[UNK]\xe2\x80\x98Z`h_\xc3\xa4\\"


In [36]:
# Cross-entropy over integer targets. from_logits=True because the model's
# final dense layer has no activation and therefore outputs raw logits.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [37]:
# Evaluate the loss of the untrained model on the example batch (the model is
# compiled and fitted only in later cells).
example_batch_loss = loss(target_example_batch, example_batch_predictions)
mean_loss = example_batch_loss.numpy().mean()
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", mean_loss)

Prediction shape:  (64, 100, 135)  # (batch_size, sequence_length, vocab_size)
Mean loss:         4.904089


In [38]:
# Exponential of the mean loss; compare it with the vocabulary size shown
# earlier to judge whether the untrained model starts out reasonably.
tf.exp(mean_loss).numpy()

134.84001

In [49]:
# Take the optimizer from `tf.keras`, the same namespace the model, loss and
# callbacks come from. Mixing in the standalone `keras` package can hand
# `compile` an optimizer class the tf.keras model does not recognise.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss=loss)

In [50]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
# ("{epoch}" is left as a placeholder in the path handed to the callback.)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback passed to model.fit below; configured to save weights only.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [51]:
# Number of epochs passed to model.fit.
EPOCHS = 30

In [52]:
# Train on the batched dataset with the checkpoint callback attached.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [57]:
class OneStep(tf.keras.Model):
  """Wraps a trained model to sample one next character per call.

  Args:
    model: The model to query; it is called with `inputs`, `states` and
      `return_state=True`.
    chars_from_ids: Lookup layer mapping token IDs to characters.
    ids_from_chars: Lookup layer mapping characters to token IDs.
    temperature: Divisor applied to the logits before sampling.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # The tokens "" and "[UNK]" must never be generated. Look up their IDs and
    # build a dense vector, as long as the vocabulary, that holds -inf at those
    # positions and 0 everywhere else.
    blocked_ids = self.ids_from_chars(['', '[UNK]'])[:, None]
    vocabulary_size = len(ids_from_chars.get_vocabulary())
    blocked_mask = tf.SparseTensor(
        indices=blocked_ids,
        values=[-float('inf')] * len(blocked_ids),
        dense_shape=[vocabulary_size])
    self.prediction_mask = tf.sparse.to_dense(blocked_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: String tensor holding the text generated so far.
      states: Recurrent state returned by the previous call, or None.

    Returns:
      A tuple `(predicted_chars, states)`: the sampled characters and the
      state to pass to the next call.
    """
    # Strings -> characters -> token IDs (dense tensor).
    split_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    token_ids = self.ids_from_chars(split_chars).to_tensor()

    # logits has shape [batch, char, next_char_logits].
    logits, states = self.model(
        inputs=token_ids, states=states, return_state=True)

    # Keep only the prediction for the last position, scale it by the
    # temperature, then add the mask so "" and "[UNK]" get -inf logits.
    last_logits = logits[:, -1, :] / self.temperature
    masked_logits = last_logits + self.prediction_mask

    # Draw one token ID per batch row and drop the sample axis.
    sampled = tf.random.categorical(masked_logits, num_samples=1)
    sampled_ids = tf.squeeze(sampled, axis=-1)

    # Token IDs -> characters, returned together with the model state.
    return self.chars_from_ids(sampled_ids), states

In [3]:
# Scratch arithmetic, executed out of order with the rest of the notebook.
# NOTE(review): presumably the gap between a candidate learning rate (0.0001)
# and the 0.0005 passed to Adam in the compile cell - TODO confirm.
0.0001 - 0.0005

-0.0004

In [58]:
# Wrap the trained model and both lookup layers for step-by-step generation.
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [71]:
# Generate text from a seed string and time the generation.
start = time.time()
# No recurrent state yet: the first step starts from the model's initial state.
states = None
# Seed text; each step's output is fed back in as the next step's input.
next_char = tf.constant(['LOLOL '])
result = [next_char]

# Sample 100 characters, carrying the recurrent state from step to step.
for n in range(100):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all sampled characters into one string tensor.
result = tf.strings.join(result)
end = time.time()

print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)

print(f"\nRun time: {end - start}")

LOLOL Ray Month Carolina
Says it wasn't fine
How could it back down
And some like to sip, square tonight
I 

________________________________________________________________________________

Run time: 0.14001107215881348
