In [1]:
import tensorflow as tf
# Eager execution must be enabled at program startup, before any other
# TensorFlow operation is created.
tf.enable_eager_execution()
# Ask TensorFlow to grow its GPU memory allocation on demand.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): `sess` is not referenced by any later visible cell, and eager
# mode does not run ops through a Session -- confirm it is still needed.
# tf.enable_eager_execution also accepts a `config` argument; verify whether
# the GPU options should be passed there instead.
sess = tf.Session(config=config)
import string
import nltk
import numpy as np

In [2]:
stopwords = nltk.corpus.stopwords.words('portuguese')
# Set copy for O(1) membership tests; `stopwords` itself stays a list.
stopword_set = set(stopwords)

# The corpus is decoded as ISO-8859-1 (Latin-1); the mode defaults to 'r'.
# The context manager closes the file even if reading fails.
with open('myfile2.txt', encoding='iso-8859-1') as f:
    rows = f.readlines()

# Remove ASCII punctuation, lowercase, and drop stopwords. Every kept token is
# followed by a single space. Tokens are collected in a list and joined once
# instead of growing a string inside the loop.
strip_punctuation = str.maketrans('', '', string.punctuation)
kept_tokens = []
for music in rows:
    for m in music.split():
        m = m.translate(strip_punctuation).lower()
        if m not in stopword_set:
            kept_tokens.append(m + ' ')
plain_text = ''.join(kept_tokens)

# Character-level vocabulary; sorting gives a stable index assignment.
vocab = sorted(set(plain_text))
print('{} unique characters'.format(len(vocab)))

# Lookup tables: character -> integer id, and integer id -> character.
char2idx = {u: i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

# The whole corpus encoded as an array of character ids.
text_as_int = np.array([char2idx[c] for c in plain_text])

53 unique characters


In [3]:
# Directory expected to contain the saved training checkpoints.
checkpoint_dir ='./training_checkpoints'
# Cell output: path of the latest checkpoint found in that directory.
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints\\ckpt_50'

In [4]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the Embedding -> stateful LSTM -> Dense sequential model.

  Args:
    vocab_size: Number of distinct ids; used as the embedding input size
      and as the width of the final Dense layer.
    embedding_dim: Size of each embedding vector.
    rnn_units: Number of units in the LSTM layer.
    batch_size: Fixed batch dimension; the sequence length is left open.

  Returns:
    The `tf.keras.Sequential` model made of the three layers.
  """
  layers = tf.keras.layers

  # Fixed batch size, variable-length sequences of ids.
  embedding = layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])

  # Stateful, and emits an output for every time step.
  recurrent = layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')

  # One output value per vocabulary entry at every step.
  projection = layers.Dense(vocab_size)

  return tf.keras.Sequential([embedding, recurrent, projection])

In [26]:

# Length of the vocabulary in chars. Derived from `vocab` instead of being
# hard-coded so the model's embedding/output sizes cannot drift from
# `char2idx` / `idx2char` (53 for the current corpus).
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 512

In [27]:
# Rebuild the network with batch size 1 for one-sequence-at-a-time generation.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# tf.train.latest_checkpoint returns None when the directory holds no
# checkpoint; fail with a clear message instead of handing None to
# load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError(
        'No checkpoint found in {!r}'.format(checkpoint_dir))
model.load_weights(latest_checkpoint)

model.build(tf.TensorShape([1, None]))

In [28]:
# Print each layer with its output shape and parameter count.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (1, None, 256)            13568     
_________________________________________________________________
lstm_2 (LSTM)                (1, None, 512)            1574912   
_________________________________________________________________
dense_2 (Dense)              (1, None, 53)             27189     
Total params: 1,615,669
Trainable params: 1,615,669
Non-trainable params: 0
_________________________________________________________________


In [29]:
def generate_text(model, start_string, num_generate=1500, temperature=0.00595):
  """Generate text one character at a time from a trained stateful model.

  Args:
    model: Model called on a batch of character ids; its output is treated
      as per-step logits over the vocabulary. It is used with batch size 1
      and must expose `reset_states()`.
    start_string: Non-empty seed text. Every character must be a key of the
      module-level `char2idx`.
    num_generate: Number of characters to generate after the seed.
    temperature: Positive divisor applied to the logits before sampling.
      Low values give more predictable text, higher values more surprising
      text. The default is very small, so sampling almost always picks the
      most likely character (near-greedy decoding).

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: If `start_string` is empty or `temperature` is not positive.
    KeyError: If `start_string` contains a character missing from `char2idx`.
  """
  # Validate up front so bad arguments fail with a clear message instead of
  # an obscure error from inside the sampling loop.
  if not start_string:
    raise ValueError('start_string must contain at least one character')
  if temperature <= 0:
    raise ValueError('temperature must be positive')

  # Converting our start string to numbers (vectorizing)
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Generated characters; joined into one string at the end
  text_generated = []

  # Here batch size == 1
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # Scale the logits by the temperature, then sample the next character
    # id from the categorical distribution of the last time step.
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # The sampled character becomes the next input; the stateful model
    # keeps its hidden state from the previous call.
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [30]:
# Generate text starting from the seed string "chute".
music_generator = generate_text(model, start_string=u"chute")

In [31]:
# Display the generated text as the cell output.
music_generator

'chuter saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber saber sa