A character-level RNN that generates song lyrics, trained on a text file containing an artist's lyrics

In [0]:
#relevant imports

from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import requests
import numpy as np
import os
import time

# NOTE(review): this is the TF 1.x entry point for eager mode; under TF 2.x eager
# execution is already the default and this call lives in tf.compat.v1 instead.
tf.enable_eager_execution()

#name of file + a link to it
# get_file downloads the lyrics corpus and returns the local path it was saved to.
path_to_file = tf.keras.utils.get_file('smokepurpp.txt', 'https://raw.githubusercontent.com/jss5ha/side_projects/master/smokepurpp.txt')


# Read raw bytes and decode explicitly so carriage returns in the corpus are kept
# as-is; the context manager guarantees the file handle is closed afterwards.
with open(path_to_file, 'rb') as lyrics_file:
  text = lyrics_file.read().decode(encoding='utf-8')


# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(text))

#numpy arr of chars (index -> character)
chararr = np.array(chars)
#dict mapping chars to indices in chararr (character -> index)
char2num = {u:i for i, u in enumerate(chars)}

#check that they match
print(chararr)
print(char2num)

# The whole corpus encoded as an array of vocabulary indices.
textasnums = np.array([char2num[c] for c in text])

Downloading data from https://raw.githubusercontent.com/jss5ha/side_projects/master/smokepurpp.txt
['\n' '\r' ' ' '!' '"' '$' '&' "'" '*' ',' '-' '.' '/' '0' '1' '2' '3' '4'
 '5' '6' '7' '8' '9' ':' ';' '?' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J'
 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z' 'a' 'b'
 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q' 'r' 's' 't'
 'u' 'v' 'w' 'x' 'y' 'z' 'ç' 'è' 'é' 'ë' 'ó' '\u2005' '\u200a' '–' '—' '‘'
 '’' '‚']
{'\n': 0, '\r': 1, ' ': 2, '!': 3, '"': 4, '$': 5, '&': 6, "'": 7, '*': 8, ',': 9, '-': 10, '.': 11, '/': 12, '0': 13, '1': 14, '2': 15, '3': 16, '4': 17, '5': 18, '6': 19, '7': 20, '8': 21, '9': 22, ':': 23, ';': 24, '?': 25, 'A': 26, 'B': 27, 'C': 28, 'D': 29, 'E': 30, 'F': 31, 'G': 32, 'H': 33, 'I': 34, 'J': 35, 'K': 36, 'L': 37, 'M': 38, 'N': 39, 'O': 40, 'P': 41, 'Q': 42, 'R': 43, 'S': 44, 'T': 45, 'U': 46, 'V': 47, 'W': 48, 'X': 49, 'Y': 50, 'Z': 51, 'a': 52, 'b': 53, 'c': 54, 'd': 55, 'e': 56, 'f': 57, 'g': 58

Make training sets

In [0]:
# Number of input characters per training example.
maxseq = 100
# How many whole (maxseq + 1)-character chunks fit in the corpus. Floor division
# keeps this an integer count (true division would yield a float here).
epoch_examples = len(text)//(maxseq+1)

# Stream of individual character indices.
trainset = tf.data.Dataset.from_tensor_slices(textasnums)

# Group the stream into chunks of maxseq + 1 indices; the extra element lets each
# chunk be split into an input and a one-step-shifted target. A trailing partial
# chunk is dropped.
seqs = trainset.batch(maxseq+1 , drop_remainder = True)

def split_input_to_target(seq):
  """Split a chunk into an (input, target) pair offset by one position.

  The input is everything except the last element and the target is
  everything except the first, so target[i] is the element that follows
  input[i] in the original chunk.
  """
  return seq[:-1], seq[1:]

# Pipeline settings: examples per batch and the size of the shuffle buffer.
batch_size = 64
buffer_size = 10000

# Turn every chunk into an (input, target) pair, shuffle the pairs, then group
# them into fixed-size batches (an incomplete final batch is dropped).
trainset = (
    seqs.map(split_input_to_target)
        .shuffle(buffer_size)
        .batch(batch_size, drop_remainder=True)
)

# Vocabulary size, i.e. the number of distinct characters.
print(len(chars))



def make_model(voc_size, batch_size, embedding_dim=256, rnn_units=(1024, 512)):
  """Build the character-level model: Embedding -> stacked GRUs -> Dense.

  Args:
    voc_size: number of distinct characters; used as the embedding input size
      and as the width of the final Dense layer (one output per character).
    batch_size: fixed batch dimension baked into the input shape. The GRU
      layers are stateful, so the model must be called with this batch size.
    embedding_dim: width of the character embedding (default 256).
    rnn_units: iterable with the number of units of each stacked GRU layer,
      in order (default (1024, 512)).

  Returns:
    An uncompiled tf.keras.Sequential model. The Dense layer has no
    activation, so its per-character outputs are raw scores.
  """
  layers = [
      tf.keras.layers.Embedding(voc_size, embedding_dim,
                                batch_input_shape=[batch_size, None]),
  ]
  # Every GRU returns the full sequence so the next layer (and the final Dense)
  # sees one vector per time step.
  for units in rnn_units:
    layers.append(
        tf.keras.layers.GRU(units, return_sequences=True, stateful=True,
                            recurrent_initializer='glorot_uniform'))
  layers.append(tf.keras.layers.Dense(voc_size))
  return tf.keras.Sequential(layers)

# The model is built with a fixed batch dimension and stateful GRUs, so it has to
# match the batch size of trainset; reuse batch_size rather than repeating 64.
myModel = make_model(len(chars), batch_size)

#she's a beaut
# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output.
myModel.summary()


90
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           23040     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3935232   
_________________________________________________________________
gru_1 (GRU)                  (64, None, 512)           2360832   
_________________________________________________________________
dense (Dense)                (64, None, 90)            46170     
Total params: 6,365,274
Trainable params: 6,365,274
Non-trainable params: 0
_________________________________________________________________
None


Training

In [0]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits=True tells Keras that `logits` are unnormalised scores rather
  than probabilities.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy
  return crossentropy(labels, logits, from_logits=True)
# Checkpoint location; the {epoch} placeholder in the file name is filled in by
# the callback, and only the weights are written.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
)

# Adam optimiser with the custom logits-based loss defined above.
myModel.compile(optimizer='adam', loss=loss)

# Train for 30 epochs, checkpointing through the callback.
history = myModel.fit(
    trainset,
    epochs=30,
    callbacks=[checkpoint_callback],
)

Epoch 1/30
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


Predictions

In [0]:
#rebuild the model with a smaller batch size
#lets us feed it less starter text

predmodel = make_model(len(chars),1)

# latest_checkpoint returns None when the directory holds no checkpoint; fail with
# a clear message instead of handing None to load_weights.
latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
if latest_ckpt is None:
  raise IOError("No checkpoint found in %r; run the training cell first." % (checkpoint_dir,))
predmodel.load_weights(latest_ckpt)
predmodel.build(tf.TensorShape([1,None]))

predmodel.summary()


#text gen function

def textgen (model, len, startstr, temp = 1.001):
  """Generate text one character at a time from a seed string.

  Args:
    model: callable Keras model that maps a batch of character indices to
      per-step logits. It is called with a batch dimension of 1 and its
      recurrent state is reset first (assumes a stateful model such as the one
      built by make_model with batch_size=1).
    len: number of characters to generate (shadows the builtin inside this
      function; the name is kept for compatibility with existing callers).
    startstr: non-empty seed text; every character must be a key of char2num.
    temp: sampling temperature, must be > 0. The logits are divided by it
      before sampling, so values above 1 flatten the distribution and values
      below 1 sharpen it.

  Returns:
    The generated characters joined into one string. The seed itself is NOT
    included in the result.

  Raises:
    ValueError: if startstr is empty or temp is not positive.
    KeyError: if startstr contains a character missing from char2num.
  """
  if not startstr:
    raise ValueError("startstr must contain at least one character")
  if temp <= 0:
    raise ValueError("temp must be positive, got %r" % (temp,))

  # Encode the seed and add a leading batch dimension of size 1.
  next_input = tf.expand_dims([char2num[ch] for ch in startstr], 0)

  generated = []

  # Start from a clean recurrent state so earlier calls do not leak in.
  model.reset_states()
  for _ in range(len):
    # Drop the batch dimension, leaving one row of logits per input step.
    logits = tf.squeeze(model(next_input), 0)
    logits = logits / temp

    # Sample for every step but keep only the draw for the last one.
    pred_id = tf.random.categorical(logits, num_samples = 1)[-1,0].numpy()

    # Only the newly sampled character is fed back on the next iteration.
    next_input = tf.expand_dims([pred_id], 0)

    generated.append(chararr[pred_id])
  return ''.join(generated)

# textgen returns only the newly generated characters, so the seed is put back in
# front before printing.
seed_text = 'Smoke'
generated_lyrics = textgen(predmodel, 1000, seed_text)
print(seed_text + generated_lyrics)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (1, None, 256)            23040     
_________________________________________________________________
gru_4 (GRU)                  (1, None, 1024)           3935232   
_________________________________________________________________
gru_5 (GRU)                  (1, None, 512)            2360832   
_________________________________________________________________
dense_2 (Dense)              (1, None, 90)             46170     
Total params: 6,365,274
Trainable params: 6,365,274
Non-trainable params: 0
_________________________________________________________________
Smokepurpp and I lean me I'm like, "Fuck it," I OD
Feelesst like I pold the street and died
She just wanna fuck me 'cause I'm dister
Bitch with a thot bitch
And I just be poppin' my shit on repeat 
I just be talkin' my shit
Bronauuu bitch I'm