# RAP MACHINE - lyric generator

## Preprocessing - reading and preparing lyrics data 

In [2]:
import pandas as pd 
# Load the full lyrics dataset from disk.
data = pd.read_csv('songdata.csv')

# Every distinct artist name that appears in the dataset.
artists = set(data['artist'])


def _lyrics_for(artist_name):
    """Return the 'text' column restricted to rows whose artist is `artist_name`."""
    return data.loc[data['artist'] == artist_name, 'text']


# Lyrics of the five rappers used to train the generator.
kanyetext = _lyrics_for('Kanye West')
draketext = _lyrics_for('Drake')
eminemtext = _lyrics_for('Eminem')
migostext = _lyrics_for('Migos')
lilwaynetext = _lyrics_for('Lil Wayne')

# Stack the per-artist selections into a single Series, artist by artist.
text = pd.concat([kanyetext, draketext, eminemtext, migostext, lilwaynetext])
print(len(kanyetext))
print(len(text))

106
433


In [3]:
import string

# Fuse all selected songs into one long string (no separator between songs).
corpus = ''.join(text)
# Lower-case everything, then delete the characters . ! ( ) - and "
strip_table = str.maketrans('', '', ".!()-\"")
text = corpus.lower().translate(strip_table)
print(len(text))


1085066


## Processing

In [4]:
#LSTM implementation adapted from https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py

from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys


# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
print('total chars:', len(chars))
# Lookup tables in both directions between a character and its integer id
# (the id is the character's position in `chars`).
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = {idx: ch for idx, ch in enumerate(chars)}



Using TensorFlow backend.


total chars: 44


### Preparing the training data: cleaning each song and collecting the lyrics into a list

In [6]:
import string

# Rebuild the corpus as a list with one cleaned string per song, so that
# later steps can work song by song.
# The table deletes the characters . ! ( ) - and " from each song.
punctuation_table = str.maketrans('', '', ".!()-\"")
songs = [
    lyrics.lower().translate(punctuation_table)
    for artist_songs in [kanyetext, draketext, eminemtext, migostext, lilwaynetext]
    for lyrics in artist_songs
]

text = songs

In [22]:
# Slice every song into overlapping windows of `maxlen` characters, moving
# `step` characters forward each time. Windows never span two songs.
maxlen = 40
step = 3
sentences = []
next_chars = []

# Example of the windows produced for one song (step of 3 characters):
#   "hello darkness my old friend i've come t"
#   "lo darkness my old friend i've come to t"
#   "darkness my old friend i've come to talk"
#   "kness my old friend i've come to talk wi"
#   ...
# Each window goes into `sentences`; the single character that follows it
# goes into `next_chars` at the same position.
for song in text:
    for start in range(0, len(song) - maxlen, step):
        stop = start + maxlen
        sentences.append(song[start:stop])
        next_chars.append(song[stop])
print('number of sequences:', len(sentences))

number of sequences: 356060


In [23]:
print('Vectorization...')

# One-hot encode the training data:
#   X[i, t, c] is True when character id c occurs at position t of window i
#   y[i, c]    is True when character id c is the character following window i
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

# For every window, flag each character at its position, then flag the
# character that comes right after the window as the training target.
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


### Training the model!

In [26]:

# Assemble the network: one 128-unit LSTM reading (maxlen, len(chars)) one-hot
# windows, followed by a dense layer with one output per character and a
# softmax that turns those outputs into a probability distribution.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')


def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    The probabilities are re-weighted as softmax(log(preds) / temperature):
    a temperature below 1 sharpens the distribution towards its most likely
    entries, a temperature above 1 flattens it.

    Args:
        preds: 1-D array-like of probabilities (e.g. one softmax output row).
        temperature: positive scaling factor applied to the log-probabilities.

    Returns:
        The index selected by a single multinomial draw from the re-weighted
        distribution (uses NumPy's global random state).
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) is -inf, which maps back to probability 0 after exp(); that is
    # the intended result, so the divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0 before exponentiating. Without the shift
    # a small temperature makes every exp() underflow to 0 and the
    # normalisation below becomes 0/0 = NaN. The shift cancels out in the
    # ratio, so the resulting distribution is mathematically unchanged.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

# train the model, output generated text after each iteration

# `text` is a list of songs here. Only songs strictly longer than `maxlen`
# can supply a full seed window, so seeds are drawn from those songs only;
# a shorter song would make the randint() below raise ValueError.
seedable_songs = [idx for idx, song in enumerate(text) if len(song) > maxlen]

for iteration in range(1, 60):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(X, y,
              batch_size=128,
              epochs=1)

    # Pick the seed song from the whole list. The previous upper bound
    # `len(text) - maxlen - 1` treated `text` as one long string and so
    # could never select the last `maxlen` songs.
    start_index = random.choice(seedable_songs)
    # Random offset inside the chosen song, leaving room for a full window.
    random_index = random.randint(0, len(text[start_index]) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print()
        print('----- diversity:', diversity)

        generated = ''
        # The same seed window is reused for every diversity value.
        sentence = text[start_index][random_index: random_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            # One-hot encode the current window as a batch of size 1.
            x = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x[0, t, char_indices[char]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
       

Build model...

--------------------------------------------------
Iteration 1
Epoch 1/1

----- diversity: 0.2
----- Generating with seed: "don't hit again?  
dog are you fucking k"
don't hit again?  
dog are you fucking king  
i got the could me  
i got to the don't was don't never mean  
i got the corder the pare  
i got the secen  
i got to the beand the same  
i got the could to the down  
i got the cound  
i got to the count  
i never tell the cramp  
i don't never think i got the ready  
i got a show it  
i got a love the down  
i got the drip in the come  
i can see i got a could the see it  
i got the down 

----- diversity: 0.5
----- Generating with seed: "don't hit again?  
dog are you fucking k"
don't hit again?  
dog are you fucking keep that i was home  
the ama pail the could the gonna get the did a down  
i got a could i don't never can't watch for you  
i don't way you a car see i got the deame  
i got than you don't don't give it to shade  
i can fee a lot gotta be it 

show me a pear let youm the lead mes lop itliwheres then cup embadys  
i wait all  
  
  
they kips me sreen mine  
but nigga but i've ain't rap canntige money  
and how alllows, baain, that's like done gifr that's a phatterching  
far ats and hop the swied impitcoover low wow, gurs  
they be jempin like the fuckings off  
i go away  
there's not no syre and there's into, i'm so dees on the dop the white su

----- diversity: 1.2
----- Generating with seed: " me a good  
show me a good  
show me a "
 me a good  
show me a good  
show me a b paint  
you scaopetes wit easundnmoog money, musa diama g?  
i'm cucking mildg, i came you like weezy in a ooly  
fufl solatle, fucked cooly had this hit us up dat she, i mezs treing it fall?  
me smellull wall as turffout, diak, dem aint  
in thes pussill bigs buo guess yes ogh, long to yoom man  
stan for it fuck it, i moss toskaffy, jurt us i'm lefes all fam ranger up  
lsy len  
until this w

--------------------------------------------------
Ite

KeyboardInterrupt: 