Importing required packages

In [1]:
import pandas as pd
import numpy as np
import re
import keras
from keras.models import Sequential
from keras.layers import Activation,LSTM,Dense
from keras.optimizers import Adam
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Importing song data

In [2]:
#function to load the scraped lyrics file and split it into per-song chunks
def getlyriclist (txt):
    """Read a lyrics text file, clean it and split it on the ``**`` marker.

    Every character that is not an ASCII letter, a digit, ``*``, a newline
    or ``.`` is replaced by a single space, then the cleaned text is split
    on the two-character marker ``**``.

    Parameters
    ----------
    txt : str
        Path of the text file to read.

    Returns
    -------
    list of str
        The cleaned text split on ``**`` (may contain empty strings when the
        marker is at the start/end of the file or repeated).
    """
    with open(txt) as lyricfile: #context manager closes the handle even if read() fails
        lyricstring = lyricfile.read()
    #raw string avoids the invalid '\.' escape warning; the character class is unchanged
    #NOTE(review): generated samples show stray 'n' before line starts, which would be
    #consistent with literal backslash-n pairs in the file (the backslash becomes a space here) - confirm
    lyricstring2 = re.sub(r'[^a-zA-Z0-9*\n.]', ' ', lyricstring) #cleans text data
    lyriclist = lyricstring2.split('**') #splits into list with placeholder marker **
    return lyriclist

In [3]:
#load and clean 'alllyrics.txt' (path is relative to the working directory), split on the ** marker
lyriclist= getlyriclist('alllyrics.txt')

In [4]:
#function to merge the per-song chunks back into one training string
def getalltxt(lyriclist):
    """Concatenate every string in ``lyriclist`` into a single string.

    Parameters
    ----------
    lyriclist : sequence of str
        Text chunks, in the order they should appear in the result.

    Returns
    -------
    str
        All chunks joined with no separator ('' for an empty sequence).
    """
    #str.join builds the result in one pass instead of re-copying on every +=
    return ''.join(lyriclist)

In [5]:
#single string holding every chunk of lyriclist back to back; used as the training corpus below
alltxt=getalltxt(lyriclist)

Data transformation for modelling

In [6]:
#function to build the character<->index lookup tables and vocab size from alltxt
def vocabdict(alltxt):
    """Build character/index lookup dictionaries for ``alltxt``.

    Parameters
    ----------
    alltxt : str
        Text whose distinct characters form the vocabulary.

    Returns
    -------
    char_ix : dict
        Maps each distinct character to an integer index.
    ix_char : dict
        Inverse mapping, index to character.
    vocabsize : int
        Number of distinct characters.
    """
    #sorted() fixes the ordering: iterating a bare set of strings can differ between
    #interpreter sessions, which would silently change which index means which character
    vocab=sorted(set(alltxt)) #list of all unique characters in txt
    char_ix={c:i for i,c in enumerate(vocab)} #character -> index
    ix_char={i:c for i,c in enumerate(vocab)} #index -> character
    vocabsize=len(vocab) #total length of vocab set
    return char_ix,ix_char,vocabsize

In [7]:
#lookup tables (char -> index, index -> char) and the number of distinct characters in alltxt
char_ix,ix_char,vocabsize=vocabdict(alltxt)

In [8]:
def transformalltxt(alltxt,maxlen):
    """Slice ``alltxt`` into overlapping windows and the character after each.

    For every start position ``i`` a window ``alltxt[i:i+maxlen]`` is paired
    with the following character ``alltxt[i+maxlen]``.

    Parameters
    ----------
    alltxt : str
        Source text.
    maxlen : int
        Window length in characters.

    Returns
    -------
    sentences : list of str
        The windows, one per start position.
    nextchar : list of str
        The character immediately after each window (same length as
        ``sentences``).
    maxlen : int
        The window length, returned unchanged.
    """
    sentences=[] #placeholder for sentences
    nextchar=[] #placeholder for next character
    #the last usable start is len(alltxt)-maxlen-1 (its target is the final character);
    #range() excludes its stop value, so the stop must be len(alltxt)-maxlen
    for i in range(len(alltxt)-maxlen): #iterate through alltxt
        sentences.append(alltxt[i:i+maxlen]) #list of all sentences
        nextchar.append(alltxt[i+maxlen]) #list of all next characters
    return sentences,nextchar, maxlen

In [9]:
#build the training windows and their following characters; maxlen is handed back unchanged
sentences, nextchar, maxlen= transformalltxt(alltxt,40) #using maxlen 40

In [10]:
#function to one-hot encode the sentences (x) and their next characters (y)
def getxy (sentences,maxlen,vocabsize,char_ix,targets=None):
    """One-hot encode windows and their following characters.

    Parameters
    ----------
    sentences : sequence of str
        Input windows; the first ``maxlen`` characters of each are encoded.
    maxlen : int
        Number of timesteps per window.
    vocabsize : int
        Size of the one-hot feature axis.
    char_ix : dict
        Maps a character to its feature index.
    targets : sequence of str, optional
        The character following each window, aligned with ``sentences``.
        When omitted, the notebook-level ``nextchar`` list is used, which
        keeps existing four-argument calls working.

    Returns
    -------
    x : numpy.ndarray
        Zeros array of shape (len(sentences), maxlen, vocabsize) with a 1 at
        each (sample, timestep, character index).
    y : numpy.ndarray
        Zeros array of shape (len(sentences), vocabsize) with a 1 at each
        (sample, target character index).

    Raises
    ------
    KeyError
        If a character is missing from ``char_ix``.
    """
    if targets is None:
        targets=nextchar #fallback to the notebook-level list for backward compatibility
    #array of sentences x will predict array of next characters y
    x=np.zeros((len(sentences),maxlen,vocabsize)) #(samples, timesteps, features)
    y=np.zeros((len(sentences),vocabsize)) #(samples, features)
    #populating x,y arrays using dictionary for vocab
    for ix,sentence in enumerate(sentences): #iterate over all samples
        y[ix,char_ix[targets[ix]]]=1 #mark the feature of the character that follows this sample
        for iy in range(maxlen): #iterate over the sample's timesteps
            x[ix,iy,char_ix[sentence[iy]]]=1 #mark the feature of the character at this timestep
    return x,y

In [11]:
#one-hot training arrays: x has shape (samples, maxlen, vocabsize), y has shape (samples, vocabsize)
x,y=getxy(sentences,maxlen,vocabsize,char_ix)

LSTM model

In [12]:
#character-level LSTM network, assembled in one constructor call
model=Sequential([
    LSTM(10,input_shape=(maxlen,vocabsize)), #10 hidden units; input is (maxlen timesteps, vocabsize features)
    Dense(vocabsize), #one output unit per vocabulary character
    Activation('softmax'), #turns the dense outputs into a probability distribution
])
model.summary() #print the layer table before compiling
model.compile(loss='categorical_crossentropy',optimizer=Adam(lr=0.01)) #configure model for training

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10)                3040      
_________________________________________________________________
dense_1 (Dense)              (None, 65)                715       
_________________________________________________________________
activation_1 (Activation)    (None, 65)                0         
Total params: 3,755
Trainable params: 3,755
Non-trainable params: 0
_________________________________________________________________


In [13]:
#train for 5 epochs; as the cell's last expression, the returned History object is echoed below
model.fit(x,y,epochs=5,batch_size=256) #fitting model to x,y data with batch size 256

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xb1f1bae80>

Predictions

In [14]:
#function to generate and print new text from the trained model
def generate(alltxt,maxlen,length=1900):
    """Sample text from the trained model, one character at a time, and print it.

    A random ``maxlen``-character seed is cut from ``alltxt``; then, ``length``
    times, the last ``maxlen`` characters are one-hot encoded, passed to
    ``model.predict`` and the next character is drawn at random according to
    the predicted probabilities. Uses the notebook-level ``model``,
    ``char_ix``, ``ix_char`` and ``vocabsize``.

    Parameters
    ----------
    alltxt : str
        Text the seed is taken from; must be longer than ``maxlen``.
    maxlen : int
        Seed/window length in characters.
    length : int, optional
        Number of characters to generate after the seed (default 1900).

    Returns
    -------
    None
        The seed plus the generated characters are printed, not returned.

    Raises
    ------
    ValueError
        If ``alltxt`` is not longer than ``maxlen``.
    """
    if len(alltxt)<=maxlen:
        raise ValueError('alltxt must be longer than maxlen to pick a seed sentence')
    #setting a random starting index and sentence for predicting
    start_index=random.randint(0,len(alltxt)-maxlen-1) #randint is inclusive at both ends
    generated=alltxt[start_index:start_index+maxlen] #seed: first maxlen characters of the output
    for i in range(length): #one new character per iteration
        x_sample=generated[i:i+maxlen] #at step i this is the most recent maxlen characters
        samp=np.zeros((1,maxlen,vocabsize)) #1 sample, maxlen timesteps, vocabsize features
        for j,char in enumerate(x_sample): #one-hot encode the window
            samp[0,j,char_ix[char]]=1
        #flatten to 1-D and renormalise in float64 so np.random.choice's
        #"probabilities sum to 1" check is not tripped by rounding in the model output
        probs=np.asarray(model.predict(samp),dtype='float64').ravel()
        probs/=probs.sum()
        ix=np.random.choice(vocabsize,p=probs) #draw a vocab index according to the predicted distribution
        generated+=ix_char[ix] #convert the index into a character and append it
    print (generated) #print the seed plus the generated characters

In [15]:
#prints a maxlen-character seed taken from alltxt followed by the sampled characters
generate(alltxt,maxlen) #first generated lyric

nd nBaby I like your style nYa unruly mue got for minkn nFill nCabha din  sthan that  fiad sous  bicky I ond nAnd gleso that t to   they nSeart tryne sorut eistal nep. I s mids in nYaarle nafne solje is me yeahoyll am wiwi nawy t*yull and wichect nJlisiriet thip hing got t not theza crinky  frens  nRspise cee the  rounst you wan us nLer nCauk n tkivi      Wasyfy nHoom you and n fhan    shoro that thee t up the side fwatky asery it shet nOr funs then sone anteld bat no cow bigg you  Kyisty  I avery wes yurt I bussing bah out domy bongh nAnd  sthatan t ay me nCabl acts inee dol.  Liky    t on frensy jom jugct staytin   I rad coud to nera get matteme I treef notring lam suls  1 on nAnd hit chilld werece nGot s mathid wein the con trem wows  ple ay taittotelly donly but bestrin  I is mast cist that ureh not wilp says it was flot  chockely   mer to s andyoytan of trerint gomn a re conding  but body fup lout yitin    that ut   you nen ading stins wing but  how Masts a he firl shet gong  s so