In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so files stored there are reachable
# through the local filesystem.
drive.mount('/content/drive')

In [8]:
# Load Packages
import tensorflow as tf
from tensorflow.keras import backend
#from __future__ import print_function
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential, load_model, save_model
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop, Adam
from keras.utils.data_utils import get_file
import keras
import numpy as np
import pandas as pd
import random
import sys
import io
import re
import os

In [None]:
# Work from the project folder on the mounted Drive so that the relative
# paths used in this notebook ('drake-songs.csv', 'drake_character_model.h5')
# resolve against it.
os.chdir('/content/drive/My Drive/Colab Notebooks/NLP Group Project/')

In [3]:
# Read the songs table from the current working directory; its 'lyrics'
# column is the source of the training corpus built below.
songs = pd.read_csv('drake-songs.csv')

In [4]:
# Build a single lowercase corpus string from every song's lyrics.
# Within a song, only runs of letters/apostrophes are kept and are joined by
# one space, so punctuation, digits and line breaks are dropped.
#
# The column is iterated directly instead of via Series.iteritems(), which
# was removed in pandas 2.0. Pieces are collected in a list and joined once,
# avoiding repeated string concatenation inside the loop.
song_pieces = []
for lyrics in songs['lyrics']:
    # str() also turns a missing value (NaN) into the literal word 'nan'.
    words = re.findall(r"[a-z']+", str(lyrics).lower())
    song_pieces.append(" ".join(words))

# NOTE(review): songs are concatenated with no separator, so the last word of
# one song is fused to the first word of the next - confirm this is intended.
text = "".join(song_pieces)

len(text)

367372

In [5]:
# Character vocabulary: every distinct letter, apostrophe or whitespace
# character that occurs in the corpus, in sorted order.
tokens = re.findall(r"[a-z'\s]", text)
chars = sorted(set(tokens))
print('total chars:', len(chars))

# Two-way lookup between a character and its position in `chars`.
char_indices = {symbol: position for position, symbol in enumerate(chars)}
indices_char = dict(enumerate(chars))

total chars: 28


In [6]:
# Slide a window of `maxlen` characters over the corpus, advancing `step`
# characters each time. Each window is one training input and the character
# immediately after it is the prediction target.
maxlen = 40
step = 3

window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]

print('nb sequences:', len(sentences))

nb sequences: 122444


In [7]:
# One-hot encode the training data.
#   x[i, t, c] is True when character t of window i is chars[c].
#   y[i, c]    is True when the character following window i is chars[c].
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = True
    y[i, char_indices[next_chars[i]]] = True

In [18]:
# Character-level language model: two stacked LSTM layers read a window of
# `maxlen` one-hot characters, followed by a ReLU dense layer and a softmax
# over the character vocabulary.
model = Sequential()
# return_sequences=True so the second LSTM receives the full sequence.
model.add(LSTM(128, input_shape=(maxlen, len(chars)), return_sequences=True))
model.add(LSTM(64))
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

# `learning_rate` replaces the deprecated `lr` alias, which newer Keras
# optimizers no longer accept.
model.compile(loss='categorical_crossentropy', optimizer=RMSprop(learning_rate=0.01))

In [19]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 40, 128)           80384     
_________________________________________________________________
lstm_3 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               6500      
_________________________________________________________________
activation_2 (Activation)    (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 28)                2828      
_________________________________________________________________
activation_3 (Activation)    (None, 28)                0         
Total params: 139,120
Trainable params: 139,120
Non-trainable params: 0
________________________________________________

In [11]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    The probabilities are moved to log space, divided by `temperature`,
    re-normalised with a softmax, and a single index is drawn from the
    resulting distribution.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: positive number. Values below 1 sharpen the
            distribution towards the most likely entry; values above 1
            flatten it.

    Returns:
        The sampled index, as returned by ``np.argmax``.

    Raises:
        ValueError: if `temperature` is not positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    preds = np.asarray(preds).astype('float64')
    # log(0) is -inf, which maps back to probability 0 after the softmax, so
    # the divide-by-zero warning is noise.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0 before exponentiating: with very small
    # temperatures every exp() term would otherwise underflow to 0 and the
    # normalisation would produce NaNs.
    exp_preds = np.exp(logits - np.max(logits))
    scaled = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, scaled, 1)
    return np.argmax(probas)

In [14]:
def on_epoch_end(epoch, logs):
    """Print text generated by the model at the end of a training epoch.

    A random `maxlen`-character seed is cut from `text`. For each diversity
    value (passed to `sample` as the temperature), 250 characters are
    generated one at a time and streamed to stdout.

    Args:
        epoch: epoch index supplied by the callback; used only in the header.
        logs: supplied by the callback; not used.
    """
    print('----- Generating text after Epoch: %d' % epoch)

    # The same seed is reused for every diversity value.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed = text[start_index: start_index + maxlen]

    for diversity in [0.2, 0.5, 0.8]:
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Rolling window of the most recent `maxlen` characters.
        window = seed
        for _ in range(250):
            # One-hot encode the current window as a batch of one.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, symbol in enumerate(window):
                x_pred[0, pos, char_indices[symbol]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Drop the oldest character and append the new one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [21]:
# Call on_epoch_end after every epoch so sample text is printed while training.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Fit the model on the one-hot windows (x) and next-character targets (y);
# the returned object is kept in `history`.
history = model.fit(
    x, 
    y,
    batch_size=256,
    epochs=15,
    callbacks=[print_callback]
)

Train on 122444 samples
Epoch 1/15
----- diversity: 0.2
----- Generating with seed: "e of them want this shit more than i wan"
e of them want this shit more than i wanna that stare that still that stare the care the back that shit the that i know the still i got that stare that stare that i got the stare i got that start that shit that i know that stoust stary that i got that stare that start that stary that stare
----- diversity: 0.5
----- Generating with seed: "e of them want this shit more than i wan"
e of them want this shit more than i wan me made that streach my stay that still we for i can't to time mach i can't still i caust you that saise to my head but i know the still come dass bet that still that you way to keel me hart how me i know that's some to the got that i can't and i me
----- diversity: 0.8
----- Generating with seed: "e of them want this shit more than i wan"
e of them want this shit more than i wan my no mode keeclys me one toipt got just shit sting to lust and pu

  This is separate from the ipykernel package so we can avoid doing imports until


he bottom the money the back the start the c
----- diversity: 0.5
----- Generating with seed: "not to give a fuck and stop fearin' the "
not to give a fuck and stop fearin' the bough but you to me so i can't be in the breaked i need to part the gees got the shit gon' you do one yeah they got the chank me got the shit i'm doing you i'm youn' i had that you who it i can't be poess the probless when i want you don't fad you we
----- diversity: 0.8
----- Generating with seed: "not to give a fuck and stop fearin' the "
not to give a fuck and stop fearin' the didn't to sown now you someone check the belaper i understand one yourselp been the screan now a m see you're from yeah that's it you roll the fact you to me the whole i fonge reach the nad the beerd back when i tryna rope march iffroppin' you that i
Epoch 5/15
----- diversity: 0.2
----- Generating with seed: "was strung out mickey what you mean i'm "
was strung out mickey what you mean i'm a can and if you and i got a still on the cunt

it months on the lease that's a come up on the screade i'm on the gless i'm just to the beam the beast the bity and i got my wordied ovos like i should be the whole thing i wanna be underpart that they to the bitch to but i don't have go dine how the stories to the fuck with somethin' they
----- diversity: 0.8
----- Generating with seed: "it months on the lease that's a come up "
it months on the lease that's a come up all you i was so othin' only pimfel better this git ts bring you don't need to tell the still be but the pished on it and my bitch i heart been nigga of me through you busy up shut they say you need my lioul young that a same and i got shit up i'm ba
Epoch 11/15
----- diversity: 0.2
----- Generating with seed: "ake along you know that i'm workin' i'll"
ake along you know that i'm workin' i'll be under the shit i got the shit i don't got the bitch to me they got to talkin' the shit i got the stuck it the story when i can see they should be the way i got the house they sho

In [22]:
# Save the trained model to 'drake_character_model.h5' (relative path, so it
# is written to the current working directory).
model.save('drake_character_model.h5')