In [1]:
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation, GRU
from keras.callbacks import ModelCheckpoint
from random import randint
import shutil as sh

import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

tbCallBack = tf.keras.callbacks.TensorBoard(log_dir='logs', histogram_freq=1, write_graph=True, write_images=True)

sh.rmtree('logs',ignore_errors=True)


Num GPUs Available:  2


In [2]:
#  Show training history --Ferry
#  Arguments 1: History object returned by model.fit
#            2: Optional string above image..

import numpy as np 
import matplotlib.pyplot as plt 

def plot_history(history, string = 'Trained Performance'):
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    t = f.suptitle(string, fontsize=12)
    f.subplots_adjust(top=0.85, wspace=0.3)

    nr_epochs = len(history.history['accuracy'])
    epoch_list = list(range(1,nr_epochs+1))
    
    ax1.plot(epoch_list, history.history['accuracy'], label='Train Accuracy')
    ax1.plot(epoch_list, history.history['val_accuracy'], label='Validation Accuracy')
    ax1.set_xticks(np.arange(0, nr_epochs, 5))
    ax1.set_ylabel('Accuracy Value')
    ax1.set_xlabel('Epoch')
    ax1.set_title('Accuracy')
    l1 = ax1.legend(loc="best")

    ax2.plot(epoch_list, history.history['loss'], label='Train Loss')
    ax2.plot(epoch_list, history.history['val_loss'], label='Validation Loss')
    ax2.set_xticks(np.arange(0, nr_epochs, 5))
    ax2.set_ylabel('Loss Value')
    ax2.set_xlabel('Epoch')
    ax2.set_title('Loss')
    l2 = ax2.legend(loc="best")

In [3]:
abba = pd.read_csv('abba.csv')
acdc = pd.read_csv('acdc.csv')

In [4]:
abba_corpus = ' ' 
for index, row in abba.iterrows():
    abba_corpus = abba_corpus + str(row['lyrics'])

acdc_corpus = ' ' 
for index, row in acdc.iterrows():
    acdc_corpus = acdc_corpus + str(row['lyrics'])

In [5]:
abba_corpus = abba_corpus.lower()
acdc_corpus = acdc_corpus.lower()

corpus = abba_corpus + acdc_corpus

In [6]:
chars = sorted(list(set(corpus)))
num_chars = len(chars)
encoding = {c: i for i, c in enumerate(chars)}
decoding = {i: c for i, c in enumerate(chars)}

In [7]:
print("Our corpus contains {0} unique characters.".format(num_chars))
print(encoding)

Our corpus contains 72 unique characters.
{'\n': 0, '\r': 1, ' ': 2, '!': 3, '"': 4, '&': 5, "'": 6, '(': 7, ')': 8, ',': 9, '-': 10, '.': 11, '/': 12, '0': 13, '1': 14, '2': 15, '3': 16, '4': 17, '5': 18, '6': 19, '7': 20, '8': 21, '9': 22, ':': 23, '?': 24, '[': 25, ']': 26, '_': 27, 'a': 28, 'b': 29, 'c': 30, 'd': 31, 'e': 32, 'f': 33, 'g': 34, 'h': 35, 'i': 36, 'j': 37, 'k': 38, 'l': 39, 'm': 40, 'n': 41, 'o': 42, 'p': 43, 'q': 44, 'r': 45, 's': 46, 't': 47, 'u': 48, 'v': 49, 'w': 50, 'x': 51, 'y': 52, 'z': 53, '|': 54, '\x7f': 55, '\x83': 56, '\x9f': 57, '¡': 58, '¤': 59, '¥': 60, '¦': 61, '©': 62, '\xad': 63, '±': 64, '³': 65, '¶': 66, 'º': 67, '¼': 68, '¿': 69, 'â': 70, 'ã': 71}


In [8]:
# it slices, it dices, it makes julienned datasets!
# chop up our data into X and y, slice into roughly (num_chars / skip) overlapping 'sentences'
# of length sentence_length, and encode the chars
# Mischien is sentence_length te kort na 20 een nieuw ckar voorspellen?.
sentence_length = 20

skip = 1
X_data = []
y_data = []

for i in range (0, len(corpus) - sentence_length, skip):
    sentence = corpus[i:i + sentence_length]
    next_char = corpus[i + sentence_length]
    X_data.append([encoding[char] for char in sentence])
    y_data.append(encoding[next_char])

In [9]:
X_data[1], y_data[1]


([40, 52, 2, 40, 52, 1, 0, 28, 47, 2, 50, 28, 47, 32, 45, 39, 42, 42, 2, 41],
 28)

In [10]:
num_sentences = len(X_data)
print("Sliced our corpus into {0} sentences of length {1}".format(num_sentences, sentence_length))

Sliced our corpus into 475963 sentences of length 20


In [11]:
# now we need one-hot encoding
print("Vectorizing X and y...")
X = np.zeros((num_sentences, sentence_length, num_chars), dtype=np.bool)
y = np.zeros((num_sentences, num_chars), dtype=np.bool)
for i, sentence in enumerate(X_data):
    for t, encoded_char in enumerate(sentence):
        X[i, t, encoded_char] = 1
    y[i, y_data[i]] = 1

Vectorizing X and y...


In [12]:
# Define our model
print("Let's build model.")
model = Sequential()
model.add(LSTM(64, input_shape=(sentence_length, num_chars), return_sequences=True))
model.add(LSTM(64))
model.add(Dense(num_chars))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary() 

Let's build model.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 20, 64)            35072     
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dense (Dense)                (None, 72)                4680      
_________________________________________________________________
activation (Activation)      (None, 72)                0         
Total params: 72,776
Trainable params: 72,776
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Dump our model architecture to a file so we can load it elsewhere
# Find out how to load a model? ,
# return_sequences=True
architecture = model.to_json()
with open('model.json', 'a') as model_file:
    model_file.write(architecture)

# Set up checkpoints, and save trained model
file_path="weights-{epoch:02d}.hdf5"
checkpoint = ModelCheckpoint(file_path, monitor="loss", verbose=1, save_best_only=True, mode="min")
# callbacks = [checkpoint,tbCallBack]
callbacks = [checkpoint]

In [16]:
# Find out how to load the trained checkpoint?
# Lets go, action time!
#with tf.device('/gpu:1'):
    
mirrored_strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"])
history = model.fit(X, y, epochs=10, batch_size=128, callbacks=callbacks)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Epoch 1/10

Epoch 00001: loss improved from inf to 2.41747, saving model to weights-01.hdf5
Epoch 2/10
 764/3719 [=====>........................] - ETA: 1:33 - loss: 2.0978

KeyboardInterrupt: 

In [None]:
plot_history(history)

12345678901234567890