In [13]:
import keras
from keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
import pandas as pd
import csv
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint




In [21]:
lyrics = []
with open("pf_lyrics.txt", "r") as f:
    for line in f:
        lyrics.append(line.strip())
    

In [23]:
# Tokenize the lyrics
tokenizer = Tokenizer(char_level=False)


In [24]:
tokenizer.fit_on_texts(lyrics)



In [25]:
sequences = tokenizer.texts_to_sequences(lyrics)

In [30]:
vocab_size = len(tokenizer.word_index) + 1
vocab_size

2942

In [31]:
# Pad the sequences so that they are all the same length
max_length = max([len(seq) for seq in sequences])
sequences = pad_sequences(sequences, maxlen=max_length, padding="pre")

In [32]:
# Split the data into training and testing sets
train_frac = 0.8
split_index = int(train_frac * len(sequences))
X_train = sequences[:split_index]
y_train = sequences[1:split_index+1]
X_test = sequences[split_index:]
y_test = sequences[split_index+1:]

In [33]:
# Create the model
model = Sequential()
model.add(Embedding(vocab_size, 100, input_length=max_length))
model.add(Bidirectional(LSTM(128)))
model.add(Dropout(0.5))
model.add(Dense(vocab_size, activation="softmax"))
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 18777, 100)        294200    
                                                                 
 bidirectional_2 (Bidirectio  (None, 256)              234496    
 nal)                                                            
                                                                 
 dropout_2 (Dropout)         (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 2942)              756094    
                                                                 
Total params: 1,284,790
Trainable params: 1,284,790
Non-trainable params: 0
_________________________________________________________________
None


In [34]:
# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [35]:
# Compile the model
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [37]:
model.fit(X_train,y_train, epochs= 10,callbacks=callbacks_list)



Epoch 1/10


ValueError: Unexpected result of `train_function` (Empty logs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

In [None]:
# Generate new lyrics using the trained model
seed_text = "Money, it's a gas"
num_words_to_generate = 20

In [None]:
for _ in range(num_words_to_generate):
    encoded_text = tokenizer.texts_to_sequences([seed_text])[0]
    padded_text = pad_sequences([encoded_text], maxlen=max_length, padding="pre")
    preds = model.predict(padded_text, verbose=0)[0]
    next_index = np.argmax(preds)
    next_word = tokenizer.index_word[next_index]
    seed_text += " " + next_word

print(seed_text)