In [7]:
import re

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Load the lyrics corpus, lowercased so the vocabulary is case-insensitive.
with open('./Bowie.txt', 'r', encoding='utf-8') as f:
    text = f.read().lower()

# Tokenise into word tokens. A newline is kept as its own token so that line
# breaks become part of the sequences the model is trained on.
tokens = re.findall(r'\b\w+\b|[\n]', text)

# Vocabulary: indices follow the sorted order of the distinct tokens, so
# index 0 is a real token (whichever sorts first), not a reserved padding id.
unique_words = sorted(set(tokens))
word_to_index = {w: i for i, w in enumerate(unique_words)}
index_to_word = {i: w for w, i in word_to_index.items()}
vocab_size = len(unique_words)

# Each training example is `seq_length` context tokens followed by the token
# to predict, i.e. a window of seq_length + 1 consecutive tokens.
seq_length = 10
if len(tokens) <= seq_length:
    # Without this check the slicing below fails with an opaque IndexError,
    # because an empty list becomes a 1-D array.
    raise ValueError(
        f"Corpus has only {len(tokens)} tokens; need more than {seq_length} "
        "to build training sequences."
    )

# Encode every token once, then slide a window over the encoded ids.
token_ids = [word_to_index[token] for token in tokens]
sequences = np.array(
    [token_ids[i - seq_length:i + 1] for i in range(seq_length, len(token_ids))]
)

# Split each window into the context (X) and the next-token target (y).
X, y = sequences[:, :-1], sequences[:, -1]
# One-hot targets to match a categorical cross-entropy loss. This builds a
# dense (n_sequences, vocab_size) array, which can be large for big corpora.
y = to_categorical(y, num_classes=vocab_size)


In [8]:
# Next-word predictor: token ids -> 50-d embeddings -> a single LSTM layer
# that returns only its final state -> softmax over the whole vocabulary.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=50),
    LSTM(150, return_sequences=False),
    Dense(vocab_size, activation='softmax'),
])

# Targets are one-hot vectors, hence categorical (not sparse) cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 50)          307600    
_________________________________________________________________
lstm_1 (LSTM)                (None, 150)               120600    
_________________________________________________________________
dense_1 (Dense)              (None, 6152)              928952    
Total params: 1,357,152
Trainable params: 1,357,152
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train on every (context, next-token) pair; no validation split is held out.
model.fit(
    x=X,
    y=y,
    batch_size=128,
    epochs=50,
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x2521463eb48>

In [10]:
def generate_text(seed_text, n_words=50):
    """Extend ``seed_text`` with ``n_words`` greedily predicted tokens.

    The seed is lowercased and tokenised with the same regular expression
    that was used to build the vocabulary, so punctuation and apostrophes
    are split exactly as they were in the training data (for example
    "don't" becomes "don", "t"). Splitting on whitespace instead would turn
    such words into out-of-vocabulary tokens.

    Args:
        seed_text: Text used as the initial context.
        n_words: Number of tokens to append to the seed.

    Returns:
        The seed tokens followed by the generated tokens, joined by single
        spaces. Newline tokens are kept, so the result may span several lines.

    Notes:
        Tokens missing from ``word_to_index`` fall back to index 0, and
        ``pad_sequences`` also left-pads short contexts with 0. Index 0 is a
        real vocabulary entry here, not a reserved padding id, so both cases
        feed the model that entry.
    """
    result = re.findall(r'\b\w+\b|[\n]', seed_text.lower())
    for _ in range(n_words):
        # Only the most recent `seq_length` tokens form the model input.
        context = result[-seq_length:]
        encoded = [word_to_index.get(w, 0) for w in context]
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        pred = model.predict(encoded, verbose=0)
        # Greedy decoding: take the most probable token of the single
        # sequence in the batch.
        next_index = int(np.argmax(pred[0]))
        result.append(index_to_word[next_index])
    return ' '.join(result)


In [None]:
# Persist the whole trained model to a single HDF5 file.
model.save(filepath='bowie_word_generator.h5')

In [None]:
# Restore the saved model. Keras models have no `load` method, so the
# module-level loader is used, and its result is bound back to `model`
# so that later cells use the restored model.
model = tf.keras.models.load_model('bowie_word_generator.h5')

In [11]:
# Generate a 200-token sample continuing a short seed phrase.
seed = "the stars look very different today"
generated = generate_text(seed, n_words=200)
print(generated)

the stars look very different today with hell 
 
 don t have no cool 
 when a better take a little 
 wants a little toy 
 she says she is all right all 
 here would have my world on any other word 
 sell me a coat with buttons of love 
 you need some easy baby s all a vast creation 
 trying to go away 
 
 still don t ask the sun from the crowd 
 i m in the world said i can do 
 don t you try 
 i keep forgetting you don t want to do me 
 you know i m a mess 
 i m a fire of my 
 for your heart 
 i m not alone 
 
 better i m looking 
 and i m gonna buy a good friend that i want 
 it s got to be writ now it s as so as they say 
 i said just to long 
 just walking the wall out can t come 
 and she says shh 
 she says 
 oh she s all right 
 i think she s all i ve been my side 
 the turn
