In [124]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random

import tensorflow
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import os

In [125]:
print(tensorflow.__version__)

2.5.0


In [139]:
import tqdm

In [126]:
with open('trump_tweets.txt', 'r', encoding='utf-8') as f:
    text = f.read()


In [127]:
text = text.replace("\n", " ")  # We remove newlines chars for nicer display
print("Corpus length:", len(text))

chars = sorted(list(set(text)))
print("Total chars:", len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 100
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i : i + maxlen])
    next_chars.append(text[i + maxlen])
print("Number of sequences:", len(sentences))

x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.float32)
y = np.zeros((len(sentences), len(chars)), dtype=np.float32)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Corpus length: 583429
Total chars: 54
Number of sequences: 194443


In [128]:
#import tensorflow.python.ops.numpy_ops.np_config
#np_config.enable_numpy_behavior()

'''
model.load_weights('test_generation_2.h5') 
model.build(tensorflow.TensorShape([1, None]))
#print(model.summary())
def generate_text(model, start_string):
    print('Generating with seed: "' + start_string + '"')
  
    num_generate = 1000
    input_eval = [char_indices[s] for s in start_string]
    input_eval = tensorflow.expand_dims(input_eval, 0)
    input_eval = tensorflow.reshape(input_eval, [1, 1, 11])
    print(input_eval)
    text_generated = []
    temperature = 1.0
    model.reset_states()
    for i in range(num_generate):
        predictions = model(input_eval)
        predictions = tensorflow.squeeze(predictions, 0)
        predictions = predictions / temperature
        predicted_id = tensorflow.random.categorical(predictions,      num_samples=1)[-1,0].numpy()
        input_eval = tensorflow.expand_dims([predicted_id], 0)
        text_generated.append(indices_char[predicted_id])
    return (start_string + ''.join(text_generated))
print(generate_text(model, start_string="joy of gods"))
'''

'\nmodel.load_weights(\'test_generation_2.h5\') \nmodel.build(tensorflow.TensorShape([1, None]))\n#print(model.summary())\ndef generate_text(model, start_string):\n    print(\'Generating with seed: "\' + start_string + \'"\')\n  \n    num_generate = 1000\n    input_eval = [char_indices[s] for s in start_string]\n    input_eval = tensorflow.expand_dims(input_eval, 0)\n    input_eval = tensorflow.reshape(input_eval, [1, 1, 11])\n    print(input_eval)\n    text_generated = []\n    temperature = 1.0\n    model.reset_states()\n    for i in range(num_generate):\n        predictions = model(input_eval)\n        predictions = tensorflow.squeeze(predictions, 0)\n        predictions = predictions / temperature\n        predicted_id = tensorflow.random.categorical(predictions,      num_samples=1)[-1,0].numpy()\n        input_eval = tensorflow.expand_dims([predicted_id], 0)\n        text_generated.append(indices_char[predicted_id])\n    return (start_string + \'\'.join(text_generated))\nprint(gene

In [None]:
print(char_indices)

In [None]:
'''
model = Sequential()
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(len(chars), activation='softmax'))

optimizer = tensorflow.keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)
'''

In [130]:
def create_model():
    model = Sequential()

    model.add(LSTM(len(chars)*4, input_shape=(maxlen, len(chars)), return_sequences=True))
    model.add(LSTM(128))
    model.add(BatchNormalization())
    model.add(Activation('selu'))

    model.add(Dense(len(chars)*4))
    model.add(Activation('selu'))

    model.add(Dense(len(chars)*4))
    model.add(BatchNormalization())
    model.add(Activation('selu'))

    model.add(Dense(len(chars), activation='softmax'))

    optimizer = tensorflow.keras.optimizers.RMSprop(learning_rate=0.001)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer)
    
    return model

In [147]:
model.load_weights('test_generation_2.h5') 

seed = 'A Victory for Biden, and a Bet on America’s Future'
n_chars = 120
sequence_length = 120
# generate 400 characters
generated = ""
for i in tqdm.tqdm(range(n_chars), "Generating text"):
    # make the input sequence
    X = np.zeros((1, sequence_length, vocab_size))
    for t, char in enumerate(seed):
        X[0, (sequence_length - len(seed)) + t, char_indices[char]] = 1
    # predict the next character
    predicted = model.predict(X, verbose=0)[0]
    # converting the vector to an integer
    next_index = np.argmax(predicted)
    # converting the integer to a character
    next_char = indices_char[next_index]
    # add the character to results
    generated += next_char
    # shift seed and the predicted character
    seed = seed[1:] + next_char

print("Seed:", seed)
print("Generated text:")
print(generated)

Generating text:   3%|██▏                                                              | 4/120 [00:00<00:03, 29.85it/s]

The IRS is a scam
he IRS is a scame
e IRS is a scame 
 IRS is a scame o
IRS is a scame or
RS is a scame or 
S is a scame or C


Generating text:   7%|████▎                                                            | 8/120 [00:00<00:03, 31.22it/s]

 is a scame or Ch
is a scame or Chi
s a scame or Chin
 a scame or China
a scame or China 


Generating text:  13%|████████▌                                                       | 16/120 [00:00<00:03, 32.12it/s]

 scame or China V
scame or China Vi
came or China Vir
ame or China Viru
me or China Virus
e or China Virus 
 or China Virus i


Generating text:  20%|████████████▊                                                   | 24/120 [00:00<00:03, 31.82it/s]

or China Virus in
r China Virus in 
 China Virus in A
China Virus in Am
hina Virus in Ame
ina Virus in Amer
na Virus in Ameri


Generating text:  23%|██████████████▉                                                 | 28/120 [00:00<00:02, 30.91it/s]

a Virus in Americ
 Virus in America
Virus in America 
irus in America a
rus in America an
us in America and


Generating text:  29%|██████████████████▋                                             | 35/120 [00:01<00:03, 27.50it/s]

s in America and 
 in America and I
in America and I 
n America and I w


Generating text:  34%|█████████████████████▊                                          | 41/120 [00:01<00:03, 25.38it/s]

 America and I wi
America and I wil
merica and I will
erica and I will 
rica and I will b


Generating text:  37%|███████████████████████▍                                        | 44/120 [00:01<00:03, 25.30it/s]

ica and I will be
ca and I will be 
a and I will be g
 and I will be gi
and I will be giv
nd I will be givi


Generating text:  42%|██████████████████████████▋                                     | 50/120 [00:01<00:02, 24.94it/s]

d I will be givin
 I will be giving
I will be giving 
 will be giving a
will be giving a 


Generating text:  47%|█████████████████████████████▊                                  | 56/120 [00:02<00:02, 24.76it/s]

ill be giving a s
ll be giving a st
l be giving a sto
 be giving a stor
be giving a story
e giving a story 


Generating text:  49%|███████████████████████████████▍                                | 59/120 [00:02<00:02, 22.26it/s]

 giving a story T
giving a story To
iving a story To 
ving a story To B


Generating text:  54%|██████████████████████████████████▋                             | 65/120 [00:02<00:02, 22.91it/s]

ing a story To Bu
ng a story To But
g a story To But 
 a story To But I
a story To But I 
 story To But I w


Generating text:  59%|█████████████████████████████████████▊                          | 71/120 [00:02<00:02, 24.15it/s]

story To But I wi
tory To But I wil
ory To But I will
ry To But I will 
y To But I will b
 To But I will be


Generating text:  64%|█████████████████████████████████████████                       | 77/120 [00:02<00:01, 24.24it/s]

To But I will be 
o But I will be a
 But I will be a 
But I will be a t
ut I will be a te


Generating text:  69%|████████████████████████████████████████████▎                   | 83/120 [00:03<00:01, 25.12it/s]

t I will be a ter
 I will be a terr
I will be a terri
 will be a terrif
will be a terrifi
ill be a terrific


Generating text:  74%|███████████████████████████████████████████████▍                | 89/120 [00:03<00:01, 25.75it/s]

ll be a terrific 
l be a terrific T
 be a terrific Th
be a terrific The
e a terrific They
 a terrific They 


Generating text:  79%|██████████████████████████████████████████████████▋             | 95/120 [00:03<00:00, 25.95it/s]

a terrific They w
 terrific They wi
terrific They wil
errific They will
rrific They will 
rific They will b


Generating text:  84%|█████████████████████████████████████████████████████          | 101/120 [00:03<00:00, 26.41it/s]

ific They will be
fic They will be 
ic They will be a
c They will be a 
 They will be a t
They will be a te


Generating text:  89%|████████████████████████████████████████████████████████▏      | 107/120 [00:04<00:00, 26.40it/s]

hey will be a ter
ey will be a terr
y will be a terri
 will be a terrif
will be a terrifi
ill be a terrific


Generating text:  94%|███████████████████████████████████████████████████████████▎   | 113/120 [00:04<00:00, 23.57it/s]

ll be a terrific 
l be a terrific T
 be a terrific Th
be a terrific The


Generating text:  97%|████████████████████████████████████████████████████████████▉  | 116/120 [00:04<00:00, 24.66it/s]

e a terrific They
 a terrific They 
a terrific They w
 terrific They wi
terrific They wil
errific They will


Generating text: 100%|███████████████████████████████████████████████████████████████| 120/120 [00:04<00:00, 25.96it/s]

rrific They will 
Seed: rific They will b
Generated text:
e or China Virus in America and I will be giving a story To But I will be a terrific They will be a terrific They will b





In [132]:
model = create_model()

                
checkpoint_path = r'training\check.ckpt'
checkpoint_directory = os.path.dirname(checkpoint_path)

callbacks = [
    EarlyStopping(monitor='loss', patience=2, verbose=0),
    ModelCheckpoint(checkpoint_directory, monitor='loss', verbose=1, save_best_only=True, mode='min')
]


In [133]:
model.load_weights('test_generation_2.h5')

In [134]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [135]:
import os

try:
    os.makedirs('Generated Text')
except:
    print('Already exists')

Already exists


In [None]:
tensorflow.tetaefa()

In [None]:
epochs = 20
batch_size = 64


for epoch in range(epochs):
    model.fit(x, y, batch_size=batch_size, epochs=1, callbacks=callbacks)
    print()
    print("Generating text after epoch: %d" % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 0.7]:
        print("...Diversity:", diversity)

        generated = ""
        sentence = text[start_index : start_index + maxlen]
        print('...Generating with seed: "' + sentence + '"')

        for i in range(120):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.0
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            sentence = sentence[1:] + next_char
            generated += next_char

        with open(os.path.join('Generated Text', f'gen_text_batch 64 batch epoch 40'), 'a', encoding='utf-8') as f:
            f.write(f'"...Diversity:" {diversity} \n"...Generating with seed: " {sentence}\n ...Generated: {generated}\n\n')


        print("...Generated: ", generated)
        print()