In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random

import tensorflow
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import os

In [7]:
print(tensorflow.__version__)

2.3.0


In [8]:
import tqdm

In [9]:
with open('trump_tweets.txt', 'r', encoding='utf-8') as f:
    text = f.read()


In [10]:
text = text.replace("\n", " ")  # We remove newlines chars for nicer display
print("Corpus length:", len(text))

chars = sorted(list(set(text)))
print("Total chars:", len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 100
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i : i + maxlen])
    next_chars.append(text[i + maxlen])
print("Number of sequences:", len(sentences))

x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.float32)
y = np.zeros((len(sentences), len(chars)), dtype=np.float32)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Corpus length: 583429
Total chars: 54
Number of sequences: 194443


In [11]:
#import tensorflow.python.ops.numpy_ops.np_config
#np_config.enable_numpy_behavior()

'''
model.load_weights('test_generation_2.h5') 
model.build(tensorflow.TensorShape([1, None]))
#print(model.summary())
def generate_text(model, start_string):
    print('Generating with seed: "' + start_string + '"')
  
    num_generate = 1000
    input_eval = [char_indices[s] for s in start_string]
    input_eval = tensorflow.expand_dims(input_eval, 0)
    input_eval = tensorflow.reshape(input_eval, [1, 1, 11])
    print(input_eval)
    text_generated = []
    temperature = 1.0
    model.reset_states()
    for i in range(num_generate):
        predictions = model(input_eval)
        predictions = tensorflow.squeeze(predictions, 0)
        predictions = predictions / temperature
        predicted_id = tensorflow.random.categorical(predictions,      num_samples=1)[-1,0].numpy()
        input_eval = tensorflow.expand_dims([predicted_id], 0)
        text_generated.append(indices_char[predicted_id])
    return (start_string + ''.join(text_generated))
print(generate_text(model, start_string="joy of gods"))
'''

'\nmodel.load_weights(\'test_generation_2.h5\') \nmodel.build(tensorflow.TensorShape([1, None]))\n#print(model.summary())\ndef generate_text(model, start_string):\n    print(\'Generating with seed: "\' + start_string + \'"\')\n  \n    num_generate = 1000\n    input_eval = [char_indices[s] for s in start_string]\n    input_eval = tensorflow.expand_dims(input_eval, 0)\n    input_eval = tensorflow.reshape(input_eval, [1, 1, 11])\n    print(input_eval)\n    text_generated = []\n    temperature = 1.0\n    model.reset_states()\n    for i in range(num_generate):\n        predictions = model(input_eval)\n        predictions = tensorflow.squeeze(predictions, 0)\n        predictions = predictions / temperature\n        predicted_id = tensorflow.random.categorical(predictions,      num_samples=1)[-1,0].numpy()\n        input_eval = tensorflow.expand_dims([predicted_id], 0)\n        text_generated.append(indices_char[predicted_id])\n    return (start_string + \'\'.join(text_generated))\nprint(gene

In [12]:
print(char_indices)

{' ': 0, "'": 1, 'A': 2, 'B': 3, 'C': 4, 'D': 5, 'E': 6, 'F': 7, 'G': 8, 'H': 9, 'I': 10, 'J': 11, 'K': 12, 'L': 13, 'M': 14, 'N': 15, 'O': 16, 'P': 17, 'Q': 18, 'R': 19, 'S': 20, 'T': 21, 'U': 22, 'V': 23, 'W': 24, 'X': 25, 'Y': 26, 'Z': 27, 'a': 28, 'b': 29, 'c': 30, 'd': 31, 'e': 32, 'f': 33, 'g': 34, 'h': 35, 'i': 36, 'j': 37, 'k': 38, 'l': 39, 'm': 40, 'n': 41, 'o': 42, 'p': 43, 'q': 44, 'r': 45, 's': 46, 't': 47, 'u': 48, 'v': 49, 'w': 50, 'x': 51, 'y': 52, 'z': 53}


In [13]:
'''
model = Sequential()
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(256))
model.add(Dropout(0.2))
model.add(Dense(len(chars), activation='softmax'))

optimizer = tensorflow.keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)
'''

'\nmodel = Sequential()\nmodel.add(LSTM(256, return_sequences=True))\nmodel.add(Dropout(0.2))\nmodel.add(LSTM(256))\nmodel.add(Dropout(0.2))\nmodel.add(Dense(len(chars), activation=\'softmax\'))\n\noptimizer = tensorflow.keras.optimizers.RMSprop(learning_rate=0.01)\nmodel.compile(loss="categorical_crossentropy", optimizer=optimizer)\n'

In [14]:
def create_model():
    model = Sequential()

    model.add(LSTM(len(chars)*4, input_shape=(maxlen, len(chars)), return_sequences=True))
    model.add(LSTM(128))
    model.add(BatchNormalization())
    model.add(Activation('selu'))

    model.add(Dense(len(chars)*4))
    model.add(Activation('selu'))

    model.add(Dense(len(chars)*4))
    model.add(BatchNormalization())
    model.add(Activation('selu'))

    model.add(Dense(len(chars), activation='softmax'))

    optimizer = tensorflow.keras.optimizers.RMSprop(learning_rate=0.001)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer)
    
    return model

In [24]:
model.load_weights('test_generation_2.h5') 

seed = 'A Victory for Biden and a Bet on Americas Future'
seed_new = seed
n_chars = 120
sequence_length = 120
# generate 400 characters
generated = ""
for i in tqdm.tqdm(range(n_chars), "Generating text"):
    # make the input sequence
    X = np.zeros((1, sequence_length, len(chars)))
    for t, char in enumerate(seed):
        X[0, (sequence_length - len(seed)) + t, char_indices[char]] = 1
    # predict the next character
    predicted = model.predict(X, verbose=0)[0]
    # converting the vector to an integer
    next_index = np.argmax(predicted)
    # converting the integer to a character
    next_char = indices_char[next_index]
    # add the character to results
    generated += next_char
    # shift seed and the predicted character
    seed = seed[1:] + next_char

print("Seed:", seed_new)
print("Generated text:")
print(generated)

Generating text: 100%|███████████████████████████████████████████████████████████████| 120/120 [00:03<00:00, 37.58it/s]

Seed: A Victory for Biden and a Bet on Americas Future
Generated text:
 I am all over to resulf the USA is Strong on Crime Couch Schumer and State are not for the White House and the Washingt





In [16]:
model = create_model()

                
checkpoint_path = r'training\check.ckpt'
checkpoint_directory = os.path.dirname(checkpoint_path)

callbacks = [
    EarlyStopping(monitor='loss', patience=2, verbose=0),
    ModelCheckpoint(checkpoint_directory, monitor='loss', verbose=1, save_best_only=True, mode='min')
]


In [None]:
model.load_weights('test_generation_2.h5')

In [17]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
import os

try:
    os.makedirs('Generated Text')
except:
    print('Already exists')

In [None]:
tensorflow.tetaefa()

In [None]:
epochs = 20
batch_size = 64


for epoch in range(epochs):
    model.fit(x, y, batch_size=batch_size, epochs=1, callbacks=callbacks)
    print()
    print("Generating text after epoch: %d" % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 0.7]:
        print("...Diversity:", diversity)

        generated = ""
        sentence = text[start_index : start_index + maxlen]
        print('...Generating with seed: "' + sentence + '"')

        for i in range(120):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.0
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            sentence = sentence[1:] + next_char
            generated += next_char

        with open(os.path.join('Generated Text', f'gen_text_batch 64 batch epoch 40'), 'a', encoding='utf-8') as f:
            f.write(f'"...Diversity:" {diversity} \n"...Generating with seed: " {sentence}\n ...Generated: {generated}\n\n')


        print("...Generated: ", generated)
        print()