In [2]:
import keras
import numpy as np
from keras import layers

Using TensorFlow backend.


##### Preapring Data

In [3]:
#loading data from an SCP file
file = "Archive/MASTERLIST.txt"
raw_text = open(file, 'r', encoding='utf-8').read()

reject_chars = ['€','é','î','³','•','▸','◂','|','°','º','»','«','…','>','<','—','‘','’','“','”','–','~','^', '$', '+']
raw_text = raw_text.lower()

for i in reject_chars:
    raw_text = raw_text.replace(i,'')

text = raw_text.replace('\xa0',' ')
    
# Length of extracted character sequences
maxlen = 60

# We sample a new sequence every `step` characters
step = 3

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

# Next, one-hot encode the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 172551
Unique characters: 58
Vectorization...


##### Model Structure

In [4]:
model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.


In [5]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

##### Training the Model

In [6]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        # We generate 400 characters
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

epoch 1
Instructions for updating:
Use tf.cast instead.
Epoch 1/1

##### Generating "SCP Text" :^)

In [15]:
#opening weights file
json_file = open('LSTM_model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("LSTM_weights.h5")

In [20]:
del(string_mapping)
del(full_string)

string_mapping = X[0]
full_string = [n_to_char[value] for value in string_mapping]

In [21]:
#generate characters
for i in range(400):
    x = np.reshape(string_mapping, (1, len(string_mapping), 1))
    x = x / float(len(chars))
    
    #pred_index = np.argmax(loaded_model.predict(x, verbose=0))
    predicted = loaded_model.predict(x,verbose=0)
    pred_index2 = np.random.choice(len(predicted), p=predicted)
    seq = [n_to_char[value] for value in string_mapping]
    full_string.append(n_to_char[pred_index2])

    string_mapping.append(pred_index2)
    string_mapping = string_mapping[1:len(string_mapping)]

ValueError: Error when checking input: expected lstm_1_input to have shape (100, 1) but got array with shape (101, 1)

In [9]:
#combining text
txt = ""
for char in full_string:
    txt = txt+char

print(txt)


rating: +1187+x


scp-002 in its containment area


item #: scp-002
object class: euclid
special containment procedures: scp-033 is to be centetion of the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing the sepeing 
