In [1]:
import sys

import numpy
import pandas as pd
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.models import Sequential
from keras.optimizers import SGD
from keras.utils import np_utils
from sklearn.model_selection import train_test_split


Using TensorFlow backend.


# Load the training table from disk.
df = pd.read_csv("cs-training.csv")

# Drop the 'Unnamed: 0' column by name. The keyword form replaces the
# positional axis argument (`drop(label, 1)`), which pandas 2.0 rejects.
df = df.drop(columns=['Unnamed: 0'])

# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how="any")

# `.values` replaces `DataFrame.as_matrix()`, which was removed in pandas 1.0.
data = df.values


# Column 0 of `data` is the binary target; the remaining columns are the features.
feature = data[:, 1:]

# One-hot encode the target in a single vectorised step: rows whose target
# equals 1.0 become [1, 0] and every other row becomes [0, 1].
# `numpy` is used under its full name because that is how the notebook imports
# it (there is no `np` alias in scope).
is_positive = data[:, 0] == 1.0
label = numpy.column_stack((is_positive, ~is_positive)).astype(numpy.float32)

x = feature
y = label

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.70, random_state=1996)


# Feed-forward classifier: one 64-unit ReLU hidden layer followed by a
# 2-way softmax that matches the one-hot labels.
model = Sequential()
# The input width is read from the training matrix instead of being hard-coded,
# so the layer stays correct if the feature columns change.
model.add(Dense(units=64, activation='relu', input_dim=x_train.shape[1]))
model.add(Dense(units=2, activation='softmax'))

# Compile once. Compiling a second time replaces the previous configuration,
# so a later compile without `metrics` would drop accuracy reporting.
# `SGD` is imported directly because the bare name `keras` is never bound here.
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True),
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=50, batch_size=50)

# With `metrics=['accuracy']` compiled in, this holds [loss, accuracy] on the held-out rows.
loss_and_metrics = model.evaluate(x_test, y_test, batch_size=20)


In [2]:
filename = "wonderland_short.txt"
# Read through a context manager so the file handle is closed as soon as the
# text is in memory, then lower-case it.
with open(filename) as text_file:
    raw_text = text_file.read().lower()


In [3]:
# Build the vocabulary: every distinct character of the text, in sorted order,
# is assigned its position in that order as an integer id.

chars = sorted(set(raw_text))
char_to_int = {symbol: code for code, symbol in enumerate(chars)}


In [4]:
# Dataset summary: total text length and number of distinct characters.

n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)


Total Characters:  22471
Total Vocab:  46


In [5]:
# Slide a fixed-length window over the text one character at a time: each
# window of `seq_length` character ids is one input pattern, and the id of the
# character immediately after the window is its target.

seq_length = 100
encoded_text = [char_to_int[char] for char in raw_text]
window_starts = range(n_chars - seq_length)
dataX = [encoded_text[begin:begin + seq_length] for begin in window_starts]
dataY = [encoded_text[begin + seq_length] for begin in window_starts]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  22371


In [6]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))

# normalize: scale to the range 0-to-1 to make patterns easier to learn by the LSTM
X = X / float(n_vocab)

# one hot encode the output variable. The width is pinned to the vocabulary
# size: without `num_classes`, to_categorical infers it from the largest id in
# dataY, which is smaller than n_vocab whenever the highest-numbered character
# never occurs as a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

Define a single hidden LSTM layer with 256 memory units. The network uses dropout with a probability of 0.2 (20%). The output layer is a Dense layer using the softmax activation function to output a probability between 0 and 1 for each of the 46 characters in the vocabulary.

In [7]:
# LSTM network: 256 recurrent units, dropout at rate 0.2, then a softmax layer
# with one output per column of the one-hot targets.

n_timesteps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    LSTM(256, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')


In [8]:
# Checkpoint callback: monitor the training loss and, only when it reaches a
# new minimum, save to a file whose name embeds the epoch number and the loss.

filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]


In [9]:
# Train for 20 epochs in mini-batches of 128, with the callbacks in `callbacks_list` attached.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

Epoch 1/20
Epoch 00001: loss improved from inf to 3.07402, saving model to weights-improvement-01-3.0740.hdf5
Epoch 2/20
Epoch 00002: loss improved from 3.07402 to 3.02018, saving model to weights-improvement-02-3.0202.hdf5
Epoch 3/20
Epoch 00003: loss improved from 3.02018 to 3.00756, saving model to weights-improvement-03-3.0076.hdf5
Epoch 4/20
Epoch 00004: loss improved from 3.00756 to 2.95633, saving model to weights-improvement-04-2.9563.hdf5
Epoch 5/20
Epoch 00005: loss improved from 2.95633 to 2.88437, saving model to weights-improvement-05-2.8844.hdf5
Epoch 6/20
Epoch 00006: loss improved from 2.88437 to 2.84418, saving model to weights-improvement-06-2.8442.hdf5
Epoch 7/20
Epoch 00007: loss improved from 2.84418 to 2.80817, saving model to weights-improvement-07-2.8082.hdf5
Epoch 8/20
Epoch 00008: loss improved from 2.80817 to 2.78594, saving model to weights-improvement-08-2.7859.hdf5
Epoch 9/20
Epoch 00009: loss improved from 2.78594 to 2.76536, saving model to weights-impro

<keras.callbacks.History at 0x7ff74e548d30>

In [11]:
# Restore the network weights from a saved checkpoint file, then compile the
# model again with the same loss and optimizer as before.
# NOTE(review): the file name is hard-coded; it must match a checkpoint that a
# training run actually wrote (the name embeds the epoch and loss value).

filename = "weights-improvement-20-2.5680.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [12]:
# Reverse lookup: integer id -> character, i.e. the inverse of char_to_int.
int_to_char = dict(enumerate(chars))

In [17]:
# Pick a random seed pattern.
# randint's upper bound is exclusive, so passing len(dataX) makes every
# pattern, including the last one, eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: the window is modified during generation and must not
# alter the training pattern stored in dataX.
pattern = list(dataX[start])
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

# Generate 1000 characters greedily: predict the next character from the
# current window, emit it, then slide the window forward by one position.

for _ in range(1000):
    # Shape the window as (1 sample, time steps, 1 feature) and scale it the
    # same way the training inputs were scaled.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Take the most probable class; cast to a plain int so the window stays a
    # list of Python integers.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Drop the oldest id and append the new one, keeping the window length fixed.
    pattern = pattern[1:] + [index]

print ("\nDone.")

Seed:
" atch
out of its waistcoat-pocket, and looked at it, and then hurried on,
alice started to her feet,  "
and she was to the wabt the was to the wabt the was to the wabt the was to the wabt the was to the waat the was to the waat the was to the waat the was to the taat the was to the waat the was to the waat the was to the tat io the waot aa ine too to the wao to the wabt the was to the wabt the was to the wabt the was to the waat the was to the waat the was to the waat the was to the taat the was to the waat the was to the waat the was to the tat io the waot aa ine too to the wao to the wabt the was to the wabt the was to the wabt the was to the waat the was to the waat the was to the waat the was to the taat the was to the waat the was to the waat the was to the tat io the waot aa ine too to the wao to the wabt the was to the wabt the was to the wabt the was to the waat the was to the waat the was to the waat the was to the taat the was to the waat the was to the waat the was to