In [0]:
import numpy as np
from keras.models import Model
from keras.layers import Dense, Reshape, LSTM, Input, Dropout
from keras.optimizers import Adam
from keras import backend as K

In [0]:
# Path of the plain-text corpus that is opened and read when the data is loaded.
file_name = "shakespeare.txt"

In [0]:
# Read the whole corpus into memory. The context manager guarantees the file
# handle is closed even if reading fails (the bare open(...).read() leaked it).
# NOTE(review): this relies on the platform default text encoding -- consider
# passing encoding=... explicitly once the corpus encoding is confirmed.
with open(file_name,'r') as corpus_file:
  data = corpus_file.read()

# Normalise to lower case and trim surrounding whitespace to shrink the vocabulary.
data = data.lower().strip()

# Distinct characters of the corpus. Sorted so that the vocabulary order is the
# same on every run (iteration order of a set of strings is not stable).
chars = sorted(set(data))
vocab_size = len(chars)
print(len(data),vocab_size)

In [0]:
# Build the two lookup tables between characters and integer ids.
# Ids are assigned by position in the sorted vocabulary.
sorted_chars = sorted(chars)
char_to_ix = dict(zip(sorted_chars, range(len(sorted_chars))))
ix_to_char = dict(enumerate(sorted_chars))
print(char_to_ix)

In [0]:
# Cut the corpus into overlapping training examples:
#   X[k] is a window of Tx consecutive characters,
#   Y[k] is the single character that immediately follows that window.
# Consecutive windows start `stride` characters apart.
Tx = 40
stride = 3
window_starts = range(0, len(data) - Tx, stride)
X = [data[start:start + Tx] for start in window_starts]
Y = [data[start + Tx] for start in window_starts]
print(len(X))
print(len(Y))

In [0]:
# vectorization: one-hot encode every input window (x) and its target character (y)
m = len(X)
# Use the builtin `bool` as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((m,Tx,vocab_size), dtype=bool)
y = np.zeros((m,vocab_size), dtype=bool)

for i,sentence in enumerate(X):
  # x[i, j, k] is set when the j-th character of window i has id k
  for j,char in enumerate(sentence):
    x[i,j,char_to_ix[char]] = 1
  # y[i, k] is set when the character following window i has id k
  y[i,char_to_ix[Y[i]]] = 1

print(x.shape)
print(y.shape)



In [0]:
# Value passed as `n_a` to text_generation_model, where it sets `units` of both LSTM layers.
n_a =256

In [0]:
def text_generation_model(vocab_size,n_a,Tx):
  """Assemble a stacked two-layer LSTM network with a softmax output.

  Args:
    vocab_size: size of the last input axis and of the softmax output layer.
    n_a: number of units in each of the two LSTM layers.
    Tx: length of the input sequence (first axis of the per-example input).

  Returns:
    An uncompiled Keras `Model` mapping an input of shape (Tx, vocab_size)
    per example to a `vocab_size`-way softmax output.
  """
  sequence_in = Input(shape=(Tx, vocab_size))

  # First LSTM keeps the full sequence so the second LSTM can consume it.
  hidden = LSTM(units=n_a, return_sequences=True)(sequence_in)
  hidden = Dropout(0.2)(hidden)

  # Second LSTM returns only its final output.
  hidden = LSTM(units=n_a)(hidden)
  hidden = Dropout(0.2)(hidden)

  char_probs = Dense(vocab_size, activation='softmax')(hidden)
  return Model(sequence_in, char_probs)


In [0]:
# Instantiate the network for this corpus' vocabulary size, n_a LSTM units and windows of length Tx.
text_model = text_generation_model(vocab_size, n_a , Tx)

In [0]:
# Print the layer-by-layer overview of the model built above.
text_model.summary()

In [0]:
# Configure training: Adam with a 0.01 learning rate, categorical cross-entropy
# as the loss (targets are one-hot rows), and accuracy reported as a metric.
optimizer = Adam(learning_rate=0.01)
text_model.compile(
  optimizer=optimizer,
  loss='categorical_crossentropy',
  metrics=["accuracy"],
)

In [0]:
# Train on the one-hot windows `x` against the next-character targets `y`
# for 100 epochs, using mini-batches of 256 examples.
text_model.fit(x,y,batch_size=256,epochs=100)

In [0]:
def sample(preds, temparature=1.0):
  """Draw one class index from a probability vector after temperature scaling.

  The probabilities are re-weighted as p_i ** (1 / temparature) and
  renormalised: values below 1.0 sharpen the distribution, values above 1.0
  flatten it, and 1.0 leaves it unchanged.

  Args:
    preds: array-like of non-negative probabilities; any shape is accepted and
      flattened, so a (1, n) batch row can be passed directly.
    temparature: positive scaling factor (the misspelt name is kept so existing
      keyword callers keep working).

  Returns:
    int: the sampled index into the flattened `preds`.

  Raises:
    ValueError: if `temparature` is not strictly positive.
  """
  if temparature <= 0:
    raise ValueError("temparature must be > 0")

  # float64 keeps the renormalised vector within multinomial's sum tolerance;
  # float32 round-off can push the sum above 1 and make it raise.
  probs = np.asarray(preds, dtype='float64').ravel()

  # log(0) = -inf is intended here (such classes end up with probability 0),
  # so silence the divide-by-zero warning.
  with np.errstate(divide='ignore'):
    logits = np.log(probs) / temparature

  # Shift by the maximum before exponentiating so that low temperatures do not
  # underflow every entry to zero; the shift cancels in the normalisation.
  logits = logits - np.max(logits)
  weights = np.exp(logits)
  probs = weights / np.sum(weights)

  # A single multinomial trial yields a one-hot row; its argmax is the drawn
  # index. The length comes from `preds` itself, not from a global vocab_size.
  draw = np.random.multinomial(1, probs, 1)
  return int(np.argmax(draw))

In [0]:
# Seed generation with a randomly chosen training window. randint's upper bound
# is exclusive, so len(X) (not len(X)-1) makes every window selectable.
idx = np.random.randint(0,len(X))
sentence = X[idx]
generated = ''
generated += sentence

# Generate 100 characters, one per iteration.
for i in range(100):
  # One-hot encode the current window exactly as the training data was encoded
  # (every character is encoded; no character is skipped).
  x_test = np.zeros((1,Tx,vocab_size))
  for t,char in enumerate(sentence):
    x_test[0,t,char_to_ix[char]] = 1

  # Predict only once the whole window is encoded. These statements used to sit
  # inside the loop above, which queried the model on partially filled inputs
  # and appended a character for every position of the window.
  preds = text_model.predict(x_test, verbose=0)  # verbose=0: no progress bar per step
  probs = np.asarray(preds).astype('float64').ravel()
  probs = probs / np.sum(probs)  # renormalise after the dtype cast

  #next_ix = np.argmax(probs)   # greedy alternative
  next_ix = np.random.choice(vocab_size,p=probs)
  next_char = ix_to_char[next_ix]
  generated += next_char

  # Slide the window: drop the oldest character, append the new one.
  sentence = sentence[1:]+next_char

print(generated)



