<a href="https://colab.research.google.com/github/jtao22/PythonAI/blob/main/NLP/TextGeneration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [71]:
#import
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os 
import numpy as np

In [72]:
#upload
# Fetch the Shakespeare corpus through Keras' get_file helper; the value it
# returns is kept in `path` and is what the next cell opens.
path = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [73]:
#read
# Read the raw bytes and decode them as UTF-8. The context manager closes the
# file handle as soon as the read finishes instead of leaving it dangling.
with open(path, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')

In [74]:
#encode
# every distinct character of the corpus, in sorted order
vocab = sorted(set(text))
# character -> integer id (the id is the character's position in vocab)
c2i = dict(zip(vocab, range(len(vocab))))
# integer id -> character, as an array so it can be indexed by arrays of ids
i2c = np.array(vocab)

def text2int(text):
  """Encode a string as a NumPy array holding the c2i id of each character.

  Raises KeyError if a character is not a key of c2i.
  """
  ids = []
  for ch in text:
    ids.append(c2i[ch])
  return np.array(ids)
def int2text(ints):
  """Decode a sequence of character ids back into a string via the i2c table.

  ints may be anything that can index a NumPy array (list, ndarray, ...) or an
  object exposing a .numpy() method, in which case it is converted first.
  """
  try:
    ints = ints.numpy()
  except AttributeError:
    # No .numpy() on this object (e.g. list or ndarray): index with it as-is.
    # Only AttributeError is caught so genuine conversion failures and
    # KeyboardInterrupt are no longer silently swallowed.
    pass
  return ''.join(i2c[ints])
# Integer-encoded copy of the whole corpus: one c2i id per character of `text`.
inttext = text2int(text)

In [75]:
# Cut the encoded corpus into windows of 43 ids: 42 input characters plus one
# extra, so the target can be the same window shifted by a single position.
# drop_remainder=True discards a final window that has fewer than 43 ids.
chardata = tf.data.Dataset.from_tensor_slices(inttext)
sequences = chardata.batch(batch_size=43, drop_remainder=True)

In [76]:
#create splitting function
def splitX(x):
  """Split one window into an (input, target) pair offset by one position.

  The input is x without its last element; the target is x without its first,
  so target[i] is the element that follows input[i].
  """
  return x[:-1], x[1:]

#split
# Apply splitX to every element of `sequences`, yielding (input, target) pairs.
dataset = sequences.map(splitX)

In [77]:
# Shuffle the (input, target) pairs with a 10000-element buffer, then group
# them into batches of 64; an incomplete final batch is dropped.
data = (
    dataset
    .shuffle(buffer_size=10000)
    .batch(batch_size=64, drop_remainder=True)
)

In [78]:
#create arch building function
def make_arch(vocab_size, embedding_dim, units, batch_size):
  """Build the Embedding -> LSTM -> Dense character model.

  Args:
    vocab_size: number of distinct character ids; used as the embedding input
      size and as the width of the final Dense layer.
    embedding_dim: size of each embedding vector.
    units: number of LSTM units.
    batch_size: fixed batch size written into the model's input shape (the
      LSTM is created with stateful=True); the sequence length is left open.

  Returns:
    An uncompiled tf.keras.Sequential model.
  """
  embed = tf.keras.layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
  recurrent = tf.keras.layers.LSTM(
      units,
      return_sequences=True,  # emit an output at every timestep
      stateful=True,
      recurrent_initializer='glorot_uniform')
  # no activation here: the loss in this notebook is called with from_logits=True
  head = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embed, recurrent, head])

# Training model: batch size 64 matches the batches produced for `data`.
arch = make_arch(
    vocab_size=len(vocab),
    embedding_dim=256,
    units=1024,
    batch_size=64,
)
arch.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (64, None, 256)           16640     
_________________________________________________________________
lstm_7 (LSTM)                (64, None, 1024)          5246976   
_________________________________________________________________
dense_7 (Dense)              (64, None, 65)            66625     
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


In [79]:
#create loss function
def loss(Y,logits):
  """Sparse categorical cross-entropy between integer targets Y and raw logits."""
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=Y,
      y_pred=logits,
      from_logits=True,
  )

In [80]:
#compile arch
# Use the Adam optimizer together with the custom `loss` wrapper defined above.
arch.compile(optimizer='adam',loss=loss)

In [81]:
#set up checkpoints
# Weights-only checkpoints go under ./training_checkpoints; the "{epoch}"
# placeholder in the file name is left for Keras to fill in.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join('./training_checkpoints', "ckpt_{epoch}"),
    save_weights_only=True,
)

In [None]:
#train
# Fit on `data` for 100 epochs with the checkpoint callback attached; the
# object returned by fit() is kept in `hist`.
hist = arch.fit(data, epochs=100, callbacks=[checkpoint_callback])

In [83]:
#rebuild model from checkpoint
# Same layer sizes as the training model, but with batch size 1 so a single
# seed sequence can be fed in at generation time.
arch = make_arch(
    vocab_size=len(vocab),
    embedding_dim=256,
    units=1024,
    batch_size=1,
)

In [84]:
#load in previously trained weights
# Restore from the most recent checkpoint found in ./training_checkpoints,
# then build the model for a batch of one sequence of unspecified length.
arch.load_weights(
    filepath=tf.train.latest_checkpoint(checkpoint_dir='./training_checkpoints')
)
arch.build(input_shape=tf.TensorShape([1, None]))

In [85]:
#create text generating function
def generate(model, input, num, temperature=1.0): #num = number of characters to generate
  """Sample `num` characters from `model`, seeded with the string `input`.

  Args:
    model: callable model exposing reset_states(); it is called with a
      single-sequence batch of character ids and its output is treated as
      per-timestep logits.
    input: non-empty seed string; every character must be a key of c2i.
      (The name shadows the builtin but is kept so existing calls still work.)
    num: number of characters to generate.
    temperature: positive value the logits are divided by before sampling.
      Defaults to 1.0, which leaves the logits unchanged; larger values
      flatten the distribution, smaller values sharpen it.

  Returns:
    The seed string followed by the generated characters.

  Raises:
    ValueError: if `input` is empty or `temperature` is not positive.
    KeyError: if `input` contains a character that is not in c2i.
  """
  # Fail early with a clear message instead of an obscure tensor-shape error.
  if not input:
    raise ValueError("input must be a non-empty seed string")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  #vectorizing
  input_data = [c2i[s] for s in input]
  input_data = tf.expand_dims(input_data, 0) #add batch dim

  result = []
  model.reset_states()
  for _ in range(num):
    predictions = model(input_data)
    predictions = tf.squeeze(predictions, 0) #remove batch dim

    #sample from the categorical distribution given by the scaled logits;
    #only the draw for the last timestep is used
    predictions = predictions / temperature
    predicted_val = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

    #make predicted character the next input (the model is not reset between steps)
    input_data = tf.expand_dims([predicted_val], 0)
    result.append(i2c[predicted_val])
  return input + ''.join(result)


In [86]:
# Prompt for a seed string and a length, then print the generated text.
# int() raises ValueError if the length typed is not an integer.
inp = input("Type Starting string: ")
num = int(input("Type the number of characters to generate: "))
print(generate(arch,inp,num))

Type Starting string: romeo
Type the number of characters to generate: 800
romeo; or:
'Tis the moon eyes and weeps,
But NENIUS:
You have done! Come, son, how ikes, to what I saw, to make you fall? Tybalt s love?

SICINIUS:
What's him that I so? Well save thy not so train
I am too near as of your worship's points
As fond that place, it is enemies,
Our fiends.

HENRY BOLINGBROKE:
Mistake not what; that to smile with God about,
And say 'Widown you know me?

MERCUTIO:
Gony, come, you must know securely set his remedy pass.

ABHORSON:
Go in, Bereftu wilt have him come with thee:
Now incul, lest you be more.

BRUTUS:
There's never and women: 'bout we will call thee Clasting of his blood. The willon the world I loved and to him pleaseth.

Third Gentleman:
Who courches wer honour-flaw, I'll answer him
for their boar-siles account of your Lord George doubt.

YORK:
What, wilt t
