In [None]:
%tensorflow_version 2.2
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

# Text corpus the model is trained on.
path_to_file = 'Xorq.txt'

#tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

#Use your own file 
#path_to_file = list("name".upload().keys())[0]

# Read the raw bytes and decode them as UTF-8; errors='ignore' silently drops undecodable bytes.
# The context manager closes the file handle as soon as the content has been read.
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8', errors='ignore')



`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `2.2`. This will be interpreted as: `2.x`.


TensorFlow is already loaded. Please restart the runtime to change versions.


In [None]:

#######Preprocessing/Encoding########

# The vocabulary is every distinct character of the corpus, in sorted order.
vocab = sorted(set(text))
# Forward lookup: character -> integer id (its position in the sorted vocabulary).
char2idx = dict(zip(vocab, range(len(vocab))))
# Reverse lookup: indexing this array with an id gives back the character.
idx2char = np.array(vocab)

def text_to_int(text):
  """Encode a string as a NumPy array holding one char2idx id per character."""
  ids = list(map(char2idx.__getitem__, text))
  return np.array(ids)

# Integer-encoded version of the whole corpus.
text_as_int = text_to_int(text)

# lets look at how part of our text is encoded
#print("Text:", text[:13])
#print("Encoded:", text_to_int(text[:13]))

def int_to_text(ints):
  """Decode integer ids back into a string using the idx2char lookup array.

  Args:
    ints: the ids to decode. Either a value that can index a NumPy array directly,
      or an object exposing a callable `.numpy()` method, which is converted first.

  Returns:
    The characters selected from idx2char, joined into a single string.

  Raises:
    Whatever `ints.numpy()` raises: a failing conversion now propagates instead of being
    silently swallowed (the previous bare `except:` even hid KeyboardInterrupt).
  """
  to_numpy = getattr(ints, 'numpy', None)
  if callable(to_numpy):
    ints = to_numpy()  # only objects that offer the conversion are converted
  return ''.join(idx2char[ints])

#print(text_as_int[:30])
#print(int_to_text(text_as_int[:30]))

In [None]:
#############Create Training Examples############

# Every training example is cut from seq_length + 1 consecutive characters: the first
# seq_length characters are the input and the last seq_length characters (the same
# sequence shifted one position to the right) are the target.
# input: Hell | output: ello
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)

# One dataset element per encoded character of the corpus.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Group the character stream into chunks of seq_length + 1 ids;
# drop_remainder discards a final chunk that would be too short.
sequences = char_dataset.batch(
    seq_length + 1,
    drop_remainder=True)

def split_input_target(chunk):
    """Split a chunk into (input, target), the target being the input shifted by one.

    For the chunk "hello" this returns ("hell", "ello").
    """
    return chunk[:-1], chunk[1:]

# Apply split_input_target to every chunk so each element becomes an (input, target) pair.
dataset = sequences.map(split_input_target)

#visualize this
#for x, y in dataset.take(2):
#  print("\n\nEXAMPLE\n")
#  print("INPUT")
#  print(int_to_text(x))
#  print("\nOUTPUT")
#  print(int_to_text(y))

# Hyperparameters
BATCH_SIZE = 64          # sequences per training batch
VOCAB_SIZE = len(vocab)  # number of unique characters in the corpus
EMBEDDING_DIM = 256      # length of the vector each character id is embedded into
RNN_UNITS = 1024         # units in the LSTM layer

# tf.data is designed for possibly infinite streams, so it does not shuffle the whole
# dataset in memory; it shuffles within a buffer holding this many elements.
BUFFER_SIZE = 10000

# Shuffle the sequences so they are not presented in corpus order, then group them into
# full batches (drop_remainder discards a final, smaller batch).
data = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)


In [None]:
#############Building the Model###########

#define a function that returns a built model. Right now we're passing the model batches of size 64 for training, but later we will save the model and pass it batches of 1 training sequence to make a prediction 
#Later we will rebuild the model with the same saved parameters, but with a different batch input size 
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the Embedding -> LSTM -> Dense character model.

  Args:
    vocab_size: number of distinct characters; sizes the embedding table and the output layer.
    embedding_dim: length of the vector each character id is embedded into.
    rnn_units: number of units in the LSTM layer.
    batch_size: fixed batch dimension of the input.

  Returns:
    A tf.keras.Sequential model whose output has shape (batch_size, sequence_length, vocab_size).
  """
  # The sequence dimension is None so the model accepts sequences of any length.
  embedding = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None])
  # return_sequences=True yields an output at every timestep instead of only the last one.
  recurrent = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  # One output score per vocabulary character; no activation is applied here.
  char_scores = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, char_scores])

# Training model: built for batches of BATCH_SIZE sequences.
model = build_model(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
    batch_size=BATCH_SIZE)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           23552     
                                                                 
 lstm (LSTM)                 (64, None, 1024)          5246976   
                                                                 
 dense (Dense)               (64, None, 92)            94300     
                                                                 
Total params: 5,364,828
Trainable params: 5,364,828
Non-trainable params: 0
_________________________________________________________________


In [None]:
############Create Loss Function############

# The model outputs a tensor of shape (batch_size, sequence_length, vocab_size): for every
# sequence in the batch and every timestep, one score per vocabulary character.
# During training it is fed batches of 64 sequences of 100 characters, so the output is
# (64, 100, VOCAB_SIZE) -- (64, 100, 92) for the corpus used in this run.
# For prediction we pass a single entry of variable length, which is why build_model takes
# the batch size as a parameter.
# The final Dense layer has no activation, so the vocab_size numbers at each timestep are
# unnormalised scores (logits), not probabilities.

# Use the model before it is trained, with random weights and biases.
for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch)  # prediction for the first training batch (64 entries)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")  # print out the output shape

# The prediction holds 64 arrays, one for each entry in the batch.
#print(len(example_batch_predictions))
#print(example_batch_predictions)

# Examine one prediction.
pred = example_batch_predictions[0]
#print(len(pred))
#print(pred)
# This is a 2D array of length 100: one row per timestep (one character per timestep),
# each row holding vocab_size scores for the next character.

# Finally, look at the prediction at the first timestep.
time_pred = pred[0]
#print(len(time_pred))
#print(time_pred)
# It has vocab_size values, one score for each character that could occur next.

# To pick the predicted characters, sample from the output distribution rather than always
# taking the highest-scoring character.
sampled_indices = tf.random.categorical(pred, num_samples=1)

# Flatten the samples and convert the integer ids back to characters.
sampled_indices = np.reshape(sampled_indices, (1, -1))[0]
predicted_chars = int_to_text(sampled_indices)

# Print explicitly: a bare expression is only displayed when it is the last statement of a
# cell, and further code follows this line.
print(predicted_chars)  # what the untrained model predicted for training sequence 1

def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and the model's raw scores.

  from_logits=True tells Keras that `logits` are unnormalised scores.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy(
      labels,
      logits,
      from_logits=True)
  return crossentropy


(64, 100, 92) # (batch_size, sequence_length, vocab_size)


In [None]:
############Compile the Model############

# Train with the Adam optimizer and the logits-based loss function defined for this model.
model.compile(
    loss=loss,
    optimizer='adam')

In [None]:
############Checkpoints##################

# Checkpoints let us reload the model's weights later and continue training from them.
# Directory the checkpoint files are written to.
checkpoint_dir = './training_checkpoints'
# Name pattern of the checkpoint files; "{epoch}" is a placeholder kept in the path.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback that saves only the weights (not the full model) to checkpoint_prefix.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix)

In [None]:
###########Training the Model#############

# Fit for 3 epochs; checkpoint_callback saves the weights during training.
history = model.fit(
    data,
    epochs=3,
    callbacks=[checkpoint_callback],
)

###########Loading the Model##############

# Rebuild the model with a batch size of 1 (same architecture, via build_model) so it can
# be fed a single sequence when making predictions.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)

# Most recent checkpoint. tf.train.latest_checkpoint returns None when the directory holds
# no checkpoint, so fail with a clear message instead of handing None to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
      "No checkpoint found in " + checkpoint_dir + "; train the model before loading weights.")
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))

#Load an intermediate checkpoint (load_weights takes the checkpoint path itself)
#checkpoint_num = 10
#model.load_weights("./training_checkpoints/ckpt_" + str(checkpoint_num))
#model.build(tf.TensorShape([1, None]))

Epoch 1/3
Epoch 2/3
Epoch 3/3
