<a href="https://colab.research.google.com/github/miraqulass/MachineLearning/blob/MLPractices/PlayGenerator(RNN).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import keras
from keras.preprocessing import sequence
import tensorflow as tf
import os
import numpy as np

In [2]:
# Fetch the Shakespeare corpus from the URL below; get_file returns the path of the local copy.
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step


In [None]:
# Loading Your Own Data
# Optional alternative to the download cell: running this rebinds path_to_file
# to the first key of the mapping returned by files.upload().
# NOTE(review): relies on the google.colab package, so this presumably only runs inside Colab — confirm.

from google.colab import files
path_to_file = list(files.upload().keys())[0]

In [3]:
# Read the raw bytes and decode them as UTF-8. The context manager closes the
# file handle as soon as the read is done instead of leaving it open.
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')

# len(text) is the number of characters in the decoded string.
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [4]:
# Preview the first 250 characters of the corpus.
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
# Encoding
# Build the character vocabulary plus the two lookup tables that translate
# between characters and integer ids.

# Every distinct character of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()

# character -> its position in vocab
char2idx = dict(zip(vocab, range(len(vocab))))
# position in vocab -> character (an array, so many ids can be looked up at once)
idx2char = np.asarray(vocab)

def text_to_int(text):
  """Encode a string as a NumPy array of vocabulary indices.

  Each character is looked up in the module-level char2idx mapping, so a
  character that is missing from that mapping raises KeyError.
  """
  indices = list(map(char2idx.__getitem__, text))
  return np.array(indices)

# Encode the whole corpus once; the training dataset is built from this array.
text_as_int = text_to_int(text)

In [6]:
# Sanity check: show a short prefix of the corpus next to its integer encoding.
print("Text:", text[:13])
print("Encoded:", text_to_int(text[:13]))

Text: First Citizen
Encoded: [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [7]:
# int to text

def int_to_text(ints):
  """Decode vocabulary indices back into a string.

  Args:
    ints: integer indices into the module-level idx2char array. Either
      something NumPy can index with directly, or an object exposing a
      callable .numpy() method, which is called first to obtain the values.

  Returns:
    The characters selected from idx2char, joined into one string.
  """
  # Convert only when a callable .numpy is really present. Errors raised by the
  # conversion itself surface to the caller instead of being silently swallowed.
  to_numpy = getattr(ints, 'numpy', None)
  if callable(to_numpy):
    ints = to_numpy()
  return ''.join(idx2char[ints])

# Round-trip check: decoding the first 13 ids should reproduce the start of the corpus.
print(int_to_text(text_as_int[:13]))

First Citizen


In [8]:
# Creating training examples

# Number of input characters per training example.
seq_length = 100
# Each example consumes seq_length+1 characters: the input plus the target shifted by one.
examples_per_epoch = len(text)//(seq_length+1)

# Create a stream of character ids (one dataset element per encoded character)
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [9]:
# Group the stream into chunks of seq_length+1 ids; drop_remainder=True discards a shorter final chunk.
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

In [10]:
def split_input_target(chunk):
  """Turn one chunk into an (input, target) pair shifted by one position.

  The input is the chunk without its last element and the target is the
  chunk without its first element, e.g. "hello" -> ("hell", "ello").
  """
  return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)   # Map is used to apply the function to every entry

In [11]:
# Show the first two (input, target) pairs decoded back to text.
for x, y in dataset.take(2):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  print("\nOUTPUT")
  print(int_to_text(y))



EXAMPLE

INPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT
irst Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 


EXAMPLE

INPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT
re all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k


In [12]:
# Training batches

BATCH_SIZE = 64  # sequences per training batch
VOCAB_SIZE = len(vocab)  # number of distinct characters in the corpus
EMBEDDING_DIM = 256  # passed to build_model as embedding_dim
RNN_UNITS = 1024  # passed to build_model as rnn_units

BUFFER_SIZE = 10000  # size of the shuffle buffer

# drop_remainder=True keeps every batch at exactly BATCH_SIZE sequences, matching
# the fixed batch dimension the training model is built with.
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [14]:
# Building the Model

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level model: Embedding -> LSTM -> Dense.

    Args:
        vocab_size: number of distinct tokens; sets both the embedding's input
            size and the width of the output layer.
        embedding_dim: size of each embedding vector.
        rnn_units: number of LSTM units.
        batch_size: fixed batch dimension baked into the input shape; the
            sequence dimension is left as None.

    Returns:
        An uncompiled tf.keras.Model producing vocab_size values per time
        step (the final Dense layer has no activation).
    """
    layers = tf.keras.layers

    # The batch dimension is given explicitly; the LSTM below is stateful.
    token_ids = tf.keras.Input(batch_shape=[batch_size, None])

    embedded = layers.Embedding(vocab_size, embedding_dim)(token_ids)
    recurrent = layers.LSTM(
        rnn_units,
        return_sequences=True,   # emit an output at every time step
        stateful=True,           # carry state over to the next batch
        recurrent_initializer='glorot_uniform',
    )(embedded)
    logits = layers.Dense(vocab_size)(recurrent)

    return tf.keras.Model(token_ids, logits)

# Build the training model with the batch dimension fixed to BATCH_SIZE and print its layer summary.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()

In [15]:
# Creating Loss Function
# First, run one batch through the model (not trained yet) to see the shape of its output.

for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [16]:
# Number of sequences in the batch, followed by the full prediction tensor.
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[-1.57356705e-03  3.75959533e-03 -2.49928888e-03 ... -3.26104090e-03
    4.52553900e-03 -2.46310467e-03]
  [-2.58434424e-03  6.55509857e-03 -2.20725173e-03 ... -6.13667443e-03
    4.87516960e-03 -3.81301111e-03]
  [-3.23970173e-03  2.44398951e-03  3.04471123e-05 ... -7.79366866e-03
    7.45243626e-03 -3.43194744e-03]
  ...
  [-5.80912735e-03  8.69219564e-03 -2.79698358e-03 ... -8.44453834e-03
    1.09733762e-02 -3.85400397e-03]
  [-6.29300484e-03  4.70397528e-03 -8.24448070e-04 ... -9.36576724e-03
    1.20593077e-02 -3.40120052e-03]
  [-3.11532058e-03  5.26859844e-03 -6.19381433e-03 ... -7.72590865e-04
    9.09694098e-03  3.01853870e-03]]

 [[ 1.64903048e-03 -1.48612584e-04  2.66639888e-03 ...  1.54478941e-03
    3.77533026e-03 -1.34421128e-03]
  [ 1.34711841e-03 -1.23614992e-03 -3.32090742e-04 ...  2.65504629e-03
    3.84844164e-03 -4.90821106e-03]
  [-1.89970597e-04 -2.97033694e-03  1.66715623e-03 ... -2.00002873e-03
    6.57168683e-03 -4.01853118e-03]
  ...
  [ 1.913

In [17]:
# Predictions for the first sequence in the batch: one row per time step.
pred = example_batch_predictions[0]
print(len(pred))
print(pred)

100
tf.Tensor(
[[-1.5735670e-03  3.7595953e-03 -2.4992889e-03 ... -3.2610409e-03
   4.5255390e-03 -2.4631047e-03]
 [-2.5843442e-03  6.5550986e-03 -2.2072517e-03 ... -6.1366744e-03
   4.8751696e-03 -3.8130111e-03]
 [-3.2397017e-03  2.4439895e-03  3.0447112e-05 ... -7.7936687e-03
   7.4524363e-03 -3.4319474e-03]
 ...
 [-5.8091274e-03  8.6921956e-03 -2.7969836e-03 ... -8.4445383e-03
   1.0973376e-02 -3.8540040e-03]
 [-6.2930048e-03  4.7039753e-03 -8.2444807e-04 ... -9.3657672e-03
   1.2059308e-02 -3.4012005e-03]
 [-3.1153206e-03  5.2685984e-03 -6.1938143e-03 ... -7.7259087e-04
   9.0969410e-03  3.0185387e-03]], shape=(100, 65), dtype=float32)


In [18]:
# Predictions at the first time step: one value per vocabulary entry.
time_pred = pred[0]
print(len(time_pred))
print(time_pred)

65
tf.Tensor(
[-0.00157357  0.0037596  -0.00249929  0.00201318 -0.00077528 -0.00068274
 -0.00682179 -0.00540251  0.00309609  0.00225127 -0.00046036 -0.0056899
  0.00052539 -0.00097078 -0.00657559 -0.00646833  0.00533474  0.00217075
 -0.00521216 -0.00429932  0.00153706 -0.00012905  0.00426194  0.00199585
 -0.00234965 -0.00130541 -0.0018193   0.0016211  -0.00435775  0.00029853
 -0.00100767  0.00450491  0.00014303  0.00028634  0.00690974 -0.00078024
 -0.00107302 -0.00687594 -0.00164249 -0.00098842  0.00122555  0.00358433
 -0.00264983 -0.00799769  0.00762784 -0.00150546  0.00313985 -0.00197665
 -0.00325056  0.00883266  0.00482625 -0.00235795  0.00034117 -0.00215259
  0.0035442   0.00204182  0.00299607 -0.00290806 -0.00017249  0.00333117
  0.00438621 -0.00161199 -0.00326104  0.00452554 -0.0024631 ], shape=(65,), dtype=float32)


In [20]:
# Determining Predicted Characters

# Draw one character id per time step, treating each row of pred as logits.
sampled_indices = tf.random.categorical(pred, num_samples=1)

# Flatten the one-sample-per-row result into a 1-D array of ids before decoding.
sampled_indices = np.reshape(sampled_indices, (1, -1))[0]
predicted_chars = int_to_text(sampled_indices)

# The model has not been trained at this point, so the sample is not expected to be readable.
predicted_chars

'\ne:EBg\naT3N;\n!?F;ZOrf;kwTUIieM-Im ?PoS\nUh$CrG\n;dk!v:gP$pgmcSvkY-LTrKwkgEsYTz kd?p-Vg!GCQCALnLNfL3q:n'

In [21]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits=True tells Keras that `logits` are unnormalized scores rather
  than probabilities.
  """
  cross_entropy = tf.keras.losses.sparse_categorical_crossentropy
  return cross_entropy(labels, logits, from_logits=True)

In [22]:
# Configure training: Adam optimizer with the custom `loss` function.
model.compile(optimizer="adam", loss=loss)

In [24]:
# Creating Checkpoints

checkpoint_dir = './training_checkpoints'
# {epoch} is a placeholder filled in by ModelCheckpoint, so each saved file carries its epoch number.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

# save_weights_only=True stores only the weights, not the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_prefix,
    save_weights_only = True
)

In [25]:
# Model Training
# Train for 40 epochs on the shuffled batches; the checkpoint callback runs during training.

history = model.fit(data, epochs=40, callbacks=[checkpoint_callback])

Epoch 1/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 77ms/step - loss: 2.8687
Epoch 2/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 81ms/step - loss: 1.8477
Epoch 3/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 81ms/step - loss: 1.5958
Epoch 4/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 78ms/step - loss: 1.4718
Epoch 5/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 79ms/step - loss: 1.3997
Epoch 6/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 78ms/step - loss: 1.3453
Epoch 7/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 81ms/step - loss: 1.3037
Epoch 8/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 79ms/step - loss: 1.2661
Epoch 9/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 80ms/step - loss: 1.2315
Epoch 10/40
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21

In [26]:
# Loading Model

# Rebuild the same architecture with batch_size=1; this rebinds `model` to a freshly built network.
# NOTE(review): presumably trained weights are loaded into it afterwards — that step is not visible here.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)