In [1]:
# Import TensorFlow and other libraries
import tensorflow as tf

import numpy as np
import os
import time

In [2]:
#filePath = '/content/drive/MyDrive/Rush Album Lyrics/Permanent Waves/Permanent Waves All Songs.txt' # Just Freewill
#filePath = '/content/drive/MyDrive/Rush Album Lyrics/All Lyrics.txt' # All lyrics
filePath = '/content/drive/MyDrive/Rush Album Lyrics/All Lyrics For Real.txt' # All lyrics

In [3]:
# Read, then decode for py2 compat.
text = open(filePath, 'rb').read().decode(encoding='cp1252')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')

Length of text: 149243 characters


In [4]:
# The unique characters in the file
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

75 unique characters


In [5]:
example_texts = ['abcdefg', 'xyz']

chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')

ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

ids = ids_from_chars(chars)

chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

chars = chars_from_ids(ids)

tf.strings.reduce_join(chars, axis=-1).numpy()

def text_from_ids(ids):
  return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)

In [6]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

seq_length = 100

sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

for seq in sequences.take(1):
  print(chars_from_ids(seq))

sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

for seq in sequences.take(1):
  print(chars_from_ids(seq))

for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

split_input_target(list("Tensorflow"))

dataset = sequences.map(split_input_target)

for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Y
e
a
h
,
 
o
h
 
y
tf.Tensor(
[b'Y' b'e' b'a' b'h' b',' b' ' b'o' b'h' b' ' b'y' b'e' b'a' b'h' b'!'
 b'\r' b'\n' b'O' b'o' b'h' b',' b' ' b's' b'a' b'i' b'd' b' ' b'I' b','
 b' ' b'I' b"'" b'm' b' ' b'c' b'o' b'm' b'i' b'n' b"'" b' ' b'o' b'u'
 b't' b' ' b't' b'o' b' ' b'g' b'e' b't' b' ' b'y' b'o' b'u' b'\r' b'\n'
 b'O' b'o' b'h' b',' b' ' b's' b'i' b't' b' ' b'd' b'o' b'w' b'n' b','
 b' ' b'I' b"'" b'm' b' ' b'c' b'o' b'm' b'i' b'n' b"'" b' ' b'o' b'u'
 b't' b' ' b't' b'o' b' ' b'f' b'i' b'n' b'd' b' ' b'y' b'o' b'u' b'\r'
 b'\n' b'O' b'o'], shape=(101,), dtype=string)
tf.Tensor(
[b'Y' b'e' b'a' b'h' b',' b' ' b'o' b'h' b' ' b'y' b'e' b'a' b'h' b'!'
 b'\r' b'\n' b'O' b'o' b'h' b',' b' ' b's' b'a' b'i' b'd' b' ' b'I' b','
 b' ' b'I' b"'" b'm' b' ' b'c' b'o' b'm' b'i' b'n' b"'" b' ' b'o' b'u'
 b't' b' ' b't' b'o' b' ' b'g' b'e' b't' b' ' b'y' b'o' b'u' b'\r' b'\n'
 b'O' b'o' b'h' b',' b' ' b's' b'i' b't' b' ' b'd' b'o' b'w' b'n' b','
 b' ' b'I' b"'" b'm' b' ' b'c' b'o' b'm' b'i' b'n'

In [7]:
# Building the Training Set
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [8]:
# Length of the vocabulary in StringLookup Layer
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [9]:
class MyModel(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, rnn_units):
    super().__init__(self)
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [10]:
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [11]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

model.summary()

(64, 100, 76) # (batch_size, sequence_length, vocab_size)
Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  19456     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  77900     
                                                                 
Total params: 4,035,660
Trainable params: 4,035,660
Non-trainable params: 0
_________________________________________________________________


In [12]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b"heavy\r\nAnd I just...I just don't understand\r\nWhy must my crew desert me?\r\nWhen I need...I need a gui"

Next Char Predictions:
 b'\nO??\'li"/bwMGiO-ZPEHez1(z(\rOOV)i":W/wYc4QW2]LMjHF\rAxPJMF2l3i)S.4\nBfzh-X)D4.WiI[ynbU[talpFzI.faA!wXju'


In [13]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)
tf.exp(example_batch_mean_loss).numpy()
model.compile(optimizer='adam', loss=loss)

Prediction shape:  (64, 100, 76)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.3304205, shape=(), dtype=float32)


In [14]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [15]:
EPOCHS = 20
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
class OneStep(tf.keras.Model):
  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

In [17]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [18]:
start = time.time()
states = None
next_char = tf.constant(['Salesman'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

Salesmand me sone
The goozest frize
Doess ape too liffer
And like to ros tom of wind man
Salving things the ske
The icertems ame wanter
I well yes tomy about the faue

Haught and dagrentule?
Nomobring that's the blurse
Strimitions
Con't remoment soint hed the swakes the finare
Ind vistion or a know plowing brod good
And to yound to key
At the toung that sobety
Shates and living eppatade
Nowhite starts on the bor hal sex, vit by his banyther
One don't leave to devears of the toperga

Housing adain that pass premaliots of the wildint you
Ons taves
Mymortars shait of faith
Wo to the aun and firad explaze

We ofmahs mere benowe
That's fight a flow

The but and they remort some around
You was a need
Ter consies, fen we as the warful too mus
Res there's not away

Driven of sermoring
Miding class
Exer fays
Pof it our gespact

I wat a things hera..

Face and time on the downeal of parston
Peews of purning dangs
With the sun of the Near

I'll still "Tr

In [19]:
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')





INFO:tensorflow:Assets written to: one_step/assets


INFO:tensorflow:Assets written to: one_step/assets


In [24]:
states = None
next_char = tf.constant(['In the constellation of Orion'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

In the constellation of Orion rise
Dream is offor hem sandss fass
All burful and frezess are only
But in the city back of wind
I juns a mander lives got a freeent that down
Well, you can shime out it and down

ISbide's no seads of the light

Sometimes wallitize
I'm hass so many a feel
We move me ours day the for chile in the wind blood
Load new winning bazy
Plause of poldstiglls

So mpant the pares

[Chorust is so turn
Thy potrous with shat a flace his pasin away
Havitions are now fouce that's sather
Flows a finst appansadity
Alaunst the sight, I said head
Whee the reachant mitice
Gight's gopre a fight
At seess to less to the fun time
Is their days to relear
Is the whoods tha plail
Her thomeds from the air
I sun to the fat of thim me is thingel
We feels than storay spind is head, bood-by night
The dirf of circlet you thains
I'm night this was and moanden dancer
Excin alone soncess of they tod others
I son cat me sook appo ander
I fink will helo?
Oo le

In [25]:
states = None
next_char = tf.constant(['Whammy'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

Whammy

I'm roining to flaghing controts

An way fle weman tonger
It's home tolay, what' the Maddles
That is a dain that's joarned undery
You know peapso plistinc
Back we are blyone
On the speer sould of life is need
We're of the seeser kiver
For the gets a reals
That soung so know, shent
Withos the copiraines
Whit so landing what they sand they
Roulinger to yeah
My said it soue
The crocks ban the froting dicirstand
I wann through to be know 
Tuth -ome beands toudd
A sendan?
Sometimes residies reasuins
To find, rus, hat the in
To over the wire
Those the scy veings
But sown the soads you benue
The proadless mackless alanes
The tireless that capre againstod
To my need it's fool to foo sur
With it eness toores
To deef is a mist road
In the flowing for the back
Well hot my head at all oh

What it is a stire
Don't wance unlisions they much, nourds
In the fatter of me
Citer toos After of they all too mover face
And the stakss of the tandes fromed
For th

In [27]:
states = None
next_char = tf.constant(['I can feel it in my head'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

I can feel it in my head
Guting that still it again the scomptime
Mysterion wenaille with you rade

Just of the fight?
Carred the suck is the to excool
That's mation
Justivere are by thought on a wirlowor iseag
Would be winds the sture
Its a for so uland alove?
Sole are burn touch of the sinnals of sate
By wear for the aif

What I feal, believe in my surmor
Rearons of the way bottle hise
Justs rolling on the secory of sime
Waselvilled of imentainstand in mision
That ingained to the sthees
Tight away makes af heave for the Bright
Whilain the world wenes
If the every prefce gave
Bubed af anning bagn of the wirlin'
I sal must leff thered wis ows
Whe halalipe of rifirs
And the dark of clased
It just to clo

I've hid mandin...

Everywayts, chost is try to ungin
Over is between down the bloods
More on my whey we are amagit

To that sworld been...

We ad I gut burting fate

Now feed and for s dunt-resples every
And ralitiveeds
I'm not we'dly look abone t

In [28]:
states = None
next_char = tf.constant(['Sean Smith is'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

Sean Smith is early meaching ron around in your streed

Don't day a fun changer gial fitrind
The out han's endays upen all cave to slay
Ragain it a mander flacks

Tryin'd was it wime usways.
Or inmickoon one dirdon on the arget?
A sun find out to Becise
Momeny pall me firtervers, hand
A perzence things I'd readly for a world on the wellw
Fres to as in the rain
A feat of the bester somether
Whome it what you're babring
To be afrood in just immire

To recience the wine my fate is stleep
In the stars of hand
In the s wind empor the spreams of leaving time
Reagoned chuns
Walk against us deseretion
Looking out it's that's field here

Alt bugns the sulder live

Some are the tur
That clEader to restectrounce
I wheels strees abait the fornersla
Cut's somethen froming choods at sole can ranare
A gring of me conico
Becouse to mantel phouss
Just an a macing oors paint his is gos from plases

Same alredont the notory prizing of the surmunger consusions
Ons the Adg

In [32]:
states = None
next_char = tf.constant(['Brandon is'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

Brandon is the Time
Mopented and fained
In the sperest bock that sein in
The comestation
Make a smy soulfrions

Can't hold how at mavell no shor
To the you ne evered tade
To le knead-of high
Head, her hid is room and fall
Bur the theeding on the ipeenst tlabl

I could be something morest on a world thing
It's the spansed sways

Now it should
Rathered of icmored
Just against the wivet wish so eare your fees
Looking for ever cons emotion
Hiswering on a showing life
Drifting down, fol Oh dog yearly's notche
I would wands han blind
Oh, me lot go...
It gets to explomes in our spreezs
I sun a with the tames
Fore and here span around wheil disibstands
They end fine to arm gytom
Or face a ginace

It's just tokelend, behind in ot...Lofting evert of the given
For the worad of aggee
His miribres myst fight
In it you get esisune
Too make the blouds thaveI never side

Now the fremont criss of feet

Wearshing nobody rensevies
Allied these sands sears
Row the se