Import packages

In [1]:
from __future__ import absolute_import, division, print_function

import tensorflow as tf
tf.enable_eager_execution()

import numpy as np
import os
import time
from IPython.display import Image

Paths for each comedian source corpus

In [2]:
biburr_path = 'ComedyText/BBText.txt'
rpryor_path = 'ComedyText/RPText.txt'
ajesel_path = 'ComedyText/AJText.txt'

allcom_path = 'ComedyText/AllComText.txt'

### Bill Burr Training

In [4]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes
# (or if it raises), instead of leaving it open for the life of the kernel.
with open(biburr_path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding = "utf-8")
# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

Length of text: 170333 characters


Snippet of text:

In [5]:
print(text[:250])

Bill Burr

Thank you. Thank you, sir. How are ya? How’s it going? All right. [Cheers and applause] All right, all right, all right, all right. All right, everybody settle down. I wanna get a gun. I do. I really do. I never had that feeling before til


Get unique characters

In [10]:
vocab = sorted(set(text))
print ('{} unique characters'.format(len(vocab)))

85 unique characters


Map characters to indices

In [9]:
# Forward lookup: character -> integer id (its position in the sorted vocab)
char2idx = dict((ch, idx) for idx, ch in enumerate(vocab))
# Reverse lookup: integer id -> character, stored as an array so it can be
# indexed with arrays of ids as well as single ids
idx2char = np.array(vocab)

# The whole corpus encoded as an array of integer ids
text_as_int = np.array([char2idx[ch] for ch in text])

Example of character to index mapping:

In [13]:

print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:30]), text_as_int[:30]))

'Bill Burr\n\nThank you. Thank yo' ---- characters mapped to int ---- > [26 59 62 62  1 26 71 68 68  0  0 43 58 51 64 61  1 75 65 71 11  1 43 58
 51 64 61  1 75 65]


In [15]:
# The maximum length sentence we want for a single input in characters
seq_length = 100
examples_per_epoch = len(text)//seq_length

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(10):
    print(idx2char[i.numpy()])

B
i
l
l
 
B
u
r
r




In [16]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

'Bill Burr\n\nThank you. Thank you, sir. How are ya? How’s it going? All right. [Cheers and applause] Al'
'l right, all right, all right, all right. All right, everybody settle down. I wanna get a gun. I do. '
'I really do. I never had that feeling before till I moved out to Los Angeles. This city just messes w'
'ith your mind, you know? It’s overpopulated, technically doesn’t have a water supply. Right? The doll'
'ar’s crashing. Shit keeps you up at night. You’re just thinking… “What am I gonna do when the zombies'


In [17]:
def split_input_target(chunk):
    """Turn one chunk into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].
    """
    return chunk[:-1], chunk[1:]

dataset = sequences.map(split_input_target)

In [18]:
for input_example, target_example in  dataset.take(1):
    print ('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
    print ('Target data:', repr(''.join(idx2char[target_example.numpy()])))

Input data:  'Bill Burr\n\nThank you. Thank you, sir. How are ya? How’s it going? All right. [Cheers and applause] A'
Target data: 'ill Burr\n\nThank you. Thank you, sir. How are ya? How’s it going? All right. [Cheers and applause] Al'


In [19]:
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step {:4d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

Step    0
  input: 26 ('B')
  expected output: 59 ('i')
Step    1
  input: 59 ('i')
  expected output: 62 ('l')
Step    2
  input: 62 ('l')
  expected output: 62 ('l')
Step    3
  input: 62 ('l')
  expected output: 1 (' ')
Step    4
  input: 1 (' ')
  expected output: 26 ('B')


In [20]:
# Batch size: number of (input, target) sequence pairs per training batch.
# Each sequence is seq_length (100) characters long -- characters, not words.
BATCH_SIZE = 64

# Number of training steps (batches) that make up one epoch
steps_per_epoch = examples_per_epoch//BATCH_SIZE

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Shuffle the sequence pairs, then group them into fixed-size batches;
# drop_remainder=True discards a final batch smaller than BATCH_SIZE.
# NOTE: this rebinds `dataset` to its batched form, so re-running this cell
# without first re-running the `sequences.map(...)` cell would batch data
# that is already batched.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Bare expression so the notebook displays the dataset's shapes and types
dataset

<DatasetV1Adapter shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [21]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension 
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [22]:
# Choose the recurrent layer class that build_model() will instantiate,
# depending on whether TensorFlow reports an available GPU.
if tf.test.is_gpu_available():
    rnn = tf.keras.layers.CuDNNGRU
    # NOTE(review): rnn2 is not referenced anywhere in the visible notebook and
    # is only defined on this branch -- confirm whether it can be dropped.
    rnn2 = tf.keras.layers.CuDNNGRU
else:
    import functools
    # Plain GRU with recurrent_activation pinned to 'sigmoid'
    # (presumably to match the CuDNN variant -- TODO confirm).
    rnn = functools.partial(
    tf.keras.layers.GRU, recurrent_activation='sigmoid')

In [23]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the Embedding -> recurrent -> Dense character model.

    The recurrent layer class comes from the notebook-level name `rnn`,
    which must be defined before this function is called.

    Args:
        vocab_size: size of the embedding table and of the final Dense layer.
        embedding_dim: width of each embedding vector.
        rnn_units: number of units in the recurrent layer.
        batch_size: fixed batch dimension baked into the input shape (the
            recurrent layer is created with stateful=True).

    Returns:
        An uncompiled tf.keras.Sequential model.
    """
    # Layers are created in the same order they are stacked.
    embedding_layer = tf.keras.layers.Embedding(
        vocab_size, embedding_dim,
        batch_input_shape=[batch_size, None])
    recurrent_layer = rnn(
        rnn_units,
        return_sequences=True,
        recurrent_initializer='glorot_uniform',
        stateful=True)
    output_layer = tf.keras.layers.Dense(vocab_size)

    return tf.keras.Sequential([embedding_layer, recurrent_layer, output_layer])

Create (and label) model for Bill Burr source text

In [24]:
biburr_model = build_model(
  vocab_size = len(vocab), 
  embedding_dim=embedding_dim, 
  rnn_units=rnn_units, 
  batch_size=BATCH_SIZE)

In [25]:
for input_example_batch, target_example_batch in dataset.take(1): 
    example_batch_predictions = biburr_model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 85) # (batch_size, sequence_length, vocab_size)


In [26]:
biburr_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           21760     
_________________________________________________________________
cu_dnngru (CuDNNGRU)         (64, None, 1024)          3938304   
_________________________________________________________________
dense (Dense)                (64, None, 85)            87125     
Total params: 4,047,189
Trainable params: 4,047,189
Non-trainable params: 0
_________________________________________________________________


In [27]:
def loss(labels, logits):
    """Sparse categorical cross-entropy between `labels` and raw `logits`.

    `from_logits=True` is passed because the model's final Dense layer has
    no activation, so its outputs are unnormalized scores.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)

example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)") 
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 100, 85)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.443308


In [28]:
biburr_model.compile(
    optimizer = tf.train.AdamOptimizer(),
    loss = loss)

Save checkpoints (i.e. the model weights) so the generative model can be deployed without retraining

In [30]:
# Directory where the checkpoints will be saved
checkpoint_dir = './biburr_training'

# Name of the checkpoint files; the "{epoch}" placeholder is left unformatted
# here and is filled in when a checkpoint is written (e.g. "ckpt_32").
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback handed to model.fit(); save_weights_only=True stores just the
# weights rather than the full model.
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [32]:
EPOCHS = 32
history = biburr_model.fit(dataset.repeat(), epochs=EPOCHS, steps_per_epoch=steps_per_epoch, callbacks=[checkpoint_callback])

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


In [33]:
tf.train.latest_checkpoint(checkpoint_dir)

'./biburr_training/ckpt_32'

In [34]:
# Rebuild the same architecture with batch_size=1 so text can be generated
# from a single seed sequence (build_model bakes the batch size into the
# input shape of the stateful recurrent layer).
# NOTE: vocab_size and checkpoint_dir are notebook-level names that later
# sections rebind for other corpora; run this cell while they still hold the
# Bill Burr values.
biburr_model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Load the weights from the most recent checkpoint found in checkpoint_dir
biburr_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

biburr_model.build(tf.TensorShape([1, None]))

In [35]:
biburr_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            21760     
_________________________________________________________________
cu_dnngru_1 (CuDNNGRU)       (1, None, 1024)           3938304   
_________________________________________________________________
dense_1 (Dense)              (1, None, 85)             87125     
Total params: 4,047,189
Trainable params: 4,047,189
Non-trainable params: 0
_________________________________________________________________


In [120]:
def generate_text(model, start_string, temperature=0.5, num_generate=1500,
                  char_to_idx=None, idx_to_char=None):
    """Sample text one character at a time from a trained character model.

    Args:
        model: stateful model built with batch size 1. It is called with a
            (1, sequence_length) tensor of character ids and must return
            per-position logits over the vocabulary.
        start_string: non-empty seed text. Every character must be a key of
            the character-to-id mapping.
        temperature: positive divisor applied to the logits before sampling.
            Low temperatures give more predictable text, higher temperatures
            give more surprising text.
        num_generate: number of characters to sample (default 1500).
        char_to_idx: optional character -> id mapping. Defaults to the
            notebook-level `char2idx`.
        idx_to_char: optional id -> character lookup. Defaults to the
            notebook-level `idx2char`.

    The notebook-level mappings are rebound every time another corpus is
    loaded, so pass the mappings that belong to `model` explicitly when
    sampling from a model trained on an earlier corpus.

    Returns:
        `start_string` followed by the generated characters.

    Raises:
        ValueError: if `start_string` is empty, `temperature` is not
            strictly positive, or `num_generate` is negative.
        KeyError: if `start_string` contains a character that is missing
            from the character-to-id mapping.
    """
    # Validate first so bad arguments fail with a clear message instead of
    # producing inf/NaN logits or an obscure shape error inside the model.
    if not temperature > 0:
        raise ValueError(
            "temperature must be > 0, got {!r}".format(temperature))
    if num_generate < 0:
        raise ValueError(
            "num_generate must be >= 0, got {!r}".format(num_generate))
    if not start_string:
        raise ValueError("start_string must contain at least one character")

    # Fall back to the notebook-level vocabulary only when none was supplied.
    encode = char2idx if char_to_idx is None else char_to_idx
    decode = idx2char if idx_to_char is None else idx_to_char

    # Converting our start string to numbers (vectorizing), then adding the
    # batch dimension the model expects.
    input_eval = [encode[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Characters sampled so far
    text_generated = []

    # Here batch size == 1. Clear any recurrent state left over from a
    # previous call so each generation starts fresh.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)

        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits by the temperature, sample one id per position
        # from the resulting distribution, and keep only the sample for the
        # last position: that is the predicted next character.
        predictions = predictions / temperature
        predicted_id = tf.multinomial(predictions, num_samples=1)[-1,0].numpy()

        # We pass the predicted character as the next input to the model;
        # the previous context is carried by the model's hidden state.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(decode[predicted_id])

    return (start_string + ''.join(text_generated))

In [42]:
print(generate_text(biburr_model, start_string=u"Hey",temperature=.5))

Hey How did a start thinking of shit, but I’m not wanted to go to story. I wanna be something. “How did it going?” “I think I’m gonna go out and get the balls to have a kid, I’ll suck your dick white guy thinking, “I have a kid, I’m like, “What do I go, “I’m not saying that were started talking about it. I think it’s a lot of it, it’s a good thing about it, you know? I just don’t know they had hands. They’re just fucking gonna have a kid, what do you do now? You can’t get married, and then the most difficult job on the planet. Oh, it’s the greatest thing ever. Big me us the most difficult job on the planet. Oh, it’s a bunch of claps, right? You got the balls to get in something, you know? It’s like, “why would you really had to do what kills me, what do you were said it looked to make her there was something like, “What do you do me a face-lifts? ‘Cause right there. I don’t know what happened? I love and shit. I go, “Well, that’s not a pit bull. It’s so a water span in the back of a wo

### Richard Pryor Training

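Same pipeline as above, except that the helper functions (`split_input_target`, `build_model`, `loss`, `generate_text`) are already defined and are reused here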

In [77]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes
# (or if it raises), instead of leaving it open for the life of the kernel.
with open(rpryor_path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding = "utf-8")
# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

Length of text: 102554 characters


In [78]:
print(text[:250])

Richard Pryor

We are gathered here today… to make sure… everyone eats. If not each other… food. I was gonna talk about something that’s very serious… and I hope no one gets offended. I wanna talk about fucking. And sometimes I talk about it. And a l


In [79]:
vocab = sorted(set(text))
print ('{} unique characters'.format(len(vocab)))

76 unique characters


In [80]:
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

In [81]:
seq_length = 100
examples_per_epoch = len(text)//seq_length

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(10):
    print(idx2char[i.numpy()])

R
i
c
h
a
r
d
 
P
r


In [82]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

'Richard Pryor\n\nWe are gathered here today… to make sure… everyone eats. If not each other… food. I wa'
's gonna talk about something that’s very serious… and I hope no one gets offended. I wanna talk about'
' fucking. And sometimes I talk about it. And a lot of people in the audience… don’t know what I mean.'
' So would you raise your hand it you don’t know what fucking is… so we can watch your ass when you le'
'ave here? Because not enough fuckin’… goin’ on in America. Americans. Reagan get in, you stop fuckin’'


In [83]:
dataset = sequences.map(split_input_target)

In [84]:
for input_example, target_example in  dataset.take(1):
    print ('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
    print ('Target data:', repr(''.join(idx2char[target_example.numpy()])))

Input data:  'Richard Pryor\n\nWe are gathered here today… to make sure… everyone eats. If not each other… food. I w'
Target data: 'ichard Pryor\n\nWe are gathered here today… to make sure… everyone eats. If not each other… food. I wa'


In [85]:
# Batch size: number of (input, target) sequence pairs per training batch.
# Each sequence is seq_length (100) characters long -- characters, not words.
BATCH_SIZE = 64

# Number of training steps (batches) that make up one epoch
steps_per_epoch = examples_per_epoch//BATCH_SIZE

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Shuffle the sequence pairs, then group them into fixed-size batches;
# drop_remainder=True discards a final batch smaller than BATCH_SIZE.
# NOTE: this rebinds `dataset` to its batched form, so re-running this cell
# without first re-running the `sequences.map(...)` cell would batch data
# that is already batched.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Bare expression so the notebook displays the dataset's shapes and types
dataset

<DatasetV1Adapter shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [86]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension 
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [87]:
# Choose the recurrent layer class that build_model() will instantiate,
# depending on whether TensorFlow reports an available GPU.
# This cell repeats the selection already made in the Bill Burr section and
# rebinds the same names.
if tf.test.is_gpu_available():
    rnn = tf.keras.layers.CuDNNGRU
    # NOTE(review): rnn2 is not referenced anywhere in the visible notebook and
    # is only defined on this branch -- confirm whether it can be dropped.
    rnn2 = tf.keras.layers.CuDNNGRU
else:
    import functools
    # Plain GRU with recurrent_activation pinned to 'sigmoid'
    # (presumably to match the CuDNN variant -- TODO confirm).
    rnn = functools.partial(
    tf.keras.layers.GRU, recurrent_activation='sigmoid')

In [88]:
rpryor_model = build_model(
  vocab_size = len(vocab), 
  embedding_dim=embedding_dim, 
  rnn_units=rnn_units, 
  batch_size=BATCH_SIZE)

In [89]:
for input_example_batch, target_example_batch in dataset.take(1): 
    example_batch_predictions = rpryor_model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 76) # (batch_size, sequence_length, vocab_size)


In [90]:
rpryor_model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (64, None, 256)           19456     
_________________________________________________________________
cu_dnngru_4 (CuDNNGRU)       (64, None, 1024)          3938304   
_________________________________________________________________
dense_4 (Dense)              (64, None, 76)            77900     
Total params: 4,035,660
Trainable params: 4,035,660
Non-trainable params: 0
_________________________________________________________________


In [91]:
example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)") 
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 100, 76)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.3299656


In [92]:
rpryor_model.compile(
    optimizer = tf.train.AdamOptimizer(),
    loss = loss)

In [93]:
# Directory where the checkpoints will be saved
checkpoint_dir = './rpryor_training'

# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [94]:
history = rpryor_model.fit(dataset.repeat(), epochs=EPOCHS, steps_per_epoch=steps_per_epoch, callbacks=[checkpoint_callback])

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


In [95]:
tf.train.latest_checkpoint(checkpoint_dir)

'./rpryor_training/ckpt_32'

In [96]:
rpryor_model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

rpryor_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

rpryor_model.build(tf.TensorShape([1, None]))

In [98]:
print(generate_text(rpryor_model, start_string=u"Hey",temperature=.5))

Hey’s gonna get off the talked in the toilet in the toing to me. But he was going to help. See that shit is the motherfuckers didn’t want to we the street about it. I say. “I’m gonna be fucking were. We got a lot of shit. I was in the toilet. And you fuck all the shit going. And you have to see the fuck around. Just a minute. Whought it the crubs was on the time. You know what I’m going to hel. Shit. Shit. Care this but the lions some of the bolks and shit. I’m gonna do. They got the brothers was some shit that shit in the corner… I’m gonna get no more. I was a little on your life. Motherfucker called to me. I’m gonna talk about friends and shit, man. I’m gonna be out there wasn’t no more. I ain’t gonna be in your now. You know, to your mick hanged back. I was a black come in your when you doin’? Shit, are you been to the motherfuckers doing in the time, you say, ”We’re not gonna people in the tried to say. “Fuck it. So I can’t even look like a walk about the motherfuckers done fucking

### Anthony Jeselnik Training

In [99]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes
# (or if it raises), instead of leaving it open for the life of the kernel.
with open(ajesel_path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding = "utf-8")
# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

Length of text: 93327 characters


In [100]:
print(text[:250])

Anthony Jeselnik

Now… my best friend’s wife is a born again Christian, and we do not get along at all. The other day, she called me up to yell at me, saying I’m a terrible influence on her husband because he called her a bad name. I said, “What? Did


In [101]:
vocab = sorted(set(text))
print ('{} unique characters'.format(len(vocab)))

82 unique characters


In [102]:
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

In [103]:
seq_length = 100
examples_per_epoch = len(text)//seq_length

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(10):
    print(idx2char[i.numpy()])

A
n
t
h
o
n
y
 
J
e


In [104]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

'Anthony Jeselnik\n\nNow… my best friend’s wife is a born again Christian, and we do not get along at al'
'l. The other day, she called me up to yell at me, saying I’m a terrible influence on her husband beca'
'use he called her a bad name. I said, “What? Did he call you a bitch?” She said, “No, Anthony. He did'
' not use the B word.” I said, “Uh-oh.” “Did he call you a cunt?” She said, “No.” I said, “Well, then '
'he didn’t hear it from me.” Yeah, that’s pretty much the greatest opening joke of all time. Because e'


In [105]:
dataset = sequences.map(split_input_target)

In [106]:
for input_example, target_example in  dataset.take(1):
    print ('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
    print ('Target data:', repr(''.join(idx2char[target_example.numpy()])))

Input data:  'Anthony Jeselnik\n\nNow… my best friend’s wife is a born again Christian, and we do not get along at a'
Target data: 'nthony Jeselnik\n\nNow… my best friend’s wife is a born again Christian, and we do not get along at al'


In [107]:
# Batch size: number of (input, target) sequence pairs per training batch.
# Each sequence is seq_length (100) characters long -- characters, not words.
BATCH_SIZE = 64

# Number of training steps (batches) that make up one epoch
steps_per_epoch = examples_per_epoch//BATCH_SIZE

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Shuffle the sequence pairs, then group them into fixed-size batches;
# drop_remainder=True discards a final batch smaller than BATCH_SIZE.
# NOTE: this rebinds `dataset` to its batched form, so re-running this cell
# without first re-running the `sequences.map(...)` cell would batch data
# that is already batched.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Bare expression so the notebook displays the dataset's shapes and types
dataset

<DatasetV1Adapter shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [108]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension 
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [109]:
# Choose the recurrent layer class that build_model() will instantiate,
# depending on whether TensorFlow reports an available GPU.
# This cell repeats the selection already made in the Bill Burr section and
# rebinds the same names.
if tf.test.is_gpu_available():
    rnn = tf.keras.layers.CuDNNGRU
    # NOTE(review): rnn2 is not referenced anywhere in the visible notebook and
    # is only defined on this branch -- confirm whether it can be dropped.
    rnn2 = tf.keras.layers.CuDNNGRU
else:
    import functools
    # Plain GRU with recurrent_activation pinned to 'sigmoid'
    # (presumably to match the CuDNN variant -- TODO confirm).
    rnn = functools.partial(
    tf.keras.layers.GRU, recurrent_activation='sigmoid')

In [110]:
ajesel_model = build_model(
  vocab_size = len(vocab), 
  embedding_dim=embedding_dim, 
  rnn_units=rnn_units, 
  batch_size=BATCH_SIZE)

In [111]:
for input_example_batch, target_example_batch in dataset.take(1): 
    example_batch_predictions = ajesel_model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 82) # (batch_size, sequence_length, vocab_size)


In [112]:
ajesel_model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (64, None, 256)           20992     
_________________________________________________________________
cu_dnngru_6 (CuDNNGRU)       (64, None, 1024)          3938304   
_________________________________________________________________
dense_6 (Dense)              (64, None, 82)            84050     
Total params: 4,043,346
Trainable params: 4,043,346
Non-trainable params: 0
_________________________________________________________________


In [114]:
ajesel_model.compile(
    optimizer = tf.train.AdamOptimizer(),
    loss = loss)

In [115]:
# Directory where the checkpoints will be saved
checkpoint_dir = './ajesel_training'

# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [116]:
history = ajesel_model.fit(dataset.repeat(), epochs=EPOCHS, steps_per_epoch=steps_per_epoch, callbacks=[checkpoint_callback])

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


In [117]:
tf.train.latest_checkpoint(checkpoint_dir)

'./ajesel_training/ckpt_32'

In [118]:
ajesel_model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

ajesel_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

ajesel_model.build(tf.TensorShape([1, None]))

In [125]:
print(generate_text(ajesel_model, start_string=u"So",temperature=.75))

Sould not me and say, "Anthony, who I was like,
"Anthony,
it was where she had tre possont of the was.
I come time I am a couple of my family for over to the hotech at abortion couldn’t been you a great friends who started comedian she. I might now that she’m gut up aftrongh, a packyous.
You are not a right before you go to me.
Ald the ather a blind jokes
I accused poon, you’re even comedian. [ Laughter ] And people wime lase.
That joke to start to text me.
You should all me.
That's a forgut because I get thought it. But numblece. Soundry best everything is a surcrucime
her wathing to her one me on me.
I never getting
on the war to the more of a time of me her ones good morey if it had a bad here. So you know? He was a bad.
I could ’t a fucking tried to dead. I said, “Wo mon up and phenemally pees hisco.
That the only goter, you know. But I mean, she dad she was in fron minded with pen of and tele is a lot get.
Like, the exactid about it.
And they really more in the worst probare done.

### All Transcripts Training

In [126]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes
# (or if it raises), instead of leaving it open for the life of the kernel.
with open(allcom_path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding = "utf-8")
# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

Length of text: 628637 characters


In [127]:
vocab = sorted(set(text))
print ('{} unique characters'.format(len(vocab)))

93 unique characters


In [128]:
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

In [129]:
seq_length = 100
examples_per_epoch = len(text)//seq_length

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(10):
    print(idx2char[i.numpy()])

N
o
w
…
 
m
y
 
b
e


In [130]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

'Now… my best friend’s wife is a born again Christian, and we do not get along at all. The other day, '
'she called me up to yell at me, saying I’m a terrible influence on her husband because he called her '
'a bad name. I said, “What? Did he call you a bitch?” She said, “No, Anthony. He did not use the B wor'
'd.” I said, “Uh-oh.” “Did he call you a cunt?” She said, “No.” I said, “Well, then he didn’t hear it '
'from me.” Yeah, that’s pretty much the greatest opening joke of all time. Because even if you’ve neve'


In [131]:
dataset = sequences.map(split_input_target)

In [132]:
for input_example, target_example in  dataset.take(1):
    print ('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
    print ('Target data:', repr(''.join(idx2char[target_example.numpy()])))

Input data:  'Now… my best friend’s wife is a born again Christian, and we do not get along at all. The other day,'
Target data: 'ow… my best friend’s wife is a born again Christian, and we do not get along at all. The other day, '


In [133]:
# Batch size: number of (input, target) sequence pairs per training batch.
# Each sequence is seq_length (100) characters long -- characters, not words.
BATCH_SIZE = 64

# Number of training steps (batches) that make up one epoch
steps_per_epoch = examples_per_epoch//BATCH_SIZE

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Shuffle the sequence pairs, then group them into fixed-size batches;
# drop_remainder=True discards a final batch smaller than BATCH_SIZE.
# NOTE: this rebinds `dataset` to its batched form, so re-running this cell
# without first re-running the `sequences.map(...)` cell would batch data
# that is already batched.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Bare expression so the notebook displays the dataset's shapes and types
dataset

<DatasetV1Adapter shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [134]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension 
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [135]:
# Choose the recurrent layer class that build_model() will instantiate,
# depending on whether TensorFlow reports an available GPU.
# This cell repeats the selection already made in the Bill Burr section and
# rebinds the same names.
if tf.test.is_gpu_available():
    rnn = tf.keras.layers.CuDNNGRU
    # NOTE(review): rnn2 is not referenced anywhere in the visible notebook and
    # is only defined on this branch -- confirm whether it can be dropped.
    rnn2 = tf.keras.layers.CuDNNGRU
else:
    import functools
    # Plain GRU with recurrent_activation pinned to 'sigmoid'
    # (presumably to match the CuDNN variant -- TODO confirm).
    rnn = functools.partial(
    tf.keras.layers.GRU, recurrent_activation='sigmoid')

In [136]:
allcom_model = build_model(
  vocab_size = len(vocab), 
  embedding_dim=embedding_dim, 
  rnn_units=rnn_units, 
  batch_size=BATCH_SIZE)

In [138]:
for input_example_batch, target_example_batch in dataset.take(1): 
    example_batch_predictions = allcom_model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 93) # (batch_size, sequence_length, vocab_size)


In [139]:
allcom_model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (64, None, 256)           23808     
_________________________________________________________________
cu_dnngru_8 (CuDNNGRU)       (64, None, 1024)          3938304   
_________________________________________________________________
dense_8 (Dense)              (64, None, 93)            95325     
Total params: 4,057,437
Trainable params: 4,057,437
Non-trainable params: 0
_________________________________________________________________


In [142]:
allcom_model.compile(
    optimizer = tf.train.AdamOptimizer(),
    loss = loss)

In [143]:
# Directory where the checkpoints will be saved
checkpoint_dir = './allcom_training'

# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [144]:
history = allcom_model.fit(dataset.repeat(), epochs=EPOCHS, steps_per_epoch=steps_per_epoch, callbacks=[checkpoint_callback])

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


In [145]:
tf.train.latest_checkpoint(checkpoint_dir)

'./allcom_training/ckpt_32'

In [146]:
allcom_model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

allcom_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

allcom_model.build(tf.TensorShape([1, None]))

In [147]:
print(generate_text(allcom_model, start_string=u"So",temperature=.35))

So the court. Like the thing today,
the thing that the cops come over.” – “Fuck Jim Brown.” “Oh, my God. What, are you gonna be great. Come here, like…” He really was, like, trying to tell me. I know what you say that stuff. The opposite of tender. I have never seen a baby. What the fuck am I, right? Ha. Does it happen? Did you not seen him watching it. Probably have a great life, but it’s good. It’s… you know, if I’m a bunch of cripit of the country was like, “Hey, man, you got to be the best thing about it. Even when I watch sports. I didn’t have anything to tell them this shit work. Hey, you don’t want to do that. My girl was all about it. He’s like, “No, it’s fucking his back, and he couldn’t rind of grape drink at my friend’s house… like a… “I want this shit to stop.” Crackheads are like, “No, I’m sorry, ladies. I just feel like doing anything around the corner to see the shit out of the dog that way. Like, the other day, she got her hair cut. Two inches trimmed off of her hair. T