[View in Colaboratory](https://colab.research.google.com/github/tobiolabode/TextGen_eagar/blob/master/TextGen_eagar.ipynb)

In [1]:
!pip install unidecode

Collecting unidecode
[?25l  Downloading https://files.pythonhosted.org/packages/59/ef/67085e30e8bbcdd76e2f0a4ad8151c13a2c5bce77c85f8cad6e1f16fb141/Unidecode-1.0.22-py2.py3-none-any.whl (235kB)
[K    100% |████████████████████████████████| 235kB 15.2MB/s 
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.0.22


In [0]:
import tensorflow as tf

# Switch on eager execution straight after importing TensorFlow: later cells
# iterate the tf.data pipeline with a plain Python loop and call .numpy() on
# tensors, both of which rely on eager mode.
tf.enable_eager_execution()

import numpy as np
import os
import re 
import random
import unidecode
import time

In [4]:
# Fetch the Shakespeare corpus through Keras' download helper and keep the
# path it returns. (path_to_file is reassigned to the uploaded tweets file
# further down, so this copy is not the text that ends up being trained on.)
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [33]:
from google.colab import files

# Opens Colab's file picker. upload() returns the uploaded files' contents, so
# the trailing semicolon keeps the notebook from echoing the whole corpus back
# as this cell's output.
files.upload();

Saving trumptweetsinput.txt to trumptweetsinput.txt




In [34]:
# Show the working directory to check that the uploaded file is present.
!ls

sample_data  training_checkpoints  trumptweetsinput.txt


In [0]:
# Train on the uploaded tweets file rather than the Shakespeare download.
path_to_file = 'trumptweetsinput.txt'

In [36]:
# Read the whole corpus and transliterate it to ASCII with unidecode. The
# context manager closes the file handle as soon as the text has been read,
# instead of leaving it open until garbage collection.
with open(path_to_file) as corpus_file:
  text = unidecode.unidecode(corpus_file.read())
print(len(text))

2439836


In [0]:
# Vocabulary: every distinct character of the corpus, in sorted order.
unique = sorted(set(text))

# Lookup tables between a character and its position in `unique`.
char2idx = dict(zip(unique, range(len(unique))))
idx2char = dict(enumerate(unique))

In [0]:
# Number of characters in each training window (input and target alike).
max_length = 100

# Number of distinct characters; sizes the embedding table and output layer.
vocab_size = len(unique)

# Width of the character embedding vectors.
embedding_dim = 256

# Number of units in the GRU layer.
units = 1024

# Number of windows per training batch.
BATCH_SIZE = 64

# Size of the buffer the dataset shuffles within.
BUFFER_SIZE = 10000

In [39]:
# Cut the corpus into non-overlapping windows of max_length characters. Each
# target window is its input window shifted one character to the right, and
# both are stored as lists of character ids.
window_starts = range(0, len(text)-max_length, max_length)

input_text = [[char2idx[ch] for ch in text[begin:begin+max_length]]
              for begin in window_starts]
target_text = [[char2idx[ch] for ch in text[begin+1:begin+1+max_length]]
               for begin in window_starts]

print (np.array(input_text).shape)
print (np.array(target_text).shape)

(24398, 100)
(24398, 100)


In [0]:
# Pair each input window with its target, shuffle within a BUFFER_SIZE-element
# buffer, then group into batches of BATCH_SIZE (a short final batch is dropped).
dataset = tf.data.Dataset.from_tensor_slices((input_text, target_text))
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [0]:
class Model(tf.keras.Model):
  """Character-level language model: embedding -> GRU -> dense layer.

  Args:
    vocab_size: size of the embedding table and of the output layer.
    embedding_dim: width of the embedding vectors.
    units: number of GRU units.
    batch_size: stored on the instance as `batch_sz`; nothing in this class
      reads it.
  """

  def __init__(self, vocab_size, embedding_dim, units, batch_size):
    super(Model, self).__init__()
    self.units = units
    self.batch_sz = batch_size
    
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    
    # Exactly one recurrent layer is created on either device, because call()
    # only ever runs `self.gru`. The GPU branch uses the cuDNN-backed layer.
    if tf.test.is_gpu_available():
      self.gru = tf.keras.layers.CuDNNGRU(self.units, 
                                          return_sequences=True, 
                                          return_state=True, 
                                          recurrent_initializer='glorot_uniform')
    else:
      # NOTE(review): recurrent_activation='sigmoid' is presumably chosen to
      # match CuDNNGRU's gates so weights are interchangeable -- confirm.
      self.gru = tf.keras.layers.GRU(self.units, 
                                     return_sequences=True, 
                                     return_state=True, 
                                     recurrent_activation='sigmoid', 
                                     recurrent_initializer='glorot_uniform')
    
    self.fc = tf.keras.layers.Dense(vocab_size)
    
    
  def call(self, x, hidden):
    """Run the network on a batch of character ids.

    Args:
      x: character ids to embed (assumed shape (batch, sequence) -- confirm).
      hidden: initial GRU state, passed through as `initial_state`.

    Returns:
      A pair (logits, state). `logits` is 2-D: the GRU output's leading axes
      are flattened into one before the dense layer, which has vocab_size
      outputs. `state` is the state returned by the GRU.
    """
    x = self.embedding(x)
    
    output, states = self.gru(x, initial_state=hidden)
    
    # Collapse every leading axis so the dense layer sees one row per position.
    output = tf.reshape(output, (-1, output.shape[2]))
    
    x = self.fc(output)
    
    return x, states

In [0]:
# Build the network from the hyperparameters defined above.
model = Model(vocab_size=vocab_size,
              embedding_dim=embedding_dim,
              units=units,
              batch_size=BATCH_SIZE)

In [0]:
# Adam optimizer, constructed with no arguments (library defaults).
optimizer = tf.train.AdamOptimizer()

def loss_function(real, preds):
  """Sparse softmax cross-entropy between target ids and predicted logits.

  Args:
    real: target class ids, passed as `labels`.
    preds: unnormalised scores, passed as `logits`.

  Returns:
    The loss computed by tf.losses.sparse_softmax_cross_entropy.
  """
  cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=real, logits=preds)
  return cross_entropy

In [0]:
# Checkpoints are written under this directory, using "ckpt" as the file prefix.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# Track both the optimizer and the model so saving and restoring cover both.
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 model=model)

In [55]:
# Number of full passes over the dataset.
EPOCHS = 20

for epoch in range(EPOCHS):
  start = time.time()
  
  # NOTE(review): `hidden` takes whatever model.reset_states() returns; in
  # Keras that appears to be None, which would mean the first batch of each
  # epoch runs the GRU from its default initial state -- confirm.
  hidden = model.reset_states()
  
  for (batch, (inp, target)) in enumerate(dataset):
    with tf.GradientTape() as tape:
      
      # The state returned for this batch is fed into the next batch's call.
      predictions, hidden = model(inp, hidden)
      
      
      # Flatten the targets to 1-D so they line up with the flattened
      # predictions returned by the model.
      target = tf.reshape(target, (-1,))
      loss = loss_function(target,predictions)
      
    # Differentiate the loss w.r.t. every model variable and apply one step.
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(zip(grads, model.variables))
    
    # Progress report every 100 batches.
    if batch % 100 == 0:
      print("Epoch {} Batch {} Loss {:.4f}".format(epoch+1, batch, loss))
      

  # Write a checkpoint every fifth epoch.
  if (epoch + 1) % 5 == 0:
    checkpoint.save(file_prefix = checkpoint_prefix)

  # `loss` here is the loss of the last batch processed in this epoch, not an
  # average over the epoch.
  print ("Epoch {}  Loss {:.4f}".format(epoch+1, loss))
  print ("Time taken for 1 Epoch {} sec \n".format(time.time()- start))

Epoch 1 Batch 0 Loss 4.3835
Epoch 1 Batch 100 Loss 2.4489
Epoch 1 Batch 200 Loss 2.1148
Epoch 1 Batch 300 Loss 1.8467
Epoch 1  Loss 1.6828
Time taken for 1 Epoch 51.21127963066101 sec 

Epoch 2 Batch 0 Loss 1.6759
Epoch 2 Batch 100 Loss 1.5874
Epoch 2 Batch 200 Loss 1.6248
Epoch 2 Batch 300 Loss 1.5224
Epoch 2  Loss 1.5559
Time taken for 1 Epoch 51.17953038215637 sec 

Epoch 3 Batch 0 Loss 1.4418
Epoch 3 Batch 100 Loss 1.3799
Epoch 3 Batch 200 Loss 1.3810
Epoch 3 Batch 300 Loss 1.4452
Epoch 3  Loss 1.3684
Time taken for 1 Epoch 51.17159461975098 sec 

Epoch 4 Batch 0 Loss 1.2796
Epoch 4 Batch 100 Loss 1.3262
Epoch 4 Batch 200 Loss 1.3309
Epoch 4 Batch 300 Loss 1.3240
Epoch 4  Loss 1.3061
Time taken for 1 Epoch 51.37633967399597 sec 

Epoch 5 Batch 0 Loss 1.2104
Epoch 5 Batch 100 Loss 1.2426
Epoch 5 Batch 200 Loss 1.2629
Epoch 5 Batch 300 Loss 1.2338
Epoch 5  Loss 1.3167
Time taken for 1 Epoch 51.41980290412903 sec 

Epoch 6 Batch 0 Loss 1.1953
Epoch 6 Batch 100 Loss 1.1469
Epoch 6 Batc

In [47]:
# Restore the tracked optimizer and model from the most recent checkpoint found
# in checkpoint_dir.
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.checkpointable.util.CheckpointLoadStatus at 0x7f80448160f0>

In [56]:
# Number of characters to sample after the seed.
num_generate = 1000

# Seed text; every character in it must be a key of char2idx.
start_string = "Q"

# Encode the seed as character ids and add a leading batch axis of size 1.
input_eval = [char2idx[s] for s in start_string]
input_eval = tf.expand_dims(input_eval, 0)

# Sampled characters are collected here and joined once after the loop.
generated_chars = []

# The logits are divided by this value before sampling.
temperature = 1.0

hidden = [tf.zeros((1,units))]
for i in range(num_generate):
  predictions, hidden = model(input_eval, hidden)
  
  predictions = predictions / temperature
  # The model returns one row of logits per input character. Sample from the
  # LAST row so that a seed longer than one character continues from its final
  # character; for a single-character input the last row is the only row.
  predicted_id = tf.multinomial(predictions, num_samples=1)[-1, 0].numpy()
  
  # Feed the sampled character back in as the next input, together with the
  # state returned above.
  input_eval = tf.expand_dims([predicted_id], 0)
  
  generated_chars.append(idx2char[predicted_id])

text_generated = "".join(generated_chars)
  
print (start_string + text_generated)

Quity of Romney.
Don't let @Daidyhengstoning 'Trump's Wowland &amp; Frame Donald Trump hateho's money - no more like it!
.@ronginsel  Ve led the hell down from over 362 luxury regain or hide.
Rob Green Endorsement for USA - Headving ABC wasn't even worth more than we have them (cont)
"China is one of the line phylime directly to chuck Hillary Clinton were far the late great building- such a hate spending for a new generous of the pervert show. MY NEW smart government when thouse of @IvankaTrump's presidents are starims
Why does a president must ad on just one --- word and braggeding the border &amp; much of dignity - also cauck Obama has killed by trees watched at Osca success is the capability. This is not only a whole record.
My socially respected &amp; can get a which has been doing any chances of paid a business. By the definitely our brave sexting the @washingtonpost
@jtalfourzin- a complete with the bird killing them!
@BrandonBoA000. Thanks Michael!
@granybheen  Why happens it's 