In [1]:
from tensorflow.keras.layers import StringLookup
from tensorflow.keras import Model
from tensorflow.keras import callbacks
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow import SparseTensor, sparse, function, random, squeeze, saved_model, constant, losses, strings
from tensorflow import data as dt
import numpy as np
import os
import time
import unicodedata

In [2]:
# Read the raw corpus. The context manager closes the handle deterministically,
# and the explicit encoding keeps accented characters intact regardless of the
# platform's default locale.
with open("drag-names.txt", 'r', encoding='utf-8') as corpus_file:
    file = corpus_file.read()

# NFKD applies compatibility decomposition (e.g. an accented letter becomes a
# base letter plus a combining mark), so the vocabulary below is built from the
# decomposed form of the text.
text = unicodedata.normalize('NFKD', file)
vocab = sorted(set(text))

# Forward (char -> id) and inverse (id -> char) lookups sharing one vocabulary.
ids_from_chars = StringLookup(vocabulary=list(vocab), mask_token=None)
chars_from_ids = StringLookup(vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

In [3]:
def text_from_ids(ids):
  """Look up the character for each ID and join them along the last axis."""
  looked_up_chars = chars_from_ids(ids)
  return strings.reduce_join(looked_up_chars, axis=-1)

In [4]:
# Split the corpus into individual UTF-8 characters, then map each one to its ID.
corpus_chars = strings.unicode_split(text, 'UTF-8')
all_ids = ids_from_chars(corpus_chars)

# Slice the ID tensor along its first axis into a dataset.
ids_dataset = dt.Dataset.from_tensor_slices(all_ids)

In [5]:
# Chunks are seq_length + 1 IDs long; a trailing chunk that comes up short is
# dropped.
seq_length = 100
sequences = ids_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)
examples_per_epoch = len(text) // (seq_length + 1)

In [6]:
def split_input_target(sequence):
    """Return ``(input, target)`` slices of ``sequence`` offset by one position.

    The input is everything except the last element; the target is everything
    except the first, so ``target[i]`` is the element that follows ``input[i]``.
    """
    return sequence[:-1], sequence[1:]

In [7]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(map_func=split_input_target)

In [8]:
# Batch size
BATCH_SIZE = 64

# Shuffle buffer size. tf.data is designed to work with possibly infinite
# sequences, so it does not shuffle the whole dataset in memory; instead it
# maintains a buffer of this many elements and shuffles within it.
BUFFER_SIZE = 10000

# Build the training pipeline straight from `sequences` instead of from the
# previous value of `dataset`, so re-running this cell does not batch an
# already-batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(dt.AUTOTUNE))

In [9]:
# Number of token IDs the model must handle. Taken from the lookup layer rather
# than from `vocab`, because the layer's vocabulary also contains the "[UNK]"
# entry; sizing a model with len(vocab) would leave it one ID short.
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [10]:
class MyModel(Model):
  """Character-level model: embedding -> GRU -> dense layer over the vocabulary."""

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the layers.

    Args:
      vocab_size: Number of distinct token IDs; sizes both the embedding input
        and the dense output.
      embedding_dim: Width of the embedding vectors.
      rnn_units: Number of units in the GRU layer.
    """
    # The zero-argument super() already binds the instance; passing `self`
    # again would hand it to Model.__init__ as an extra positional argument.
    super().__init__()
    self.embedding = Embedding(vocab_size, embedding_dim)
    self.gru = GRU(rnn_units, return_sequences=True, return_state=True)
    # No activation: the dense layer emits raw logits.
    self.dense = Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of token IDs.

    Args:
      inputs: Token IDs to embed.
      states: GRU state to start from; when None the GRU's initial state is used.
      return_state: When true, also return the final GRU state.
      training: Forwarded to every layer.

    Returns:
      The dense layer's output, or ``(output, states)`` when ``return_state``
      is true.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [11]:
# Size the model from the lookup layer's full vocabulary.
model = MyModel(len(ids_from_chars.get_vocabulary()), embedding_dim, rnn_units)

In [12]:
# from_logits=True because the model's final Dense layer is created without an
# activation, so its outputs are raw scores rather than probabilities.
loss = losses.SparseCategoricalCrossentropy(from_logits=True)

In [13]:
# Configure training with the Adam optimizer and the loss defined above.
model.compile(optimizer='adam', loss=loss)

In [14]:
# Checkpoints are written under this directory; the file name carries an
# "{epoch}" placeholder.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Save only the weights, not the full model.
checkpoint_callback = callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix,
)

In [15]:
# Number of epochs passed to model.fit.
EPOCHS = 125

In [16]:
# Train for EPOCHS epochs with the checkpoint callback attached; the return
# value of fit is kept in `history`.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/125
Epoch 2/125
Epoch 3/125
Epoch 4/125
Epoch 5/125
Epoch 6/125
Epoch 7/125
Epoch 8/125
Epoch 9/125
Epoch 10/125
Epoch 11/125
Epoch 12/125
Epoch 13/125
Epoch 14/125
Epoch 15/125
Epoch 16/125
Epoch 17/125
Epoch 18/125
Epoch 19/125
Epoch 20/125
Epoch 21/125
Epoch 22/125
Epoch 23/125
Epoch 24/125
Epoch 25/125
Epoch 26/125
Epoch 27/125
Epoch 28/125
Epoch 29/125
Epoch 30/125
Epoch 31/125
Epoch 32/125
Epoch 33/125
Epoch 34/125
Epoch 35/125
Epoch 36/125
Epoch 37/125
Epoch 38/125
Epoch 39/125
Epoch 40/125
Epoch 41/125
Epoch 42/125
Epoch 43/125
Epoch 44/125
Epoch 45/125
Epoch 46/125
Epoch 47/125
Epoch 48/125
Epoch 49/125
Epoch 50/125
Epoch 51/125
Epoch 52/125
Epoch 53/125
Epoch 54/125
Epoch 55/125
Epoch 56/125
Epoch 57/125
Epoch 58/125
Epoch 59/125
Epoch 60/125
Epoch 61/125
Epoch 62/125
Epoch 63/125
Epoch 64/125
Epoch 65/125
Epoch 66/125
Epoch 67/125
Epoch 68/125
Epoch 69/125
Epoch 70/125
Epoch 71/125
Epoch 72/125
Epoch 73/125
Epoch 74/125
Epoch 75/125
Epoch 76/125
Epoch 77/125
Epoch 78

In [19]:
class OneStep(Model):
  """Wraps a character model so that each call samples one next character."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1):
    """Store the model and lookups, and build the mask that blocks "[UNK]".

    Args:
      model: Model called with ``inputs``, ``states`` and ``return_state``.
      chars_from_ids: Lookup from token IDs to characters.
      ids_from_chars: Lookup from characters to token IDs.
      temperature: Divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Additive logit mask: -inf at the "[UNK]" id and 0 everywhere else, so
    # "[UNK]" can never be sampled.
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    vocabulary_length = len(ids_from_chars.get_vocabulary())
    unk_mask = SparseTensor(
        indices=unk_ids,
        values=[float('-inf')] * len(unk_ids),
        # One mask entry per vocabulary item.
        dense_shape=[vocabulary_length])
    self.prediction_mask = sparse.to_dense(unk_mask)

  @function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: Strings to continue; they are split into UTF-8 characters.
      states: State forwarded to the wrapped model, or None.

    Returns:
      ``(predicted_chars, states)``: the sampled characters and the state
      returned by the wrapped model.
    """
    # Strings -> characters -> token IDs (dense tensor).
    token_ids = self.ids_from_chars(
        strings.unicode_split(inputs, 'UTF-8')).to_tensor()

    # logits are indexed as [batch, char, next_char_logits].
    logits, states = self.model(inputs=token_ids, states=states,
                                return_state=True)

    # Keep only the last position, scale by temperature, then mask "[UNK]".
    last_logits = logits[:, -1, :] / self.temperature + self.prediction_mask

    # Draw one token ID per batch row and drop the sample axis.
    sampled = random.categorical(last_logits, num_samples=1)
    sampled_ids = squeeze(sampled, axis=-1)

    # Token IDs -> characters, returned together with the model state.
    return self.chars_from_ids(sampled_ids), states

In [20]:
# Wrap the trained model for single-step sampling; temperature is left at its
# default of 1.
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [22]:
weights = "weights1/DragNames1.h5"
# Make sure the target directory exists before writing; the save call is not
# guaranteed to create it.
os.makedirs(os.path.dirname(weights), exist_ok=True)
one_step_model.save_weights(weights)

In [40]:
# Export the one-step generator as a SavedModel under 'one_step', then load it
# back from the same directory.
saved_model.save(one_step_model, 'one_step')
one_step_reloaded = saved_model.load('one_step')





INFO:tensorflow:Assets written to: one_step/assets


INFO:tensorflow:Assets written to: one_step/assets


In [47]:
# Candidate first letters for a generated name.
SEED_FIRST_CHARS = ['a','e','i','n','r','l','o','s','t','m','d','c','h','y','u','k','b','v','g','p','x','f','j','w','z','q']

# For each first letter, the two-character openings used to seed the model.
SEED_BIGRAMS = {
    'a': ['aa','ab','ac','ad','af','ag','ai','aj','ak','al','am','an','ap','aq','ar','as','at','au','av','aw','ax','ay','az'],
    'b': ['b ','ba','be','bi','bl','bo','br','bu'],
    'c': ['ca','ce','ch','ci','cl','co','cr','ct','cu','cy'],
    'd': ['da','dd','de','di','dm','do','dr','du','dw','dy','dé','dí'],
    'e': ['eb','ec','ed','eg','el','em','en','ep','er','es','et','eu','ev','ex'],
    'f': ['fa','fe','fi','fk','fl','fo','fr'],
    'g': ['ga','gd','ge','gi','gl','go','gr','gu','gy'],
    'h': ['ha','he','hi','ho','hu'],
    'i': ['ic','id','ig','il','im','in','io','ir','is','iv','iy','iz'],
    'j': ['ja','jd','je','jf','ji','jo','ju','jy'],
    'k': ['ka','kc','ke','kh','ki','kl','ko','kr','ky'],
    'l': ['la','lc','le','li','lo','lq','lu','ly'],
    'm': ['m ','ma','me','mh','mi','mo','mr','ms','mu','mx','my','mz'],
    'n': ['na','ne','ni','no','nu','ny'],
    'o': ['ob','oc','ol','om','on','op','or','ot','ox'],
    'p': ['pa','pe','ph','pi','pl','pm','po','pr','ps','pu','py','pé'],
    'q': ['qu','qy'],
    'r': ['ra','re','rh','ri','ro','ru','ry'],
    's': ['sa','sc','se','sh','si','sk','sl','sm','so','sp','sr','st','su','sv','sy'],
    't': ['t ','ta','te','th','ti','to','tp','tr','ts','tu','tw','ty','tí','tó'],
    'u': ['uc','uh','ul','um','un','ur','ut'],
    'v': ['va','ve','vi','vo'],
    'w': ['wa','we','wh','wi','wo','wy'],
    'x': ['xa','xe','xi','xo','xt','xu'],
    'y': ['ya','ye','yo','yu','yv'],
    'z': ['za','ze','zi','zo','zs','zy'],
}


def generate_drag_name(max_length=32):
  """Generate one name by sampling characters from ``one_step_model``.

  A random two-character seed is chosen, then the model is asked for one
  character at a time until it emits a newline or ``max_length`` characters
  have been requested.

  Args:
    max_length: Maximum number of characters to sample after the seed.

  Returns:
    The generated text with each space-separated word capitalized and every
    '.' removed.
  """
  # Imported locally under an alias: the module-level name `random` is
  # TensorFlow's random module, not the standard library's.
  import random as py_random

  first_letter = py_random.choice(SEED_FIRST_CHARS)
  seed = py_random.choice(SEED_BIGRAMS[first_letter])
  # The vocabulary is built from NFKD-normalized text, so normalize the seed the
  # same way; a precomposed accented character would otherwise not be found in
  # the vocabulary.
  seed = unicodedata.normalize('NFKD', seed)

  states = None
  next_char = constant([seed])
  result = [next_char]

  for _ in range(max_length):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    # Stop at the first newline without appending it. The batch holds a single
    # string, so element 0 is the sampled character (as bytes).
    if next_char[0].numpy() == b'\n':
      break
    result.append(next_char)

  generated = strings.join(result)[0].numpy().decode('utf-8')
  words = [word.capitalize() for word in generated.split(' ')]
  return ' '.join(words).replace('.', '')

In [66]:
# Print 100 generated names, one per line.
print(*(generate_drag_name() for _ in range(100)), sep='\n')

Wind
Suse
Zy Bisch
Janessa Highland
Nerva
Toute Havet
M Butalee
Queet
Xton
Ickari
Gloo
Ision
Yous Love
Yutuve
Ettie Rebel
Du Chey
Yonca
Fkras Dolai
Want
Icka
Treale Knight
Mhona
Ts
Dia Elektra
Kress Poxxi
Wind
Litalityssa Hillz
Caso
Lowle Mcraghor
Jd
Rubie
Von Lee
Kley
Le Hepen
Qyuina
Zon
Poria Nithose
Phine
Olly Maid
Undie Sw James
Nusmi
Rose
Ise Love
Iy
Wynt
Qyaina Valencieon
Anna James
Ry Kidi
Ky Devine
Sy Devine
Nazo
Roller
Kle
Get
My Dasl
Ve
Ye Koli
Uchup
Nights
Beros Bark
Rose Penke
Cy
Sliaz
Akness Cox
Duckle
Xton
Ummer
Boytes
Fertay
Yus Phack
Jenna Scyde
Zi Balkx
Aaliaz Ntoll
Tóones
Lcy
Qyuina Love
Hones
Jfanna Wnights
Déithan Lareetee
Relae
Iga Monroe
Iva Lauxen
Iranha
Lquri
Ye Dovine
Rucine
Pynas Drag Stratton
Dia Kelly
Nash
Quein
Oco St James
Ve
Khes
Hue
Farkish La Coxx
Hole Willing
Niou
Yurtie
Hagoma
Fika Lour
