## Load the needed libraries

In [1]:
import os

import numpy as np
import plotly.graph_objects as go
import tensorflow as tf

### Load the drag queen name file

In [2]:
file_name = "../data/drag-names.txt"

# The file holds one name per line. Blank lines are filtered out instead of
# blindly discarding the last element, so the final name is kept even when the
# file does not end with a newline.
with open(file_name, 'r') as f:
    names = [line for line in f.read().split("\n") if line]

### Get the length of the longest name for proper padding

In [3]:
# Work on a fresh list of the loaded names and record the longest one; every
# name is later padded out to this length.
names = list(names)
max_length = max(map(len, names))

print("Number of Names: ", len(names))
print("Length of Longest Name:", max_length)

Number of Names:  3281


### Add a start_token for each name for the generation seed phrase
### Then pad each name to make them all the same length

In [4]:
# Marker prepended to every name (also the default generation seed) and the
# filler character used to pad names to a common length.
start_token = " "
pad_token = "#"

# Prefix each name with the start marker; the longest name grows by one character.
names = [f"{start_token}{name}" for name in names]
max_length = max_length + 1

# Vocabulary: every distinct character seen in the names plus the pad token,
# in sorted order so that token ids are stable between runs.
vocabulary = set("".join(names)) | set(pad_token)
tokens = sorted(vocabulary)
n_tokens = len(tokens)

In [5]:
# Map each vocabulary character to its integer id (its index in `tokens`).
token_to_id = dict(zip(tokens,range(len(tokens))))

def to_matrix(names, max_len=None, pad=token_to_id[pad_token], dtype=np.int32):
    """Encode a list of names as a padded matrix of token ids.

    Args:
        names: sequence of strings to encode.
        max_len: number of columns in the result. When None, the length of the
            longest name in `names` is used (0 for an empty list).
        pad: id written into every position past the end of a name.
        dtype: dtype of the returned array.

    Returns:
        Array of shape (len(names), max_len). Row i holds the token ids of
        names[i], truncated to `max_len` and right-padded with `pad`.

    Raises:
        KeyError: if a name contains a character missing from `token_to_id`.
    """
    if max_len is None:
        # Previously None was passed straight to np.zeros, which raises.
        max_len = max(map(len, names), default=0)

    names_ix = np.full([len(names), max_len], pad, dtype=dtype)

    for i, name in enumerate(names):
        # Direct indexing (rather than .get) makes unknown characters fail loudly.
        name_ix = [token_to_id[char] for char in name[:max_len]]
        names_ix[i, :len(name_ix)] = name_ix

    return names_ix

### Time to make our training set and determine the number of steps per epoch

In [6]:
# Integer-encoded names, one row per name, padded to `max_length` columns.
# The width is passed explicitly: to_matrix's default of max_len=None is not
# a usable array dimension.
x = to_matrix(names, max_len=max_length)

# Row k of the identity matrix is the one-hot vector for token id k, so
# indexing it with an id matrix one-hot encodes every position at once.
one_hot = np.eye(n_tokens, dtype=np.int32)

# Inputs: one-hot version of each character.
x_train = one_hot[x]

# Targets: the character that follows each position; the last position has no
# successor, so its target is the pad token.
pad_column = np.full((x.shape[0], 1), token_to_id[pad_token], dtype=x.dtype)
next_ids = np.concatenate([x[:, 1:], pad_column], axis=1)
y_train = one_hot[next_ids]

In [7]:
# Fetch the currently active tf.distribute strategy; the model is built inside
# its scope in a later cell.
strategy = tf.distribute.get_strategy()

In [8]:
# Number of training examples (one row of `x` per name).
name_count = x.shape[0]

batch_size = 64

# Batches needed to see every name once. Cast to a plain int because
# np.ceil returns a float and Keras documents `steps_per_epoch` as an integer.
steps_per_epoch = int(np.ceil(name_count / batch_size))

# Let tf.data choose the prefetch buffer size.
auto = tf.data.experimental.AUTOTUNE

# Dataset options that allow elements to be produced out of order.
ignore_order = tf.data.Options()
ignore_order.experimental_deterministic = False

In [9]:
# Input pipeline: integer token ids in, one-hot next-character targets out.
# cache() must come BEFORE shuffle(): caching a shuffled dataset stores the
# first shuffled order and replays it on every repeat, so the data would
# effectively never be reshuffled between epochs.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x, y_train))
    .cache()
    .shuffle(len(names))
    .repeat()
    .batch(batch_size)
    .prefetch(auto)
)

### Time to make the model, using multiple LSTM layers and SimpleRNN layers

In [10]:
# Width of every recurrent layer and size of the learned character embedding.
num_rnn_units = 256
embedding_size = 37

def make_model():
    """Build and compile the character-level name model.

    Architecture: Embedding -> LSTM -> LSTM -> Dropout(0.2) -> LSTM ->
    SimpleRNN -> SimpleRNN -> Dense softmax over the vocabulary. Every
    recurrent layer returns the full sequence, so the model emits one
    distribution over `n_tokens` per input position.

    Returns:
        A compiled tf.keras Sequential model (categorical cross-entropy loss,
        Adam optimizer with learning rate 0.001).
    """
    layers = tf.keras.layers

    model = tf.keras.models.Sequential()

    # The model consumes integer token ids; the Embedding layer defines the input.
    model.add(layers.Embedding(n_tokens, embedding_size, input_length=max_length))

    # No input_shape here: this is not the first layer, so its input shape is
    # inferred from the Embedding output.
    model.add(layers.LSTM(num_rnn_units, return_sequences=True, activation='elu'))
    model.add(layers.LSTM(num_rnn_units, return_sequences=True, activation='elu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.LSTM(num_rnn_units, return_sequences=True, activation='elu'))

    model.add(layers.SimpleRNN(num_rnn_units, return_sequences=True, activation='elu'))
    model.add(layers.SimpleRNN(num_rnn_units, return_sequences=True, activation='elu'))

    # Per-position probability distribution over the vocabulary.
    model.add(layers.Dense(n_tokens, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(0.001))

    return model

### Build and compile the model, then print its summary

In [11]:
# Create the model inside the strategy's scope, then print a per-layer
# summary of output shapes and parameter counts.
with strategy.scope():
    
    model = make_model()
    
    model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 32, 37)            1369      
                                                                 
 lstm (LSTM)                 (None, 32, 256)           301056    
                                                                 
 lstm_1 (LSTM)               (None, 32, 256)           525312    
                                                                 
 dropout (Dropout)           (None, 32, 256)           0         
                                                                 
 lstm_2 (LSTM)               (None, 32, 256)           525312    
                                                                 
 simple_rnn (SimpleRNN)      (None, 32, 256)           131328    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 32, 256)           1

### Write the actual name generating function

In [13]:
def generateName(model=model,seed_phrase=start_token,max_length=max_length):
    """Sample a name from the model one character at a time.

    Args:
        model: model that maps a (1, max_length) array of token ids to
            per-position probabilities over the vocabulary.
        seed_phrase: non-empty string the name starts with; every character
            must be a key of `token_to_id`.
        max_length: maximum number of characters, seed included.

    Returns:
        The seed phrase followed by the sampled characters. Sampling stops
        when the pad token is drawn or `max_length` characters are reached.

    Raises:
        ValueError: if `seed_phrase` is empty or longer than `max_length`.
        KeyError: if `seed_phrase` contains a character outside the vocabulary.
    """
    if not seed_phrase:
        # With an empty seed the first step would read position -1 (the last
        # timestep) instead of a real preceding character.
        raise ValueError("seed_phrase must contain at least one character")
    if len(seed_phrase) > max_length:
        raise ValueError("seed_phrase is longer than max_length")

    pad_id = token_to_id[pad_token]

    name = [seed_phrase]
    x = np.zeros((1, max_length), np.int32)
    x[0, :len(seed_phrase)] = [token_to_id[token] for token in seed_phrase]

    for i in range(len(seed_phrase), max_length):
        # Distribution over the next character, given everything up to i-1.
        # verbose=0 stops predict from printing a progress bar for every character.
        probs = np.asarray(model.predict(x, verbose=0)[0, i - 1], dtype=np.float64)

        # Renormalise in float64 so np.random.choice accepts the distribution.
        probs /= probs.sum()

        index = np.random.choice(n_tokens, p=probs)

        if index == pad_id:
            break

        x[0, i] = index
        name.append(tokens[index])

    return "".join(name)

### Train the model with 250 epochs

In [45]:
# Number of training epochs; each epoch runs `steps_per_epoch` batches.
# NOTE(review): the output recorded below this cell reads "Epoch N/50", so it
# came from a run with a different `epochs` value - re-run to refresh it.
epochs = 250

# The returned History object's `history['loss']` is what the plotting cell reads.
history = model.fit(train_dataset,steps_per_epoch=steps_per_epoch,epochs=epochs)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Visualize the model training

In [24]:
# Per-epoch training loss as recorded by model.fit.
losses = history.history['loss']

fig = go.Figure()

fig.add_trace(go.Scatter(x=np.arange(1, len(losses) + 1),  # epochs are 1-based
                         y=losses,
                         mode='lines+markers',
                         name='Training loss'))

# Label both axes so the figure can be read on its own.
fig.update_layout(title_text="Training loss",
                  xaxis_title="Epoch",
                  yaxis_title="Loss")

fig.show()

### Save the model to convert it to tfjs

In [31]:
# Destination path for the trained model; the .h5 extension selects Keras' HDF5 format.
saved_model = 'saved-model2/drag-names.h5'

In [32]:
# Saving to an .h5 file does not create missing parent directories, so make
# sure the target directory exists before writing.
save_dir = os.path.dirname(saved_model)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

model.save(saved_model)

### Generate names!

In [49]:
# Sample 6000 names starting from the seed, capitalise each space-separated
# word, and print the result without the leading start-token space.
seed_phrase = " "
for _ in range(6000):
    words = generateName(model, seed_phrase=seed_phrase).split(" ")
    name = " ".join(word.capitalize() for word in words)
    print(name.lstrip())

Krinhe Bonay
Mariy Cliqye
Embarie  Hhre
Lama Pantit
Ruadarde Maytor Imrans
Kaydi Forge
Lostyl Fiu
Daha Shaik
Anmy Beexle
Kitnyn Huenz
Esissia Vesbit
Rimeza Otana
Amyas Malao
Panmpsi Chacolars
Nybon Aona
Jeno Valele
Dhayxa Womkiens
Ausa Nondos
Dizi Blokde Ho Kiym
Jenda Doxe
Mualon Corle
Jlisti Havintar
Kefha Sporxrot
Viosna Phaced Caltico
Musdrar Cracelace
Ki Rostle Funr
Raru Senbhend
Itanka Clanklewe
Chistif Posi
Mila Valdy
Khalgiy Chaezi
Dapanitynit
Iata
Brina Wort Moxeri Lavocloltt
Katdadia Way Bharrt
Miluttyn Concbaldiere
Galranine Chonderti
Atanta Devril
Spilleeb Pogteria
Chyrhyy Jye Soycor
Mony Itaidrae
Sojhia Hareili
Bange Vraamisl
Tima Von
Viy Bay Oranfis
Tona Untal
Leddynyly Rebe
Honlyn Vons
Hesibe Hi Adecs
Vai D Meflatay
Pomalova 3000
Mola Monteu Satrone
Banasina Bokbol Upencre
Negona Bonszesh
Filla Onka
Madi Dolgine
Jicho Liccitei
Gichiriqy
Asdora Havenpo Davele
Corpenito Jakeols
Avexi Meydore
Alonin Riqay
Naya Annramsgandor
Iisbka Walocan
Atuci Wovesi
Jizm Vaneroa
Dole Doye 