# Quick baby name generation

This notebook loads a pretrained model, so there is no need to wait for the model to fit. For more details about the data processing, transformation and fitting, see the other notebook with the suffix '_complete_'.

In [None]:
import os

import tensorflow as tf
import numpy as np
import polars as pl
import unidecode

from keras.models import load_model
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import LambdaCallback
from keras.models import load_model

In [None]:
# Load the already-fitted Keras model from disk instead of training it here.
model = load_model('./models/model.h5')

In [None]:
# Read the whole input file into one string; `list_of_names` (the raw text) is
# kept because the tokenizer cell fits on it.
with open('model_input/names.txt', 'r') as text:
    list_of_names = text.read()

# One entry per line of the file.
names = list_of_names.splitlines()

In [None]:
# Normalise every name: drop hyphens, transliterate to ASCII, append the '.'
# end-of-name marker, lower-case, and finally remove apostrophes. The order of
# the steps is kept exactly as before because unidecode itself can emit
# characters (e.g. "'" or upper-case letters) that the later steps clean up.
#
# A comprehension is used instead of accumulating into `_`: in IPython/Jupyter
# `_` is the "last output" variable and should not be used as scratch storage.
names = [
    (unidecode.unidecode(raw_name.replace('-', '')) + '.')
    .lower()
    .replace("'", "")
    for raw_name in names
]

In [None]:
# Build the vocabulary from the raw file contents.
# The custom `filters` string contains neither '.' nor the apostrophe, and
# `split` is set to the newline character.
# NOTE(review): `list_of_names` is a single string (the result of text.read()),
# not a list of texts. fit_on_texts presumably iterates over it one character
# at a time, which would make every character its own token - confirm against
# the Keras Tokenizer documentation before changing anything here.
# NOTE(review): the resulting vocabulary presumably has to match the one the
# pretrained model was fitted with - verify against the '_complete_' notebook.
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    filters='!"#$%&()*+,-/:;<=>?@[\\]^_`{|}~',
    split='\n')
tokenizer.fit_on_texts(list_of_names)

In [None]:
# Lookup tables between characters and their integer ids, taken from the
# fitted tokenizer. `char_to_index` is the tokenizer's own word_index dict.
char_to_index = tokenizer.word_index
index_to_char = {index: char for char, index in char_to_index.items()}

# Id 0 is assigned to '.', the end-of-name marker appended during cleaning.
char_to_index['.'] = 0
index_to_char[0] = '.'

In [None]:
# Dimensions of the one-hot tensors: number of names, length of the longest
# name (including its '.' marker) and vocabulary size.
max_char = len(max(names, key=len))
m = len(names)
char_dim = len(char_to_index)

# X holds each name one-hot encoded per position; Y holds the same sequence
# shifted one step to the left (the character that follows each position).
X = np.zeros((m, max_char, char_dim))
Y = np.zeros((m, max_char, char_dim))

for row, name in enumerate(names):
    for pos, char in enumerate(name):
        X[row, pos, char_to_index[char]] = 1
    # Every character except the first is the target of the position before it.
    for pos, next_char in enumerate(name[1:]):
        Y[row, pos, char_to_index[next_char]] = 1

In [None]:
def make_name(model):
    """Sample one name from `model`, character by character.

    Relies on the module-level `max_char`, `char_dim` and `index_to_char`.
    Starting from an all-zero input, the distribution predicted for the
    current position is sampled, and the sampled character is fed back as the
    one-hot input of the next position. Sampling stops when '.' is drawn, or
    when position ``max_char - 2`` is reached, where '.' is forced.

    Args:
        model: object whose ``predict(x, verbose=0)`` returns an array of
            shape ``(1, max_char, char_dim)`` with one score vector per
            position.

    Returns:
        The generated name as a string, including the trailing '.' marker.
        The name is also printed.
    """
    last_step = max_char - 2
    chars = []
    x = np.zeros((1, max_char, char_dim))
    step = 0

    while True:
        # verbose=0: predict is called once per character, so the default
        # progress output would drown the generated names.
        probs = np.asarray(model.predict(x, verbose=0)[0, step],
                           dtype=np.float64)
        # Re-normalise in float64 so the vector sums to 1 as closely as
        # possible before np.random.choice validates it.
        probs = probs / probs.sum()
        index = np.random.choice(char_dim, p=probs)

        # Force the end marker at the last allowed position so the input
        # tensor is never indexed past max_char - 1.
        character = '.' if step == last_step else index_to_char[index]
        chars.append(character)
        if character == '.':
            break

        # Feed the sampled character back as the next position's input.
        x[0, step + 1, index] = 1
        step += 1

    generated = ''.join(chars)
    print(generated)

    return generated

In [None]:
path = './model_output/'
filename = 'generated_names.txt'

number_of_names = 10

# exist_ok=True replaces the separate "exists?" check, which is race-prone.
os.makedirs(path, exist_ok=True)

# Append mode creates the file when it is missing, so no separate "touch" step
# is needed, and the `with` block closes the file on exit.
with open(f'{path}/{filename}', 'a') as text:
    # make_name returns the name with its trailing '.' marker; strip it so the
    # file holds one plain name per line.
    output = [make_name(model)[:-1] + '\n' for _unused in range(number_of_names)]
    text.writelines(output)

In [None]:
# Load the list of generated names (one per line).
with open(f'{path}/{filename}', 'r') as file:
    generated_lines = file.read().splitlines()

# The names are written without their '.' end marker, so slicing off the last
# character here would cut a real letter. rstrip('.') only removes a marker
# when a line actually carries one.
gen_names = pl.Series([line.rstrip('.') for line in generated_lines])

# Read the original names.
with open('model_input/names.txt', 'r') as file:
    original_names = file.read().splitlines()

# Apply the same normalisation the model input went through (drop hyphens,
# transliterate to ASCII, lower-case, remove apostrophes). Without it, an
# original such as "Jean-Pierre" could never match a generated name.
original_names = [
    unidecode.unidecode(name.replace('-', '')).lower().replace("'", "")
    for name in original_names
]

# Compare generated names with original names: each entry pairs a generated
# name with True when it already exists in the input list.
check_names = gen_names.is_in(original_names)
name_existing = list(zip(list(gen_names), check_names.to_list()))

for value in name_existing:
    print(value)