In [1]:
import tensorflow as tf
import numpy as np
import os

In [2]:
# Build the set of distinct, lower-cased names from both corpus files.
names = set()
for filename in ['male.txt', 'female.txt']:
    # `with` guarantees the file handle is closed once the file has been read.
    with open(os.path.join('../data/names', filename)) as name_file:
        for line in name_file:
            name = line.strip().lower()
            if name:  # skip blank lines
                names.add(name)


In [3]:
# Corpus size and name-length statistics.
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print('%d names' % len(names))
print('longest: %d' % max(map(len, names)))
by_len = sorted(names, key=len)
# Index int(0.95 * N) of the length-sorted list is the 95th percentile,
# so the label says 95th (it previously claimed the 99th).
print('95th percentile longest: %d' % len(by_len[int(len(names) * 0.95)]))

7583 names
longest: 54
99th percentile longest: 9


In [4]:
# Vocabulary: the 26 lower-case letters followed by two special tokens.
chars = list('abcdefghijklmnopqrstuvwxyz') + ['<END>', '<NULL>']
indices_for_chars = {c: i for i, c in enumerate(chars)}

NAME_MAX_LEN = 10 # include the <END> char

def name_to_vec(name, maxlen=NAME_MAX_LEN):
    """Encode `name` as a fixed-length integer vector of vocabulary indices.

    The first `maxlen` characters are mapped through `indices_for_chars`;
    characters outside the vocabulary (digits, punctuation, upper case, ...)
    are encoded as '<NULL>'. An '<END>' token is written right after the last
    character, or in the final slot when the name fills or exceeds `maxlen`
    (so at most `maxlen - 1` characters survive). Remaining slots hold '<NULL>'.

    Args:
        name: the string to encode.
        maxlen: length of the returned vector, including the '<END>' slot.

    Returns:
        A 1-D numpy integer array of length `maxlen`.
    """
    null_index = indices_for_chars['<NULL>']
    v = np.zeros(maxlen, dtype=int)
    v.fill(null_index)
    for i, c in enumerate(name):
        if i >= maxlen: break
        # Fall back to the <NULL> *index*; falling back to the string '<NULL>'
        # would make the integer-array assignment below raise.
        v[i] = indices_for_chars.get(c, null_index)
    v[min(len(name), maxlen-1)] = indices_for_chars['<END>']
    return v

def vec_to_name(vec):
    """Decode a sequence of vocabulary indices back into a string.

    Indices that map to single-character entries of `chars` are appended in
    order. Decoding stops at the first '<END>' token; any other
    multi-character token (i.e. '<NULL>') is skipped.
    """
    letters = []
    for index in vec:
        symbol = chars[index]
        if symbol == '<END>':
            break
        if len(symbol) == 1:
            letters.append(symbol)
    return ''.join(letters)

# Smoke-test the encode/decode round trip.
print(name_to_vec('nate'))
# A short name survives the round trip unchanged ...
assert vec_to_name(name_to_vec('nate')) == 'nate'
# ... while a name longer than NAME_MAX_LEN - 1 is truncated to make room for <END>.
assert vec_to_name(name_to_vec('aaaaaaaaaaaa')) == 'aaaaaaaaa'

[13  0 19  4 26 27 27 27 27 27]


In [5]:
# Graph inputs: a batch of noise vectors for the generator and a batch of
# encoded real names (rows of vocabulary indices, NAME_MAX_LEN per name).
NOISE_SIZE = 32
noise = tf.placeholder(shape=[None, NOISE_SIZE], dtype=tf.float32, name='noise')
real_names = tf.placeholder(shape=[None, NAME_MAX_LEN], dtype=tf.int32, name='names')

# 1-D shape tensor holding the run-time batch size of `real_names`.
batch_shape = tf.reshape(tf.shape(real_names)[0], [-1])
# One 0/1 draw per example. For integer dtypes `maxval` is exclusive, so
# maxval=2 is required to sample from {0, 1}; maxval=1 would always yield 0
# and a real name would never be selected.
use_real_name = tf.random_uniform(shape=batch_shape, minval=0, maxval=2, dtype=tf.int32, seed=None, name='use_real_name')

In [6]:
def weight_var(shape, stddev=0.1, weight_decay=0, name=None):
    """Create a trainable variable initialised from a truncated normal.

    Args:
        shape: shape of the variable.
        stddev: standard deviation of the truncated-normal initialiser.
        weight_decay: when positive, `weight_decay * tf.nn.l2_loss(variable)`
            is appended to the 'losses' graph collection.
        name: optional name passed to `tf.Variable`.

    Returns:
        The created `tf.Variable`.
    """
    variable = tf.Variable(tf.truncated_normal(shape, stddev=stddev), name=name)
    if weight_decay > 0:
        # NOTE(review): nothing visible in this notebook adds the 'losses'
        # collection to a training loss - confirm it is consumed somewhere.
        penalty = weight_decay * tf.nn.l2_loss(variable)
        tf.add_to_collection('losses', penalty)
    return variable

def leaky_relu(x, leak=0.2, name="lrelu"):
    """Leaky ReLU: returns x where x >= 0 and leak * x where x < 0.

    Written in the branch-free form a*x + b*|x| with a = (1 + leak) / 2 and
    b = (1 - leak) / 2: for positive x the terms sum to x, for negative x
    they sum to leak * x.
    """
    with tf.variable_scope(name):
        linear_coeff = (1 + leak) * 0.5
        abs_coeff = (1 - leak) * 0.5
        return linear_coeff * x + abs_coeff * abs(x)

def relu(x):
    """Activation used by the layer helpers (create_conv, text_conv, create_fc).

    Currently delegates to leaky_relu with its default leak; the plain
    tf.nn.relu alternative is kept below as a commented-out option.
    """
    # return tf.nn.relu(x)
    return leaky_relu(x)

def create_conv(input, out_channels, patch_size=5, stride=1, batch_norm=False, dropout=False):
    """2-D convolution layer: conv2d ('SAME' padding) -> [batch norm] -> + bias -> activation -> [dropout].

    Args:
        input: tensor whose last dimension is the (statically known) channel count.
        out_channels: number of output feature maps.
        patch_size: height and width of the square kernel.
        stride: stride applied along both spatial dimensions.
        batch_norm: when true, apply create_batch_norm to the convolution output.
        dropout: when true, apply create_dropout to the activation.

    Returns:
        The activation tensor.
    """
    in_channels = input.get_shape()[-1].value
    kernel = weight_var([patch_size, patch_size, in_channels, out_channels])
    bias = weight_var([out_channels], stddev=0)  # stddev=0 -> bias starts at zero
    features = tf.nn.conv2d(input, kernel, strides=[1, stride, stride, 1], padding='SAME')
    if batch_norm:
        # NOTE(review): create_batch_norm is not defined in the visible part
        # of this notebook - confirm it exists before enabling batch_norm.
        features = create_batch_norm(features)
    features = relu(features + bias)
    if dropout:
        features = create_dropout(features)
    return features
    
def text_conv(input, out_channels, patch_size=5, stride=1, dropout=False, pool_size=1):
    """1-D convolution layer over a sequence: conv1d ('SAME' padding) -> + bias -> activation -> [dropout].

    Args:
        input: tensor whose last dimension is the (statically known) channel count.
        out_channels: number of output channels.
        patch_size: width of the 1-D kernel.
        stride: stride along the sequence dimension.
        dropout: when true, apply create_dropout to the activation.
        pool_size: accepted but currently unused (max pooling is still a TODO).

    Returns:
        The activation tensor.
    """
    in_channels = input.get_shape()[-1].value
    kernel = weight_var([patch_size, in_channels, out_channels])
    bias = weight_var([out_channels], stddev=0)  # stddev=0 -> bias starts at zero
    features = tf.nn.conv1d(input, kernel, stride=stride, padding='SAME')
    features = relu(features + bias)
    # TODO: max_pooling
    if dropout:
        features = create_dropout(features)
    return features

def create_dropout(units, keep_prob=None):
    """Apply dropout to `units`.

    Args:
        units: tensor to apply dropout to.
        keep_prob: keep probability handed to tf.nn.dropout. When omitted,
            the module-level `dropout` value is used, as before.
            NOTE(review): no module-level `dropout` is defined in the visible
            part of this notebook - pass keep_prob explicitly or confirm the
            global exists, otherwise this raises NameError.

    Returns:
        The tensor returned by tf.nn.dropout.
    """
    if keep_prob is None:
        keep_prob = dropout
    return tf.nn.dropout(units, keep_prob)

def create_fc(input, out_size):
    """Fully connected layer: activation(input . W + b).

    Both the weight matrix and the bias are created through weight_var with
    weight_decay=0.004, so each registers an L2 penalty in the 'losses'
    collection.

    Args:
        input: 2-D tensor whose last dimension is statically known.
        out_size: number of output units.

    Returns:
        The activation tensor, with `out_size` as its last dimension.
    """
    # input_dropped = tf.nn.dropout(input, dropout_keep_prob)
    in_size = input.get_shape()[-1].value
    weights = weight_var([in_size, out_size], weight_decay=0.004)
    bias = weight_var([out_size], weight_decay=0.004)
    pre_activation = tf.matmul(input, weights) + bias
    return relu(pre_activation)

In [7]:
def generator(noise, name='generator'):
    """Build the generator graph: noise vectors -> character-index sequences.

    The noise vector is repeated at each of the NAME_MAX_LEN time steps and
    fed through a three-layer LSTM stack whose last layer has one unit per
    vocabulary entry; the highest-scoring entry is picked at each step.

    Args:
        noise: float32 tensor reshaped here to [-1, 1, NOISE_SIZE].
        name: variable scope under which the LSTM variables are created.

    Returns:
        int32 tensor reshaped to [-1, NAME_MAX_LEN] of vocabulary indices.
        Because it is produced by argmax followed by an integer cast, no
        gradient flows from this output back to the generator variables.
    """
    layer_sizes = [NOISE_SIZE, 256, len(chars)]
    with tf.variable_scope(name, reuse=None):
        cells = []
        for size in layer_sizes:
            cells.append(tf.nn.rnn_cell.LSTMCell(size, state_is_tuple=True))
        stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
        # Same noise vector presented at every time step.
        noise_per_step = tf.reshape(noise, [-1, 1, NOISE_SIZE])
        lstm_inputs = tf.tile(noise_per_step, [1, NAME_MAX_LEN, 1])
        outputs, _ = tf.nn.dynamic_rnn(stacked_lstm, lstm_inputs, dtype=tf.float32)
        char_probs = tf.nn.softmax(outputs)
        best_chars = tf.argmax(char_probs, axis=2)
        output_chars = tf.cast(tf.reshape(best_chars, [-1, NAME_MAX_LEN]), tf.int32)
    return output_chars

# Symbolic batch of generated names (int32 vocabulary indices), driven by the `noise` placeholder.
generated_names = generator(noise)

In [8]:
# Open a session and initialise the variables that exist in the graph so far.
# Variables created by later cells (e.g. the discriminator) are not covered
# by this initialiser run and need to be initialised after they are built.
session = tf.Session()
session.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))

In [9]:
def generate_noise(n=1):
    """Draw `n` standard-normal noise vectors as an (n, NOISE_SIZE) numpy array."""
    shape = (n, NOISE_SIZE)
    return np.random.normal(size=shape)

# Sample ten names from the generator (no training has run at this point)
# and print each one decoded back to text.
generated = session.run(generated_names, feed_dict={noise: generate_noise(n=10)})
for v in generated:
    print(vec_to_name(v))

yyyyyyykkk
qqwwwnnnnn
tttttttttt
zzzzzzzzzz
ddddddvvvv
ffffuuuuuu
iiiiiiiiii
hhhh
ggtttttttt
gggggajjjj


In [10]:
def discriminator(names, name='discriminator'):
    """Build the discriminator graph: encoded names -> P(name is real).

    The index sequences are one-hot encoded, passed through two stacked 1-D
    convolutions and two fully connected layers, then a 2-way softmax.

    Args:
        names: integer tensor of vocabulary indices, NAME_MAX_LEN per name.
        name: variable scope under which the layers are created.

    Returns:
        1-D float32 tensor: the first softmax component for each example,
        interpreted as the probability that the name is real.
    """
    with tf.variable_scope(name, reuse=None):
        one_hot = tf.one_hot(names, len(chars), dtype=tf.float32)
        conv1 = text_conv(one_hot, 64)
        # conv2 consumes conv1 so that both convolution layers contribute to
        # the output; feeding one_hot here would leave conv1 unused.
        conv2 = text_conv(conv1, 32)
        fc1 = create_fc(tf.reshape(conv2, [-1, NAME_MAX_LEN * 32]), 32)
        fc2 = create_fc(fc1, 2)
        is_real = tf.unpack(tf.nn.softmax(fc2), axis=1)[0]
        return is_real
#             cells = [tf.nn.rnn_cell.LSTMCell(size, state_is_tuple=True) for size in [len(chars), 32]]
#             lstm = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
#             outputs, state = tf.nn.dynamic_rnn(lstm, one_hot, dtype=tf.float32)
#             outputs_flat = tf.reshape(outputs, [-1, 32 * NAME_MAX_LEN])
# Per-example 0/1 selector expanded to [batch, 1] so that it broadcasts across
# the NAME_MAX_LEN character positions of each name (a [batch] vector does not
# broadcast against [batch, NAME_MAX_LEN]).
use_real_mask = tf.expand_dims(use_real_name, 1)
# Row-wise mix: the real name where the selector is 1, the generated name where it is 0.
disc_input = real_names * use_real_mask + generated_names * (1 - use_real_mask)
guessed_real = discriminator(disc_input)
is_real_label = tf.cast(use_real_name, tf.float32)
# Binary cross-entropy over BOTH classes: real examples are penalised by
# -log(p_real) and generated examples by -log(1 - p_real). Penalising only the
# real examples would let the discriminator answer "real" for everything at
# zero cost. The small epsilon keeps log() finite when the softmax saturates.
log_eps = 1e-8
disc_loss = tf.reduce_sum(
    is_real_label * -tf.log(guessed_real + log_eps)
    + (1.0 - is_real_label) * -tf.log(1.0 - guessed_real + log_eps))
# Fraction of examples whose rounded prediction matches the real/generated label.
disc_accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(guessed_real), is_real_label), tf.float32))

In [15]:
# Split the trainable variables by the variable scope each network was built under.
gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')

# The learning rate is a placeholder, so it must be fed on every training step.
learn_rate = tf.placeholder(tf.float32, name='learning_rate')
optimizer = tf.train.AdamOptimizer(learn_rate)
global_step = tf.contrib.framework.get_or_create_global_step()
# The same global_step is handed to both minimize() calls, so running both
# training ops advances it twice per iteration.
train_disc = optimizer.minimize(disc_loss, global_step=global_step, var_list=disc_vars)
# NOTE(review): this call raises the "No gradients provided for any variable"
# ValueError recorded below. generated_names is produced in generator() by
# tf.argmax followed by an int32 cast, so there is no differentiable path from
# disc_loss back to gen_vars. The generator must hand the discriminator a
# differentiable output (e.g. its softmax distribution) for this to work.
train_gen = optimizer.minimize(-disc_loss, global_step=global_step, var_list=gen_vars)

ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ['Tensor("generator/RNN/MultiRNNCell/Cell0/LSTMCell/W_0/read:0", shape=(64, 128), dtype=float32)', 'Tensor("generator/RNN/MultiRNNCell/Cell0/LSTMCell/B/read:0", shape=(128,), dtype=float32)', 'Tensor("generator/RNN/MultiRNNCell/Cell1/LSTMCell/W_0/read:0", shape=(288, 1024), dtype=float32)', 'Tensor("generator/RNN/MultiRNNCell/Cell1/LSTMCell/B/read:0", shape=(1024,), dtype=float32)', 'Tensor("generator/RNN/MultiRNNCell/Cell2/LSTMCell/W_0/read:0", shape=(284, 112), dtype=float32)', 'Tensor("generator/RNN/MultiRNNCell/Cell2/LSTMCell/B/read:0", shape=(112,), dtype=float32)'] and loss Tensor("Neg_3:0", shape=(), dtype=float32).