In [1]:
import tensorflow as tf
import numpy as np
import json
import random
from tensorflow.contrib.layers.python.layers import batch_norm


In [2]:
msgs = json.load(open('../data/my_messages.json'))
chars = 'abcdefghijklmnopqrstuvwxyz'
chars += chars.upper()
chars += '1234567890,;:?!./ _-\'"\n'
PAD = 0
START = 1
END = 2
chars = [PAD, START, END] + list(chars)

def strip_msg(m):
    return ''.join(c for c in m if c in chars)

msgs = [strip_msg(m) for m in msgs]


In [3]:
print msgs[0]


is this a chat about going back to the stairs and buying 4loko
or am I misinterpreting it


In [4]:
def ints_from_text(text):
    return np.array([START] + [chars.index(c) for c in text] + [END], dtype=np.int)

def ints_to_text(ints):
    return ''.join([chars[i] for i in ints if i not in [PAD, START, END]])

int_msgs = [ints_from_text(text) for text in msgs if len(text.strip()) > 0]

In [5]:
# def rand_sample():
#     msg = random.choice(int_msgs)
#     msg = [BLANK_CHAR] * (BLOCK_LEN-1) + msg
#     i = random.randint(BLOCK_LEN, len(msg)-1)
#     char = msg[i]
#     prev = msg[i-BLOCK_LEN:i]
#     return prev, char

# print rand_sample()

In [6]:

def causal_conv_1d(input, dilation, out_channels, is_skip_connection=False, name='causal_conv_1d'):
    # takes a 1-dimensional tensor! no batches here...
    filter_size = 3
    
    assert dilation >= 0
    
    conv_init = tf.truncated_normal_initializer(0.0, 0.1)
    bias_init = tf.truncated_normal_initializer(0.1, 0.0)
    
    with tf.variable_scope(name):
        in_channels = input.get_shape()[-1].value
                
        # reshape to 2d so we can use atrous_conv2d:
        # input = tf.reshape(input, [1, -1, 1, in_channels])
        input = tf.reshape(input, [1, -1, 1, in_channels])
        
        w = tf.get_variable('w', 
                        shape=[filter_size/2+1, 1, in_channels, out_channels], 
                        initializer=conv_init)
        filter = tf.pad(w, [[0, filter_size/2], [0,0], [0,0], [0,0]])
        
        output = tf.nn.atrous_conv2d(input, filter, dilation, 'SAME')
        
        b = tf.get_variable('b', shape=[out_channels], initializer=bias_init)
        output = output + b
        
        # reshape back to 1d:
        output = tf.reshape(output, [-1, out_channels])
        return output

def causal_conv_1d_padding_method(input, dilation, out_channels, is_skip_connection=False, name='causal_conv_1d'):
    # takes a 1-dimensional tensor! no batches here...
    filter_size = 2
    
    assert dilation >= 0
    
    conv_init = tf.truncated_normal_initializer(0.0, 0.1)
    bias_init = tf.truncated_normal_initializer(0.1, 0.0)
    
    with tf.variable_scope(name):
        in_channels = input.get_shape()[-1].value
                
        # reshape to 2d so we can use atrous_conv2d:
        input = tf.reshape(input, [1, -1, 1, in_channels])
        
        w = tf.get_variable('w', 
                        shape=[filter_size, 1, in_channels, out_channels], 
                        initializer=conv_init)
                
        output = tf.nn.atrous_conv2d(input, w, dilation, 'SAME')
        
        b = tf.get_variable('b', shape=[out_channels], initializer=bias_init)
        output = output + b
        
        # reshape back to 1d:
        output = tf.reshape(output, [-1, out_channels])
        return output

def create_fc(input, out_channels, name='fc'):
    with tf.variable_scope(name):
        in_channels = input.get_shape()[-1].value
        w = tf.get_variable('w', shape=[in_channels, out_channels], initializer=xavier_initializer())
        b = tf.get_variable('b', shape=[out_channels], initializer=bias_init)
        return tf.matmul(input, w) + b

def create_conv_1d(input, filter_size, out_channels, name='conv1d'):
    conv_init = tf.truncated_normal_initializer(0.0, 0.1)
    bias_init = tf.truncated_normal_initializer(0.1, 0.0)
    
    with tf.variable_scope(name):
        # turn input into a batch:
        in_channels = input.get_shape()[-1].value
        input = tf.reshape(input, [1, -1, in_channels])
        w = tf.get_variable('w', shape=[filter_size, in_channels, out_channels], initializer=conv_init)
        b = tf.get_variable('b', shape=[out_channels], initializer=bias_init)
        output = tf.nn.conv1d(input, w, 1, 'SAME') + b
        return tf.reshape(output, [-1, out_channels])
    
def create_batch_norm(inputs, name='bn'):
    with tf.variable_scope(name):
        return batch_norm(inputs, is_training=True, updates_collections=None)


In [7]:

def char_model(seq):
    # sequence: [None]
    
    char_dimension = len(chars)
    layers = [
        128,
        128,
        128,
        128,
        128
    ]
    receptive_field = 2 ** len(layers)
        
    seq = tf.one_hot(seq, char_dimension)
    
    seq = create_conv_1d(seq, 1, 64, name='char_conv')
    seq = tf.nn.elu(seq)
    
    for i, channel_count in enumerate(layers):
        dilation = 2 ** i
                
        seq = causal_conv_1d(seq, dilation, channel_count, name='dilated_conv' + str(i))
        seq = tf.nn.elu(seq)
        
        # seq = create_batch_norm(seq, name='bn' + str(i))
        
        # seq = create_conv_1d(seq, 1, channel_count, name='conv' + str(i))
        
        # skip = seq
        
        # if i + 1 < len(layers):
            # seq = tf.nn.elu(seq)
            # seq = tf.nn.dropout(seq, 0.75)
            # seq = create_batch_norm(seq, name='bn' + str(i))
    
    seq = create_conv_1d(seq, 1, char_dimension)
    
    return seq

with tf.variable_scope('char25'):
    input = tf.placeholder(tf.int32, [None], name='input')
    output = char_model(input)
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(output[:-1], input[1:]))
    global_step = tf.contrib.framework.get_or_create_global_step()
    lr = tf.placeholder(tf.float32, [], name='lr')
    train_step = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)


In [8]:
save_path = None # 'models/acgan3-20'

session = tf.InteractiveSession()
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
session.run(init_op)
tf.train.start_queue_runners(sess=session)

import os
saver = None
if save_path:
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(save_path)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(session, ckpt.model_checkpoint_path)
        print 'Restored from checkpoint', ckpt.model_checkpoint_path
    else:
        print 'Did not restore from checkpoint'
else:
    print 'Will not save progress'

Will not save progress


In [9]:
def predict(max_len=20):
    prev_chars = [START]
    while len(prev_chars) < max_len and prev_chars[-1] != END:
        op_ = session.run(output, feed_dict={input: prev_chars})
        # print op_.shape
        character_probs = session.run(tf.nn.softmax(output[-1]), feed_dict={input: prev_chars})
        # print character_probs
        character = np.random.choice(len(character_probs), p=character_probs)
        # character = np.argmax(character_probs)
        prev_chars.append(character)
    return ints_to_text(prev_chars)


In [11]:
def get_sample():
    return random.choice(int_msgs) # [START] + random.choice(int_msgs) + [END]

while True:
    losses = []
    feed = {
        input: get_sample(),
        lr: 0.0001
    }
    step_, loss_, _ = session.run([global_step, loss, train_step], feed_dict=feed)
    losses.append(loss_)
    if step_ % 500 == 0:
        print "Step: {}, loss: {}".format(step_, sum(losses) / len(losses))
        print predict()
        losses = []
    

Step: 19000, loss: 2.39981365204
theth
fo tha you bg
Step: 19500, loss: 2.41388511658
et
Step: 20000, loss: 1.75678217411
onn
Step: 20500, loss: 2.40287494659
no
Step: 21000, loss: 2.52069711685
yight himi
Step: 21500, loss: 2.83501195908
wh!
Step: 22000, loss: 2.57978916168
it'th  and
got
Step: 22500, loss: 1.48049283028
i'm
Step: 23000, loss: 2.28475642204
kave bo cm to gacip
Step: 23500, loss: 1.52788722515
wort?
Step: 24000, loss: 2.0639359951
AB Braminb's ut ham
Step: 24500, loss: 2.95376300812
it thed get
Step: 25000, loss: 3.11235833168
buck
Step: 25500, loss: 2.56164550781
lloz yon ton lo
Step: 26000, loss: 2.45597982407
is OPN FZRKUGOMa
ha
Step: 26500, loss: 1.38520336151
chere bere
Step: 27000, loss: 2.40951418877
son
Step: 27500, loss: 4.43081331253
OjE
Step: 28000, loss: 1.68124842644
came you loh ticand
Step: 28500, loss: 2.17301344872
INLA
Step: 29000, loss: 3.43153810501
hoh 1:
Step: 29500, loss: 2.32773900032
waite it sore tid '
Step: 30000, loss: 2.54777312279
chat ey


KeyboardInterrupt: 

In [None]:

print predict()