In [1]:
import tensorflow as tf
import numpy as np

In [2]:
session = tf.Session()
global_step = tf.contrib.framework.get_or_create_global_step()

In [3]:
TRAIN = True
BATCH_SIZE = 32
REVIEW_LEN = 1000

In [4]:
def read_and_decode_single_example(filename, epochs=None):
    # first construct a queue containing a list of filenames.
    # this lets a user split up there dataset in multiple files to keep
    # size down
    filename_queue = tf.train.string_input_producer([filename], num_epochs=epochs)
    # Unlike the TFRecordWriter, the TFRecordReader is symbolic
    reader = tf.TFRecordReader()
    # One can read a single serialized example from a filename
    # serialized_example is a Tensor of type string.
    _, serialized_example = reader.read(filename_queue)
    # The serialized example is converted back to actual values.
    # One needs to describe the format of the objects to be returned
    features = tf.parse_single_example(
        serialized_example,
        features={
            # We know the length of both fields. If not the
            # tf.VarLenFeature could be used
            'stars': tf.FixedLenFeature([], tf.int64),
            'text': tf.FixedLenFeature([], 'string')
        })
    # now return the converted data
    stars = features['stars']
    chars = tf.decode_raw(features['text'], tf.uint8)
    chars = tf.reshape(chars, [REVIEW_LEN])
    return stars, chars

# load training batcher:
filename = 'yelp_train.tfrecords'
stars, chars = read_and_decode_single_example(filename, epochs=(None if TRAIN else 1))
stars_batch, chars_batch = tf.train.shuffle_batch([stars, chars], batch_size=BATCH_SIZE, capacity=1000, min_after_dequeue=500)

# load testing batcher:
test_stars, test_chars = read_and_decode_single_example('yelp_test.tfrecords', epochs=None)
TEST_BATCHES = 10 * 1000 / BATCH_SIZE
stars_batch_test, chars_batch_test = tf.train.batch([test_stars, test_chars], batch_size=BATCH_SIZE, allow_smaller_final_batch=True)

tf.train.start_queue_runners(sess=session)

[<Thread(Thread-4, started daemon 123145559244800)>,
 <Thread(Thread-5, started daemon 123145563451392)>,
 <Thread(Thread-6, started daemon 123145567657984)>,
 <Thread(Thread-7, started daemon 123145571864576)>]

In [5]:
# print session.run(stars_batch)

In [6]:
dropout_keep_prob = tf.placeholder_with_default(tf.constant(1.0), [], name='dropout_keep_prob')        

def weight_var(shape, stddev=0.1, weight_decay=0, name=None):
    initial = tf.truncated_normal(shape, stddev=stddev)
    v = tf.Variable(initial, name=name)
    if weight_decay > 0:
        l2 = tf.nn.l2_loss(v) * weight_decay
        tf.add_to_collection('losses', l2)
    return v

def create_fc(input, out_size, relu=True):
    # input_dropped = tf.nn.dropout(input, dropout_keep_prob)
    in_size = input.get_shape()[-1].value
    w = weight_var([in_size, out_size], weight_decay=0.004, name='w')
    b = weight_var([out_size], weight_decay=0.004, name='b')
    x = tf.matmul(input, w)
    return tf.nn.relu(x + b) if relu else x + b

def create_conv(input, out_channels, patch_size=5, stride=1, batch_norm=False, dropout=False):
    in_channels = input.get_shape()[-1].value
    w = weight_var([patch_size, patch_size, in_channels, out_channels], name='w')
    b = weight_var([out_channels], stddev=0, name='b')
    conv = tf.nn.conv2d(input, w, strides=[1,stride,stride,1], padding='SAME')
    if batch_norm: conv = create_batch_norm(conv)
    activation = tf.nn.relu(conv + b)
    if dropout: activation = create_dropout(activation)
    return activation

def create_max_pool(inputs, ksize=2, stride=2):
    return tf.nn.max_pool(inputs, ksize=[1, ksize, ksize, 1], strides=[1, stride, stride, 1], padding='SAME')

def create_batch_norm(inputs):
    return batch_norm(inputs, is_training=TRAIN)

def create_dropout(inputs):
    return tf.nn.dropout(inputs, dropout_keep_prob)

def text_conv(input, out_channels, patch_size=5, stride=1, pool_size=1):
    in_channels = input.get_shape()[-1].value
    w = weight_var([patch_size, in_channels, out_channels])
    b = weight_var([out_channels], stddev=0)
    conv = tf.nn.conv1d(input, w, stride=stride, padding='SAME')
    activation = tf.nn.relu(conv + b)
    return activation

def text_pool(inputs, ksize=2, stride=2, type='avg'):
    channels = inputs.get_shape()[-1].value
    length = inputs.get_shape()[-2].value
    inputs = tf.reshape(inputs, [-1, 1, length, channels])
    fn = {'avg': tf.nn.avg_pool, 'max': tf.nn.max_pool}[type]
    pooled =  fn(inputs, ksize=[1, 1, ksize, 1], strides=[1, 1, stride, 1], padding='SAME')
    return tf.reshape(pooled, [-1, length / stride, channels])

def flatten_tensor(t):
    shape = [s.value for s in t.get_shape()]
    flat_size = 1
    for x in shape[1:]:
        flat_size *= x
    return tf.reshape(t, [-1, flat_size])

In [8]:
review = tf.placeholder_with_default(chars_batch, [None, REVIEW_LEN], name='review')

def conv_model(review):
    one_hot = tf.one_hot(review, 255, dtype=tf.float32)
    conv0 = text_conv(one_hot, 84, patch_size=1)
    conv1 = text_conv(conv0, 128, patch_size=3)
    conv2 = text_conv(conv1, 64, patch_size=5)
    conv2_dropped = create_dropout(conv2)
    pool3 = text_pool(conv2_dropped, ksize=4, stride=4) # [?, 250, 64]
    conv4 = text_conv(pool3, 32, patch_size=5)
    pool5 = text_pool(conv4, ksize=10, stride=10) # [?, 25, 32]
    pool5_flat = tf.reshape(pool5, [-1, pool5.get_shape()[-2].value * pool5.get_shape()[-1].value])
    fc6 = create_fc(pool5_flat, 64)
    fc7 = create_fc(fc6, 1, relu=False)
    return fc7

def fc_model(review):
    one_hot = tf.one_hot(review, 255, dtype=tf.float32)
    conv1 = text_conv(one_hot, 4)
    conv1_flat = tf.reshape(conv1, [-1, REVIEW_LEN * 4])
    fc2 = create_fc(conv1_flat, 32)
    return create_fc(fc2, 1, relu=False)

def small_conv_1(review):
    one_hot = tf.one_hot(review, 255, dtype=tf.float32)
    conv0 = text_conv(one_hot, 64, patch_size=1)
    conv1 = text_conv(conv0, 16, patch_size=9)
    conv1 = create_dropout(conv1)
    conv2 = text_conv(conv1, 4, patch_size=9)
    pool3 = text_pool(conv2, ksize=100, stride=100) # 10x4
    fc3 = create_fc(flatten_tensor(conv2), 8)
    return create_fc(flatten_tensor(fc3), 1, relu=False)

def conv_model_2(review):
    one_hot = tf.one_hot(review, 255, dtype=tf.float32)
    conv0 = text_conv(one_hot, 64, patch_size=1) # 1000x64 -- embed letters into 64d
    conv1 = text_conv(conv0, 256, patch_size=7) # 1000x256 -- identify 256 words ~7 chars long
    conv1 = create_dropout(conv1)
    pool2 = text_pool(conv1, ksize=5, stride=5, type='max') # 200x256 -- reduce
    conv3 = text_conv(pool2, 32, patch_size=5) # 200x128 -- identify positive and negative word sequences
    pool4 = text_pool(conv3, ksize=50, stride=50, type='max') # 4x32 -- reduce
    flat5 = flatten_tensor(pool4)
    fc6 = create_fc(flat5, 16)
    return create_fc(fc6, 1, relu=False)

predicted_stars = conv_model_2(review)

In [9]:
star_rating = tf.placeholder_with_default(stars_batch, [None], name='stars')
loss = tf.reduce_sum(tf.nn.l2_loss(tf.cast(star_rating, tf.float32) - predicted_stars))

# compute accuracy:
which_correct = tf.equal(tf.cast(star_rating, tf.float32), tf.round(predicted_stars))
accuracy = tf.reduce_mean(tf.cast(which_correct, tf.float32))

In [10]:
learn_rate = tf.placeholder_with_default(tf.constant(0.01), [], name='learn_rate')
opt = tf.train.AdamOptimizer(learn_rate)
learn_step = opt.minimize(loss, global_step=global_step)

In [11]:
if hasattr(tf, 'global_variables_initializer'):
    session.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))
else:
    session.run(tf.initialize_all_variables())

In [12]:
while True:
    feed_dict = {
        dropout_keep_prob: 0.66,
        learn_rate: 0.01
    }
    _, step_, loss_, acc_, stars_ = session.run([learn_step, global_step, loss, accuracy, predicted_stars], feed_dict=feed_dict)
    if step_ % 5 == 1:
        print "step: {}, loss: {}, accuracy: {}".format(step_, loss_, acc_)
        # print stars_
    if step_ % 5 == 1 and False:
        # evaluate:
        accuracies = []
        for i in xrange(TEST_BATCHES):
            chars_in, stars_in = session.run([chars_batch_test, stars_batch_test])
            feed = {
                review: chars_in,
                star_rating: stars_in
            }
            acc_ = session.run([accuracy], feed_dict=feed)
            accuracies.append(acc_)
            print acc_, i, TEST_BATCHES
        print 'Test accuracy:', sum(accuracies) * 1.0 / len(accuracies)

step: 1, loss: 5924.87792969, accuracy: 0.0
step: 6, loss: 1807.52001953, accuracy: 0.0625
step: 11, loss: 890.352600098, accuracy: 0.375
step: 16, loss: 682.052062988, accuracy: 0.375
step: 21, loss: 1046.1640625, accuracy: 0.09375
step: 26, loss: 1295.3092041, accuracy: 0.25
step: 31, loss: 962.23828125, accuracy: 0.3125
step: 36, loss: 998.872558594, accuracy: 0.15625
step: 41, loss: 648.694702148, accuracy: 0.24609375
step: 46, loss: 833.254882812, accuracy: 0.1875
step: 51, loss: 1847.66589355, accuracy: 0.283203125


KeyboardInterrupt: 