In [1]:
#
# Predicting if a user write at least a review from a session
# use LSTM
#

# import modules
from pymongo import MongoClient # use mongodb

import numpy as np # use numpay
from numpy import genfromtxt

import tensorflow as tf
from tensorflow.contrib import rnn

import matplotlib.pyplot as plt

import time
from collections import namedtuple

In [3]:
# read data from csv file
# 24 actions for 1 session. Each actions have unique indexes.
# like 'notifications-view' -> 23
my_data_1 = genfromtxt('session_data.csv', delimiter=',')

In [4]:
# shuffle data
np.random.shuffle(my_data_1)

# trim data to match batch size * batch count because somehow tf show erros without this...
my_data_2 = my_data_1[:7692]

In [5]:
# check data
my_data_2[2]

array([  1.,   2.,   3.,  78.,  78.,  78.,  78.,  78.,  78.,  78.,  78.,
        78.,  78.,  78.,  78.,  78.,  78.,  78.,  78.,  78.,  78.,  78.,
        78.,  78.,   0.])

In [6]:
# convert list to numpy array and divide into train and valid data
# since we don't have many data, I don't prepare test set which we can test real future data
x_list = np.array(my_data_2[:,:24])
y_list = np.array(my_data_2[:,24])

train_x = x_list[:7200]
train_y = y_list[:7200]
valid_x = x_list[:-7200]
valid_y = y_list[:-7200]

In [7]:
# helper function to get batch 
def get_batch(x, y, batch_idx, batch_size): 
    
    rtn_x = x[batch_idx*batch_size:(batch_idx+1)*batch_size]
    rtn_y = y[batch_idx*batch_size:(batch_idx+1)*batch_size]
    rtn_y = rtn_y.reshape((rtn_y.shape[0],1))
    
    return rtn_x, rtn_y


In [8]:
# set hyper parameters 
# since I refer multiple examples and convined them. 

# reset graph
tf.reset_default_graph()

# Parameters
learning_rate = 0.001
# training_iters = 100
training_iters = 360 # 360 was just good enough
# batch_size = 128
batch_size = 12
display_step = 10

num_actions = 78

# Network Parameters
num_input = 1
num_steps = 24
# n_hidden = 32 # hidden layer num of features
lstm_size = 32

n_classes = 2 # total classes (yes or no)
num_classes = 2

# I have no idea. I just copied example's setting
num_layers = 2

# tf Graph input

# x
inputs = tf.placeholder(tf.int32, [None, num_steps], name='inputs')

# y
targets = tf.placeholder(tf.int32, [None, 1], name='targets')

In [9]:
# define model

# Keep probability placeholder for drop out layers
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# One-hot encoding the input and target characters
x_one_hot = tf.one_hot(inputs, num_actions)
y_one_hot = tf.one_hot(targets, num_classes)

# Use a basic LSTM cell
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

# Add dropout to the cell
drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

# Stack up multiple LSTM layers, for deep learning
cell = tf.contrib.rnn.MultiRNNCell([drop] * num_layers)
initial_state = cell.zero_state(batch_size, tf.float32)

### Run the data through the RNN layers
# This makes a list where each element is on step in the sequence
rnn_inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(x_one_hot, num_steps, 1)]

# Run each sequence step through the RNN and collect the outputs
outputs, state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=initial_state)
final_state = state
output_last = outputs[-1]

# Reshape output so it's a bunch of rows, one output row for each step for each batch
# seq_output = tf.concat(outputs, axis=1)
# output = tf.reshape(seq_output, [-1, lstm_size])

# Now connect the RNN putputs to a softmax layer
with tf.variable_scope('softmax'):
    softmax_w = tf.Variable(tf.truncated_normal((lstm_size, num_classes), stddev=0.1))
    softmax_b = tf.Variable(tf.zeros(num_classes))

# Since output is a bunch of rows of RNN cell outputs, logits will be a bunch
# of rows of logit outputs, one for each step and batch
# logits = tf.matmul(output, softmax_w) + softmax_b
# we care only the last

# calculate logits from output_last and weight and bias
logits = tf.matmul(output_last, softmax_w) + softmax_b

# Use softmax to get the probabilities for predicted characters
preds = tf.nn.softmax(logits, name='predictions')

# Reshape the targets to match the logits
y_reshaped = tf.reshape(y_one_hot, [-1, num_classes])
loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
cost = tf.reduce_mean(loss)

# Optimizer for training, using gradient clipping to control exploding gradients
# I don't know we still needs settings below...

# unkonwn settings
grad_clip = 5

tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
train_op = tf.train.AdamOptimizer(learning_rate)
optimizer = train_op.apply_gradients(zip(grads, tvars))

# Export the nodes
# NOTE: I'm using a namedtuple here because I think they are cool
export_nodes = ['inputs', 'targets', 'initial_state', 'final_state',
                'keep_prob', 'cost', 'preds', 'optimizer']
Graph = namedtuple('Graph', export_nodes)
local_dict = locals()
graph = Graph(*[local_dict[each] for each in export_nodes])

In [10]:
# accuracy
correct_pred = tf.equal(tf.argmax(preds,1), tf.argmax(tf.squeeze(y_one_hot),1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [11]:
# Launch the graph

# Save check points
save_file = './favorr_lstm_02.ckpt'
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    step = 1
    # Keep training until reach max iterations
    # while step * batch_size < training_iters:
    # training_iters = len(train_x) // batch_size
    # print(training_iters)
    while step < training_iters:
        # batch_x, batch_y = mnist.train.next_batch(batch_size)
        
        # total loss
        loss_total = 0
        
        batch_count = len(train_x) // batch_size
        for batch_idx in range(batch_count):
            batch_x, batch_y = get_batch(train_x, train_y, batch_idx, batch_size)
            sess.run(optimizer, feed_dict={inputs: batch_x, targets: batch_y, keep_prob:0.7})
            
            if batch_idx == 0 and step == 1:
                print("batch_x.shape:{}".format(batch_x.shape))
                print("batch_y.shape:{}".format(batch_y.shape))
        
            # calculate loss
            loss = sess.run(cost, feed_dict={inputs: batch_x, targets: batch_y, keep_prob:1.0})
            loss_total = loss_total + loss
        
        
        if step % display_step == 0:
            
            test_loss = loss_total / batch_count
            
            # Calculate validation accuracy
            valid_batch_count = len(valid_x) // batch_size
            valid_acc_total = 0
            for batch_idx in range(valid_batch_count):
                vaild_batch_x, valid_batch_y = get_batch(valid_x, valid_y, batch_idx, batch_size)
                acc = sess.run(accuracy, feed_dict={inputs: vaild_batch_x, targets: valid_batch_y, keep_prob:1.0})
                valid_acc_total = valid_acc_total + acc
            
            valid_acc = valid_acc_total / valid_batch_count
            print("step:{}, test loss:{}, valid accuracy:{}".format(step, test_loss, valid_acc))
            

        step += 1
    
    #
    # finally Save the model
    saver.save(sess, save_file)
    print ("Optimization Finished!")


batch_x.shape:(12, 24)
batch_y.shape:(12, 1)
step:10, test loss:0.24712484487642844, valid accuracy:0.9065040684327846
step:20, test loss:0.22174261691824843, valid accuracy:0.9085365897271691
step:30, test loss:0.2040601518532882, valid accuracy:0.918699191837776
step:40, test loss:0.18529470891381303, valid accuracy:0.9207317116783886
step:50, test loss:0.17253714457464714, valid accuracy:0.9207317073170732
step:60, test loss:0.16088125333442196, valid accuracy:0.9227642286114577
step:70, test loss:0.15357467344942657, valid accuracy:0.9207317073170732
step:80, test loss:0.14222889218566706, valid accuracy:0.9308943108814519
step:90, test loss:0.13677117583807558, valid accuracy:0.9288617939483829
step:100, test loss:0.12550366749802683, valid accuracy:0.9451219541270558
step:110, test loss:0.12020267584836498, valid accuracy:0.9471544768752121
step:120, test loss:0.11643759689487827, valid accuracy:0.9491869967158247
step:130, test loss:0.10923397427531502, valid accuracy:0.94715447

KeyboardInterrupt: 