In [1]:
from __future__ import print_function, division
import numpy as np
import pandas as pd
import tensorflow as tf

epochs = 80
sequenceLength = 15
stateSize = 12
batchSize = 10

In [23]:
def getData():
    return pd.read_csv("/home/tbrownex/data/test/seasonal.csv")

In [28]:
def checkEvenBatches(df):
    '''
    Number of batches * sequence Length should be a multiple of the row count.
    If not, get rid on any extras
    '''    
    extraRows = df.shape[0]%(batchSize*sequenceLength)
    print("before: ", df.shape[0])
    print(extraRows)
    if extraRows > 0:
        extraRows = int(extraRows)
        print("{} rows removed to even the batches".format(extraRows))
        df = df[:-extraRows]
    print("after: ", df.shape[0])
    return df

In [29]:
def formatData(df):
    '''
    Ignore "date" since this is univariate analysis
    Both X and Y are based on X: Y is X shifted back "sequenceLength".
    Y is shifted in order to associate training data and labels:
       Y being shifted to the left represents the Future from the perspective of X
    That "future" serves as the Label(s)
    '''
    x = df["value"]
    y = x[sequenceLength:].reset_index(drop=True)
    x = x[:-sequenceLength]        # Make x and y the same length
    
    x = np.array(x)
    y = np.array(y)
    x = np.reshape(x, newshape=[batchSize, -1])
    y = np.reshape(y, newshape=[batchSize, -1])
    return x, y

In [30]:
def prepData(df):
    '''
    Make sure the batches are uniform sized
    Format X and Y
    '''
    df = checkEvenBatches(df)
    x,y = formatData(df)
    return x,y

In [31]:
df = getData()
x,y = prepData(df)
after = x.shape[0] * x.shape[1]

before:  1000
100
100 rows removed to even the batches
after:  900


ValueError: cannot reshape array of size 885 into shape (10,newaxis)

In [None]:
dataWidth= x.shape[1]
numBatches = dataWidth//sequenceLength
print("{} batches of width {}".format(numBatches, sequenceLength))

In [None]:
batchX_placeholder = tf.placeholder(tf.float32, [batchSize, sequenceLength], name="miniX")
batchY_placeholder = tf.placeholder(tf.float32, [batchSize, sequenceLength], name="miniY")

inputs_series = tf.split(batchX_placeholder, num_or_size_splits=sequenceLength, axis=1, name="inputs")
labels_series = tf.split(batchY_placeholder, num_or_size_splits=sequenceLength, axis=1, name="labels")

In [None]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for b in range(numBatches):
        start = b * sequenceLength
        stop  = start + sequenceLength
        miniX = x[:,start:stop]
        miniY = y[:,start:stop]

        inputs, labels = sess.run([inputs_series, labels_series],
                                  feed_dict={
                                      batchX_placeholder: miniX,
                                      batchY_placeholder: miniY})
        print(b)
        break

In [None]:
cellState = tf.placeholder(tf.float32, [batchSize, stateSize], name="cell")
hiddenState = tf.placeholder(tf.float32, [batchSize, stateSize], name="hidden")
init_state = tf.nn.rnn_cell.LSTMStateTuple(cellState, hiddenState)

W2 = tf.Variable(np.random.rand(stateSize,1),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1)), dtype=tf.float32)

# Forward passes
#cell = tf.nn.rnn_cell.BasicLSTMCell(stateSize, state_is_tuple=True)
cell = tf.nn.rnn_cell.LSTMCell(num_units=stateSize, state_is_tuple=True)
states_series, current_state = tf.nn.static_rnn(cell, inputs_series, init_state)

predictions = [tf.matmul(state, W2) + b2 for state in states_series]
loss = tf.losses.mean_squared_error(predictions, labels_series)
training = tf.train.AdagradOptimizer(0.3).minimize(loss)

In [None]:
print("{:<8}{}".format("Epoch", "MSE"))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        _current_cell_state = np.zeros((batchSize, stateSize))
        _current_hidden_state = np.zeros((batchSize, stateSize))
        predList = []      # This is to see what the predictions were for each mini batch
        for b in range(numBatches):
            start = b * sequenceLength
            stop  = start + sequenceLength
            miniX = x[:,start:stop]
            miniY = y[:,start:stop]

            _, _current_state, p = sess.run([training, current_state, predictions],
                                         feed_dict={
                                             batchX_placeholder: miniX,
                                             batchY_placeholder: miniY,
                                             cellState: _current_cell_state,
                                             hiddenState: _current_hidden_state})

            _current_cell_state, _current_hidden_state = _current_state
            if b == 0:
                predList.append(p)
            
        err = loss.eval(feed_dict={
            batchX_placeholder: miniX,
            batchY_placeholder: miniY,
            cellState: _current_cell_state,
            hiddenState: _current_hidden_state})
        print("{:<8}{:,.0f}".format(epoch, err))

In [None]:
predList[11][0].shape