In [1]:
import numpy as np
import pandas as pd
from pandas import datetime
import math
from tensorflow.contrib.rnn import LSTMCell, DropoutWrapper, MultiRNNCell
from tensorflow.contrib.layers import fully_connected
import tensorflow as tf
import matplotlib.pyplot as plt
import gc
%matplotlib inline

In [2]:
data = pd.read_csv("PRSA_data_2010.1.1-2014.12.31.csv")
data.index = data.apply(lambda x:datetime(x.year, x.month, x.day, x.hour), axis=1)

data.drop(labels=["year", "month", "day", "hour"], axis=1, inplace=True)
data.rename(columns={"pm2.5":"pm25"}, inplace=True)
data = data[pd.isnull(data["pm25"]) == False]
data = pd.concat([data, pd.get_dummies(data.cbwd)], axis=1)
data.drop(labels=["cbwd", "No"], axis=1, inplace=True)
ts = pd.date_range(start=data.index.min(), end=data.index.max(), freq="H")
data = data.reindex(index=ts, method="pad")


data["pm25_next"] = data.pm25.shift(-1)
data = data[pd.isnull(data.pm25_next)==False]


In [3]:
data.head()


Unnamed: 0,pm25,DEWP,TEMP,PRES,Iws,Is,Ir,NE,NW,SE,cv,pm25_next
2010-01-02 00:00:00,129.0,-16,-4.0,1020.0,1.79,0,0,0,0,1,0,148.0
2010-01-02 01:00:00,148.0,-15,-4.0,1020.0,2.68,0,0,0,0,1,0,159.0
2010-01-02 02:00:00,159.0,-11,-5.0,1021.0,3.57,0,0,0,0,1,0,181.0
2010-01-02 03:00:00,181.0,-7,-5.0,1022.0,5.36,1,0,0,0,1,0,138.0
2010-01-02 04:00:00,138.0,-7,-5.0,1022.0,6.25,2,0,0,0,1,0,109.0


In [4]:
Y = data.pm25_next.values
X = data.drop(labels="pm25_next", axis=1).values
del data
gc.collect()

329

In [5]:
time_steps = 12

#break down data for train and test
#Here we take into consideration the time_steps
size_of_test = X.shape[0]//10
size_of_train = X.shape[0] - X.shape[0]//10
size_of_test = size_of_test + size_of_train%time_steps -1
size_of_train = X.shape[0] - size_of_test
cut_off = size_of_train - 1

X_train, Y_train = X[0:cut_off,:], Y[0:cut_off]
X_test, Y_test = X[cut_off:,:], Y[cut_off::]

#Get the min and max for all data
X_min, X_max = np.min(X, axis=0), np.max(X, axis=0)

#Standardize data
X_train = (X_train-X_min)/(X_max-X_min)
X_test = (X_test-X_min)/(X_max-X_min)

assert(np.all(np.max(X_train, axis=0) == 1.0))
assert(np.all(np.min(X_train, axis=0) == 0.0))

print("X_train shape: {} ----- Y_train shape: {}-------".format(X_train.shape, Y_train.shape))

X_train shape: (39420, 11) ----- Y_train shape: (39420,)-------


In [6]:
def create_set_with_timestep(X, Y, time_steps):
    X_with_t = np.zeros((X.shape[0] - time_steps, time_steps, X.shape[1]))
    Y_with_t = np.zeros(((Y.shape[0] - time_steps),1))
    # sliding window with time steps width of for the sample
    # in this case the y would be the last y of the sample (because we shifted that earlier)
    
    for i in range(X_with_t.shape[0]):
        to_i = i + time_steps
        if to_i >= X.shape[0]:
            break;
        X_with_t[i,...] = np.vstack(X[i:to_i,:])
        Y_with_t[i,...] = Y[to_i-1]
    return (X_with_t, Y_with_t)
    
    

In [7]:
X_train_v = X_train[0:50, :]
Y_train_v = Y_train[0:50]
X_train_ut, Y_train_ut = create_set_with_timestep(X_train_v, Y_train_v, time_steps)

assert(np.all(X_train_ut[25,0,:] == X_train_v[25,:]))
assert(Y_train_ut[24] == Y_train_v[24+time_steps-1])

del X_train_ut, X_train_v
gc.collect()

308

In [8]:
X_train, Y_train = create_set_with_timestep(X_train, Y_train, time_steps)
X_test, Y_test = create_set_with_timestep(X_test, Y_test, time_steps)

print("X_train shape {} --- Y_train shape {}\nX_test shape {} --- Y_test shape {}".
      format(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape))

X_train shape (39408, 12, 11) --- Y_train shape (39408, 1)
X_test shape (4367, 12, 11) --- Y_test shape (4367, 1)


## Development in Tensorflow

In [10]:
tf.reset_default_graph()
graph = tf.Graph()
X_shape = X_train.shape
Y_shape = Y_train.shape
batch_size = 50
num_units = [128, 64]
with graph.as_default():
    
    X = tf.placeholder(shape=[batch_size, X_shape[1], X_shape[2]], dtype=tf.float32)
    Y = tf.placeholder(shape=[batch_size, 1], dtype=tf.float32)
    keep_prob = tf.placeholder(tf.float32)
    #I do a basic now and look at the graph
    lstm_cells = list()
    for u in range(len(num_units)):
        lstm_cells.append(
            DropoutWrapper(LSTMCell(num_units=num_units[u], state_is_tuple=True), 
                           input_keep_prob=keep_prob,
                           output_keep_prob=keep_prob))
    
    lstm_cells = MultiRNNCell(lstm_cells, state_is_tuple=True)
    initial_states = lstm_cells.zero_state(batch_size, dtype=tf.float32)
    outputs, states = tf.nn.dynamic_rnn(lstm_cells, inputs=X, initial_state=initial_states)

    
    fc_final = fully_connected(inputs=outputs[:,-1,:], activation_fn=None, num_outputs=1)
    loss = tf.losses.mean_squared_error(labels=Y, predictions=fc_final)
    
    optimizer = tf.train.AdamOptimizer()
    gradients = optimizer.compute_gradients(loss)
    for i, gv in enumerate(gradients):
        tf.summary.histogram("{}".format(gv[0].name), gv[0])
    #gradients = [(tf.clip_by_value(g, tf.constant(-2000.0), tf.constant(100.0)), v) \
    #                     for g, v in gradients]
    
    optimize = optimizer.apply_gradients(gradients)
    tf.summary.scalar("loss_rmse", tf.sqrt(loss))
    merge_summaries = tf.summary.merge_all()
    init = tf.global_variables_initializer()

INFO:tensorflow:Summary name gradients/rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/lstm_cell/MatMul/Enter_grad/b_acc_3:0 is illegal; using gradients/rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/lstm_cell/MatMul/Enter_grad/b_acc_3_0 instead.
INFO:tensorflow:Summary name gradients/rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/lstm_cell/BiasAdd/Enter_grad/b_acc_3:0 is illegal; using gradients/rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/lstm_cell/BiasAdd/Enter_grad/b_acc_3_0 instead.
INFO:tensorflow:Summary name gradients/rnn/while/rnn/multi_rnn_cell/cell_1/cell_1/lstm_cell/MatMul/Enter_grad/b_acc_3:0 is illegal; using gradients/rnn/while/rnn/multi_rnn_cell/cell_1/cell_1/lstm_cell/MatMul/Enter_grad/b_acc_3_0 instead.
INFO:tensorflow:Summary name gradients/rnn/while/rnn/multi_rnn_cell/cell_1/cell_1/lstm_cell/BiasAdd/Enter_grad/b_acc_3:0 is illegal; using gradients/rnn/while/rnn/multi_rnn_cell/cell_1/cell_1/lstm_cell/BiasAdd/Enter_grad/b_acc_3_0 instead.
INFO:tensorflow:Summary name gradients/fully

In [None]:
with tf.Session(graph=graph) as sess:
    no_of_epochs = 100000
    no_of_iterations = X_train.shape[0]//batch_size + 1
    report_cycle = 100
    summarize_train = tf.summary.FileWriter(logdir="log//train", graph=graph)
    summarize_test = tf.summary.FileWriter(logdir="log//test", graph=graph)
    sess.run(init)
    counter = 1
    for epoch in range(no_of_epochs):
        for n in range(no_of_iterations):
            train_idxs = np.random.choice(np.arange(X_train.shape[0]), batch_size)
            _, loss_val, train_summary  = sess.run([optimize, loss, merge_summaries], 
                                                   feed_dict={X:X_train[train_idxs,...], 
                                                              Y:Y_train[train_idxs,...],
                                                            keep_prob:0.6})
            if n%report_cycle==0:
                summarize_train.add_summary(train_summary, counter)
                loss_test_val, test_summary = sess.run([loss, merge_summaries], 
                                                   feed_dict={X:X_test[0:50,...], 
                                                              Y:Y_test[0:50,...],
                                                              keep_prob:1.0})
                summarize_test.add_summary(test_summary, counter)
                counter += 1
                print("\repoch={:<10d}--Iterations={:<10d}--loss train rmse={:<.3f}--loss test rmse={:<.3f}".\
                      format(epoch, n, math.sqrt(loss_val), math.sqrt(loss_test_val)), end =" ")
        
    

epoch=138       --Iterations=600       --loss train rmse=21.650--loss test rmse=84.652 