In [1]:
import numpy as np
import pandas as pd
from pandas import datetime
import math
from tensorflow.contrib.rnn import LSTMCell, DropoutWrapper, MultiRNNCell
from tensorflow.contrib.layers import fully_connected
import tensorflow as tf
import matplotlib.pyplot as plt
import gc
import json
%matplotlib inline

In [2]:
data = pd.read_csv("PRSA_data_2010.1.1-2014.12.31.csv")
data.index = data.apply(lambda x:datetime(x.year, x.month, x.day, x.hour), axis=1)

data.drop(labels=["year", "month", "day", "hour"], axis=1, inplace=True)
data.rename(columns={"pm2.5":"pm25"}, inplace=True)
data = data[pd.isnull(data["pm25"]) == False]
data = pd.concat([data, pd.get_dummies(data.cbwd)], axis=1)
data.drop(labels=["cbwd", "No"], axis=1, inplace=True)
ts = pd.date_range(start=data.index.min(), end=data.index.max(), freq="H")
data = data.reindex(index=ts, method="pad")


data["pm25_next"] = data.pm25.shift(-1)
data = data[pd.isnull(data.pm25_next)==False]


In [3]:
Y = data.pm25_next.values
X = data.drop(labels="pm25_next", axis=1).values
del data
gc.collect()

In [4]:
def create_set_with_timestep(X, Y, time_steps):
    X_with_t = np.zeros((X.shape[0] - time_steps, time_steps, X.shape[1]))
    Y_with_t = np.zeros(((Y.shape[0] - time_steps),1))
    # sliding window with time steps width of for the sample
    # in this case the y would be the last y of the sample (because we shifted that earlier)
    
    for i in range(X_with_t.shape[0]):
        to_i = i + time_steps
        if to_i >= X.shape[0]:
            break;
        X_with_t[i,...] = np.vstack(X[i:to_i,:])
        Y_with_t[i,...] = Y[to_i-1]
    return (X_with_t, Y_with_t)
    
    

# Parameters

In [5]:
def build_and_train(time_steps, num_units, keep_prob, do_summaries, batch_size, no_of_epochs):
    global X, Y

    #break down data for train and test
    #Here we take into consideration the time_steps
    size_of_test = X.shape[0]//10
    size_of_train = X.shape[0] - X.shape[0]//10
    size_of_test = size_of_test + size_of_train%time_steps -1
    size_of_train = X.shape[0] - size_of_test
    cut_off = size_of_train - 1

    X_train, Y_train = X[0:cut_off,:], Y[0:cut_off]
    X_test, Y_test = X[cut_off:,:], Y[cut_off::]

    #Get the min and max for all data
    X_min, X_max = np.min(X, axis=0), np.max(X, axis=0)

    #Standardize data
    X_train = (X_train-X_min)/(X_max-X_min)
    X_test = (X_test-X_min)/(X_max-X_min)

    assert(np.all(np.max(X_train, axis=0) == 1.0))
    assert(np.all(np.min(X_train, axis=0) == 0.0))

    X_train_v = X_train[0:50, :]
    Y_train_v = Y_train[0:50]
    X_train_ut, Y_train_ut = create_set_with_timestep(X_train_v, Y_train_v, time_steps)

    assert(np.all(X_train_ut[25,0,:] == X_train_v[25,:]))
    assert(Y_train_ut[24] == Y_train_v[24+time_steps-1])

    del X_train_ut, X_train_v
    gc.collect()

    X_train, Y_train = create_set_with_timestep(X_train, Y_train, time_steps)
    X_test, Y_test = create_set_with_timestep(X_test, Y_test, time_steps)

    tf.reset_default_graph()
    graph = tf.Graph()
    X_shape = X_train.shape
    Y_shape = Y_train.shape


    with graph.as_default():


        Xt = tf.placeholder(shape=[None, X_shape[1], X_shape[2]], dtype=tf.float32)
        Yt = tf.placeholder(shape=[None, 1], dtype=tf.float32)
        b_size = tf.shape(Xt)[0]
        keep_prob_t = tf.placeholder(tf.float32)
        #I do a basic now and look at the graph
        lstm_cells = list()
        for u in range(len(num_units)):
            lstm_cells.append(
                DropoutWrapper(LSTMCell(num_units=num_units[u], state_is_tuple=True), 
                               input_keep_prob=keep_prob_t,
                               output_keep_prob=keep_prob_t))

        lstm_cells = MultiRNNCell(lstm_cells, state_is_tuple=True)
        initial_states = lstm_cells.zero_state(b_size, dtype=tf.float32)
        outputs, states = tf.nn.dynamic_rnn(lstm_cells, inputs=Xt, initial_state=initial_states)


        fc_final = fully_connected(inputs=outputs[:,-1,:], activation_fn=None, num_outputs=1)
        loss = tf.losses.mean_squared_error(labels=Yt, predictions=fc_final)
        loss_rmse = tf.sqrt(loss)

        optimizer = tf.train.AdamOptimizer()
        gradients = optimizer.compute_gradients(loss)


        optimize = optimizer.apply_gradients(gradients)
        if do_summaries:
            tf.summary.scalar("loss_rmse", tf.sqrt(loss))
            merge_summaries = tf.summary.merge_all()
        init = tf.global_variables_initializer()


    with tf.Session(graph=graph) as sess:

        no_of_iterations = X_train.shape[0]//batch_size + 1
        report_cycle = 100
        if do_summaries:
            summarize_train = tf.summary.FileWriter(logdir="log//train", graph=graph)
            summarize_test = tf.summary.FileWriter(logdir="log//test", graph=graph)
        sess.run(init)
        counter = 1
        loss_list = dict()
        
        for epoch in range(no_of_epochs):
            for n in range(no_of_iterations):
                train_idxs = np.random.choice(np.arange(X_train.shape[0]), batch_size)

                if do_summaries: 
                    _, loss_val, train_summary  = sess.run([optimize, loss_rmse, merge_summaries],
                                                           feed_dict={Xt:X_train[train_idxs,...], 
                                                           Yt:Y_train[train_idxs,...],
                                                           keep_prob_t:keep_prob})
                else:
                    _, loss_val = sess.run([optimize, loss_rmse], feed_dict={Xt:X_train[train_idxs,...], 
                                                             Yt:Y_train[train_idxs,...],
                                                             keep_prob_t:keep_prob})
                

                if n%report_cycle==0:
                    if do_summaries:
                        summarize_train.add_summary(train_summary, counter)
                        loss_test_val, test_summary = sess.run([loss_rmse, merge_summaries], 
                                                               feed_dict={Xt:X_test[0:50,...], 
                                                               Yt:Y_test[0:50,...],
                                                               keep_prob_t:1.0})
                        summarize_test.add_summary(test_summary, counter)

                    else:
                        loss_test_val = sess.run(loss_rmse, feed_dict={Xt:X_test[0:50,...], 
                                                         Yt:Y_test[0:50,...],
                                                         keep_prob_t:1.0})
                    loss_list[counter] = {"loss_train":loss_val, "loss_test":loss_test_val}
                    counter += 1
                    print("\repoch={:<10d}--Iterations={:<10d}--loss train rmse={:<.3f}--loss test rmse={:<.3f}".\
                          format(epoch, n, math.sqrt(loss_val), math.sqrt(loss_test_val)), end =" ")
    return(loss_list)
            



In [None]:



#Hyper Parameters
time_steps_par = [10, 16, 22, 30]
num_units_par = [[32, 32], [64, 64], [32], [64], [128]]
keep_prob_par = [np.random.randn()*0.17+0.6 for _ in range(3)]

par_combinations = [(t, n, p) for t in time_steps_par for n in num_units_par for p in keep_prob_par]


results = dict()
for i, (time_steps, num_units, keep_prob) in enumerate(par_combinations):
  
    print("Doing {}".format((time_steps, tuple(num_units), keep_prob)))
    loss_list = build_and_train(time_steps, num_units, keep_prob, False, 50, 100)
    results[i] = {"time_steps":time_steps, "layers":num_units, "keep_prob":keep_prob}
with open("log_results//results_{}.json".format(i), "w") as op:
    json.dump(results, op)
    

Doing (10, (32, 32), 0.6955630620490267)
epoch=0         --Iterations=600       --loss train rmse=10.629--loss test rmse=13.576 