In [1]:
import numpy as np
import pandas as pd
from pandas import datetime
import math
from tensorflow.contrib.rnn import LSTMCell, DropoutWrapper, MultiRNNCell
from tensorflow.contrib.layers import fully_connected
import tensorflow as tf
import matplotlib.pyplot as plt
import gc
import json
%matplotlib inline

In [31]:
# Load the hourly PRSA readings and turn them into a gap-free hourly frame
# whose extra column `pm25_next` holds the following hour's pm2.5 value.
data = pd.read_csv("PRSA_data_2010.1.1-2014.12.31.csv")

# Assemble the timestamp index from the year/month/day/hour columns in one
# vectorized call instead of constructing a datetime per row with apply().
data.index = pd.to_datetime(data[["year", "month", "day", "hour"]]).values

data = data.drop(labels=["year", "month", "day", "hour"], axis=1)
data = data.rename(columns={"pm2.5": "pm25"})

# Drop the rows that have no pm2.5 reading.
data = data[data["pm25"].notnull()]

# One-hot encode the categorical `cbwd` column, then drop it together with
# the `No` column.
data = pd.concat([data, pd.get_dummies(data.cbwd)], axis=1)
data = data.drop(labels=["cbwd", "No"], axis=1)

# Re-create a continuous hourly index; hours removed above are filled by
# carrying the previous row forward (method="pad").
ts = pd.date_range(start=data.index.min(), end=data.index.max(), freq="H")
data = data.reindex(index=ts, method="pad")

# The target is the pm25 value one row (one hour) ahead; the final row has no
# successor and is dropped.
data["pm25_next"] = data.pm25.shift(-1)
data = data[data.pm25_next.notnull()]


In [32]:
# Pull the target column and the remaining columns out as NumPy arrays, then
# release the DataFrame and trigger a garbage-collection pass.
Y = data["pm25_next"].values
X = data.drop("pm25_next", axis=1).values
del data
gc.collect()

61405

In [33]:
# Column-wise minimum and maximum over every row of X.
X_min = X.min(axis=0)
X_max = X.max(axis=0)

In [35]:
# Display the column-wise (min, max) arrays of X as the cell output.
X_min, X_max

(array([  0.00000000e+00,  -4.00000000e+01,  -1.90000000e+01,
          9.91000000e+02,   4.50000000e-01,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00]),
 array([  9.94000000e+02,   2.80000000e+01,   4.20000000e+01,
          1.04600000e+03,   5.65490000e+02,   2.70000000e+01,
          3.60000000e+01,   1.00000000e+00,   1.00000000e+00,
          1.00000000e+00,   1.00000000e+00]))

In [None]:
 global X, Y

#break down data for train and test
#Here we take into consideration the time_steps
size_of_test = X.shape[0]//10
size_of_train = X.shape[0] - X.shape[0]//10
size_of_test = size_of_test + size_of_train%time_steps -1
size_of_train = X.shape[0] - size_of_test
cut_off = size_of_train - 1

X_train, Y_train = X[0:cut_off,:], Y[0:cut_off]
X_test, Y_test = X[cut_off:,:], Y[cut_off::]

#Get the min and max for all data
X_min, X_max = np.min(X, axis=0), np.max(X, axis=0)

#Standardize data
X_train = (X_train-X_min)/(X_max-X_min)
X_test = (X_test-X_min)/(X_max-X_min)

assert(np.all(np.max(X_train, axis=0) == 1.0))
assert(np.all(np.min(X_train, axis=0) == 0.0))

X_train_v = X_train[0:50, :]
Y_train_v = Y_train[0:50]
X_train_ut, Y_train_ut = create_set_with_timestep(X_train_v, Y_train_v, time_steps)

#assert(np.all(X_train_ut[25,0,:] == X_train_v[25,:]))
#assert(Y_train_ut[24] == Y_train_v[24+time_steps-1])

del X_train_ut, X_train_v
gc.collect()

X_train, Y_train = create_set_with_timestep(X_train, Y_train, time_steps)
X_test, Y_test = create_set_with_timestep(X_test, Y_test, time_steps)


In [29]:
# Reset the default graph, rebuild the saved graph from its meta file, load
# the checkpoint values and print the first trainable variable.
tf.reset_default_graph()

with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph("model/lstm-150.meta")
    new_saver.restore(sess, "model/lstm-150")
    l = sess.graph.get_collection("trainable_variables")
    # Evaluate the variable in this session and print its current value.
    print(sess.run(l[0]))

INFO:tensorflow:Restoring parameters from model/lstm-150
[[-0.16452482  0.42895725 -0.07317236 ...,  1.32553053 -0.58596361
  -0.10303319]
 [-0.41919723  0.09274144  0.14675307 ..., -0.23298113  0.03309511
  -0.19529048]
 [-0.3629404  -0.27416244  0.22508462 ..., -0.44087529 -0.26117423
   0.05187651]
 ..., 
 [ 0.38898617 -0.18325227 -0.78841394 ..., -0.18772124  0.1327482
   0.20491971]
 [ 0.42833292 -0.01359674  0.85255647 ..., -0.21459951  0.36771637
  -0.25170344]
 [ 0.08536842  0.27144602 -0.65217268 ...,  0.12993701  0.59898394
  -0.04898318]]


In [36]:
class PRSA_lstm:
    """Helper holding the feature scaling bounds and the windowing routine."""

    def __init__(self):
        """Store the per-feature minimum and maximum as NumPy arrays.

        The 11 values in each array are the ones shown by the notebook's
        earlier `X_min, X_max` output.
        """
        # `np.array` is required here: the bare name `array` is never imported
        # in this notebook and raised NameError on instantiation.
        self.X_min = np.array([0.0, -40.0, -19.0, 991.0, 0.45, 0.0,
                               0.0, 0.0, 0.0, 0.0, 0.0])
        self.X_max = np.array([994.0, 28.0, 42.0, 1046.0, 565.49, 27.0,
                               36.0, 1.0, 1.0, 1.0, 1.0])

    def create_set_with_timestep(self, X, Y, time_steps):
        """Cut X into overlapping windows of `time_steps` consecutive rows.

        Window ``i`` is ``X[i:i + time_steps]`` and its target is
        ``Y[i + time_steps - 1]``, i.e. the Y entry aligned with the last row
        of the window.

        Args:
            X: 2-D array of shape (n_rows, n_features).
            Y: array whose first axis is aligned with the rows of X.
            time_steps: positive window length, at most ``X.shape[0]``.

        Returns:
            Tuple ``(X_with_t, Y_with_t)`` of float arrays with shapes
            ``(X.shape[0] - time_steps, time_steps, n_features)`` and
            ``(Y.shape[0] - time_steps, 1)``.

        Raises:
            ValueError: if `time_steps` is not positive or exceeds the number
                of rows in X.
        """
        if time_steps <= 0:
            raise ValueError("time_steps must be a positive integer")
        if time_steps > X.shape[0]:
            raise ValueError("time_steps cannot exceed the number of rows in X")

        n_windows = X.shape[0] - time_steps
        X_with_t = np.zeros((n_windows, time_steps, X.shape[1]))
        Y_with_t = np.zeros((Y.shape[0] - time_steps, 1))

        # The last window start is n_windows - 1, so start + time_steps never
        # runs past the end of X and no bounds check is needed inside the loop.
        for start in range(n_windows):
            stop = start + time_steps
            X_with_t[start] = X[start:stop]
            Y_with_t[start] = Y[stop - 1]
        return (X_with_t, Y_with_t)
    
    
        