In [2]:
## Load modules
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt
import os
cwd = os.getcwd() # current working directory

In [3]:
import sys
sys.path.append(os.path.abspath(cwd))

In [92]:
# data (num_samples, features)
# Location of the wind-data CSV: notebook working directory + file name.
# windFile keeps its leading '/' because importWindFile joins the two parts by
# plain string concatenation (basedir+windFile), not os.path.join.
basedir = cwd
windFile = '/Firewheel_FWL'+str(1)+'_mast_file_tsec_72-months.csv'
def importWindFile(basedir, windFile):
    """Load a comma-separated mast file and extract the columns the notebook uses.

    The first 7 lines of the file are skipped as header.

    Parameters
    ----------
    basedir : str
        Directory that contains the file.
    windFile : str
        File name, appended verbatim to ``basedir`` (it must therefore carry
        its own leading path separator).

    Returns
    -------
    ws : ndarray, shape (nt, 5)
        Columns 5, 10, 15, 20 and 25 of the file.
    hour : ndarray, shape (nt,)
        Column 3 of the file.
    time : ndarray, shape (nt,)
        Synthetic time axis 0, 10, 20, ... with one step of 10 per row.
    nt : int
        Number of data rows read.
    month : ndarray, shape (nt,)
        Column 1 of the file.
    """
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D vector and every column slice below raises IndexError.
    data = np.loadtxt(basedir+windFile, delimiter=',', skiprows=7, ndmin=2)
    nt = data.shape[0]
    # endpoint=False yields an exact spacing of 10; including the endpoint
    # would stretch the step to 10*nt/(nt-1).
    time = np.linspace(0., 10.*nt, nt, endpoint=False)
    hour = data[:,3]
    month = data[:,1]
    # Every fifth column starting at 5; list indexing returns a fresh array,
    # so ws does not alias the raw table.
    ws = data[:, [5, 10, 15, 20, 25]]
    return ws, hour, time, nt, month


# Load the mast file once. ws holds file columns 5/10/15/20/25 (one row per
# record), hour and month are file columns 3 and 1, time is the synthetic axis
# built inside importWindFile, and nt is the number of rows.
ws, hour, time, nt, month = importWindFile(basedir, windFile)

In [109]:
# Abandoned attempt to append the hour as an extra feature column.
# NOTE(review): np.stack requires equally shaped inputs, so joining ws (N, 5)
# with an (N, 1) column would need np.hstack / np.concatenate(axis=1) instead;
# confirm before re-enabling.
#print(ws.shape,hour[:,np.newaxis].shape)
#data = np.stack((ws,hour[:,np.newaxis]),axis=0)
# The feature matrix used downstream is just the five columns in ws.
data = ws

In [5]:
# data (num_samples, features)
#data = genfromtxt("Firewheel_FWL1_mast_file_tsec_72-months.csv", delimiter=',')
data_dir = str(cwd)#+"/data") # full path to data folder with data files
os.chdir(data_dir) # change directory to data

# Keep only the names whose last three characters are "csv", sorted so the
# order is the same on every run. Building the list in one pass keeps `files`
# a plain list (the old delete-in-a-loop version turned it into an ndarray only
# when something was removed) and keeps num_files in step with its contents.
files = sorted(name for name in os.listdir(data_dir) if name.endswith("csv"))
num_files = len(files)

print(files) # check you have the files you want

In [134]:
# --- Split configuration -----------------------------------------------------
batch_size = 512
sequence_length = 20
y_ind = 3 # this is the index you want to predict
chunk_num = 100 # number of chunks to split the data into to divide between train/dev/test

raw_data = data
# Real copy of the target column: a bare slice would be a view that keeps
# aliasing `data`, so any later in-place change would leak into the labels.
y_raw = data[:,y_ind].copy()
data_length, features = data.shape
# Integer chunk size; trailing rows that do not fill a whole chunk are never
# referenced by any start index below.
chunk_length = data_length // chunk_num

# Start row of every chunk: 0, chunk_length, 2*chunk_length, ...
chunk_idx = np.arange(chunk_num, dtype=int) * chunk_length

#for file in files:
#    raw_data = genfromtxt(file, delimiter=',') # shape (data length, features)

# Break into train, dev, test files
train_size = 0.6
dev_size = 0.2
test_size = 1 - train_size - dev_size

# Boundaries are expressed in number of chunks, not rows.
split_1 = int(train_size*chunk_num)
split_2 = int((train_size+dev_size)*chunk_num)

# Fixed seed so the chunk-to-split assignment is reproducible.
np.random.seed(13)
np.random.shuffle(chunk_idx)

train_chunk = chunk_idx[:split_1]
dev_chunk = chunk_idx[split_1:split_2]
test_chunk = chunk_idx[split_2:]

def combine_idx_data(data, labels, idx_list, chunk_len=None):
    """Gather fixed-length row chunks of ``data``/``labels`` into contiguous arrays.

    Parameters
    ----------
    data : ndarray, shape (rows, features)
        Source feature matrix.
    labels : sequence, length rows
        Source label vector, indexed with the same rows as ``data``.
    idx_list : sequence of int
        Start row of each chunk, in the order the chunks should appear.
    chunk_len : int, optional
        Rows per chunk. When omitted, the notebook-level ``chunk_length`` is
        used, which is what the function always read before this parameter
        existed.

    Returns
    -------
    data_out : ndarray, shape (len(idx_list) * chunk_len, features)
    labels_out : ndarray, shape (len(idx_list) * chunk_len,)
        Both float arrays; chunk ``k`` occupies rows ``k*chunk_len`` to
        ``(k+1)*chunk_len``.
    """
    if chunk_len is None:
        chunk_len = chunk_length  # notebook global set in the split cell
    _, features = data.shape
    n_chunks = len(idx_list)
    data_out = np.zeros((n_chunks*chunk_len, features))
    labels_out = np.zeros(n_chunks*chunk_len)
    for k, start in enumerate(idx_list):
        dest = slice(k*chunk_len, (k+1)*chunk_len)
        data_out[dest, :] = data[start:start+chunk_len, :]
        labels_out[dest] = labels[start:start+chunk_len]
    return data_out, labels_out

# Assemble the train / dev / test arrays from their shuffled chunk start rows
# (same call for each split, evaluated in train, dev, test order).
(x_train, y_train), (x_dev, y_dev), (x_test, y_test) = (
    combine_idx_data(raw_data, y_raw, chunk_starts)
    for chunk_starts in (train_chunk, dev_chunk, test_chunk)
)

In [139]:
def normalize_data(data):
    """Return the column-wise mean and standard deviation of ``data``.

    Both statistics are taken along axis 0 (the sample axis for the raw data).
    The second value is a standard deviation, which worked better here than
    the variance.
    """
    return np.mean(data, axis=0), np.std(data, axis=0)

# Normalisation statistics are computed from the training split only.
# Despite its name, mu_cov holds the per-column standard deviation (np.std),
# not a covariance.
mu_train, mu_cov = normalize_data(x_train)
print(mu_train)

[8.74315009 8.9523552  9.14512261 9.32421337 9.49172965]


In [141]:
# Standardise every split with the training-set statistics so that no
# information from dev/test leaks into the scaling.
X_train, X_dev, X_test = (
    (split - mu_train) / mu_cov
    for split in (x_train, x_dev, x_test)
)
#print(np.mean(X_train[:,0],axis=0)) # confirm new mean is 0

In [148]:
# have (# data examples, input_size)
# want (# data examples - num_steps - 1, num_steps, input_size)
# given the index for prediction data (y_ind), creates prediction vector Y
#def reshape_sequences(data, sequence_length, y_ind):
#    data_size, features = data.shape
#    out_data = np.zeros((data_size-sequence_length-1, sequence_length, features))
#    out_y = np.zeros(data_size-sequence_length-1)
#    for i in range(out_data.shape[0]):
#        out_data[i,:,:] = data[i:sequence_length+i,:]
#        out_y[i] = data[sequence_length+i+1,y_ind]
#    return out_data, out_y
def reshape_sequences(data, labels, sequence_length):
    """Cut ``data`` into overlapping windows and pair each with a later label.

    Window ``i`` is ``data[i : i + sequence_length]`` and its target is
    ``labels[i + sequence_length + 1]``.

    NOTE(review): the target sits one row beyond the row that immediately
    follows the window. If plain next-step prediction is intended, the offset
    should be ``sequence_length`` rather than ``sequence_length + 1``;
    behaviour is kept as it was.

    Parameters
    ----------
    data : ndarray, shape (data_size, features)
    labels : sequence, length data_size
    sequence_length : int
        Number of consecutive rows per window.

    Returns
    -------
    out_data : ndarray, shape (n, sequence_length, features)
    out_y : ndarray, shape (n,)
        with ``n = max(data_size - sequence_length - 1, 0)``. Inputs too short
        to yield a window give empty arrays instead of a "negative dimensions"
        error, matching the existing zero-window boundary case.
    """
    data_size, features = data.shape
    n_windows = max(data_size - sequence_length - 1, 0)

    first_rows = np.arange(n_windows)
    # Row numbers of every window at once: (n_windows, sequence_length).
    window_rows = first_rows[:, None] + np.arange(sequence_length)

    # Cast to float64 so the result dtype matches the zero-initialised arrays
    # the loop-based version filled in.
    out_data = np.asarray(data, dtype=np.float64)[window_rows]
    out_y = np.asarray(labels, dtype=np.float64)[first_rows + sequence_length + 1]
    return out_data, out_y

def reshape_chunks(data, labels, chunk_length, sequence_length): # reshape each chunk into seq and stack
    """Window every chunk separately, then stack the results.

    ``data``/``labels`` are treated as consecutive chunks of ``chunk_length``
    rows. Each chunk is passed to ``reshape_sequences`` on its own, so no
    window ever straddles two chunks.

    Parameters
    ----------
    data : ndarray, shape (data_length, features)
    labels : ndarray, shape (data_length,)
    chunk_length : int
        Rows per chunk; trailing rows that do not fill a chunk are ignored.
    sequence_length : int
        Window length forwarded to ``reshape_sequences``.

    Returns
    -------
    reshaped_data : ndarray, shape (total_windows, sequence_length, features)
    reshaped_labels : ndarray, shape (total_windows,)
        Empty arrays of those shapes when ``data`` holds no complete chunk.
    """
    data_length, features = data.shape
    num_chunks = data_length // chunk_length

    # Collect the per-chunk pieces and join them once at the end; growing the
    # result with vstack/append inside the loop re-copies everything gathered
    # so far on every iteration.
    window_parts = []
    label_parts = []
    for c in range(num_chunks):
        rows = slice(c*chunk_length, (c+1)*chunk_length)
        new_data, new_labels = reshape_sequences(data[rows,:], labels[rows], sequence_length)
        window_parts.append(new_data)
        label_parts.append(new_labels)

    if not window_parts:
        # No complete chunk: return correctly shaped empty arrays rather than
        # bare lists, so callers can still read .shape.
        return np.zeros((0, sequence_length, features)), np.zeros(0)

    return np.concatenate(window_parts, axis=0), np.concatenate(label_parts)
#x_raw, y_raw = reshape_sequences(raw_data, sequence_length, y_ind)
# output is (shortened_data_samples, sequence_length, features), y_raw is (shortened_data_samples)

# Turn each normalised split into per-chunk windows with their targets
# (train, dev, test handled identically, in that order).
(cx_train, cy_train), (cx_dev, cy_dev), (cx_test, cy_test) = (
    reshape_chunks(split_x, split_y, chunk_length, sequence_length)
    for split_x, split_y in ((X_train, y_train), (X_dev, y_dev), (X_test, y_test))
)
#print(bx_train.shape,X_train.shape, by_train.shape)

In [112]:
#data_length, _, features = batches_x.shape

# Break into train, dev, test files
#train_size = 0.6
#dev_size = 0.2
#test_size = 1 - train_size - dev_size

#split_1 = int(train_size*data_length)
#split_2 = int((train_size+dev_size)*data_length)

#np.random.seed(13)
#np.random.shuffle(batches_x)
#np.random.seed(13)
#np.random.shuffle(batches_y)

#x_train = batches_x[:split_1,:,:]
#y_train = batches_y[:split_1]

#x_dev = batches_x[split_1:split_2,:,:]
#y_dev = batches_y[split_1:split_2]#
#
#x_test = batches_x[split_2:,:,:]
#y_test = batches_y[split_2:]

#print(x_train.shape)
#print(x_dev.shape)
#print(x_test.shape)

(189294, 20, 31)
(63098, 20, 31)
(63098, 20, 31)


In [156]:
def batch(data, labels, batch_size):
    """Group windows and labels into full mini-batches, dropping the remainder.

    Parameters
    ----------
    data : ndarray, shape (data_length, sequence_length, features)
    labels : ndarray, shape (data_length,)
    batch_size : int
        Samples per mini-batch.

    Returns
    -------
    data_out : ndarray, shape (num_batches, batch_size, sequence_length, features)
    labels_out : ndarray, shape (num_batches, batch_size)
        with ``num_batches = data_length // batch_size``. Samples beyond the
        last full batch are discarded. With fewer than ``batch_size`` samples
        the result has zero batches.
    """
    data_length, sequence_length, features = data.shape
    num_batches = data_length // batch_size
    usable = num_batches * batch_size
    # Spell out batch_size instead of -1: NumPy cannot infer an unknown
    # dimension for a size-0 array, which is the case when num_batches == 0.
    data_out = np.reshape(data[:usable,:,:], (num_batches, batch_size, sequence_length, features))
    labels_out = np.reshape(labels[:usable], (num_batches, batch_size)) # would need to add dim for num_outputs
    return data_out, labels_out
    
# Cut every split into full mini-batches (train, dev, test in that order).
(bx_train, by_train), (bx_dev, by_dev), (bx_test, by_test) = (
    batch(windows, targets, batch_size)
    for windows, targets in ((cx_train, cy_train), (cx_dev, cy_dev), (cx_test, cy_test))
)

# Sanity check: flat training rows vs. batched windows and batched labels.
print(x_train.shape, bx_train.shape, by_train.shape)

(189300, 5) (367, 512, 20, 5) (367, 512)


In [84]:
# Normalizing over training batches
# have data in (data_length, sequence, features)
# want to normalize along the sequence dimension for each data length and feature
#def normalize_batch(batch): # batch is size (batch_length, sequence, features)
#    mu = np.mean(batch,axis=(0,1)) # take average over batch_length and sequence
#    std = np.std(batch,axis=(1)) # using std instead of variance seems to be best
#    mu1 = np.mean(batch,axis=1)
#    print(mu1.shape)
#    mu2 = np.mean(mu1,axis=0)
#    print(mu2.shape)
#    print(mu2)
#    print(mu)
#    #print(std)
#    norm_batch = ((batch-mu)/std)
#    print(batch.shape)
#    print(batch[0,:,5])
#    print(norm_batch[0,:,5])
#
#    return norm_batch, mu, std # returning the normalizations for the data

# normalize train data and store the mean/variance of batches
#num_batches, batch_size, sequence_length, features = bx_train.shape
#batch_means = np.zeros((num_batches, features)) # want to store mean, cov in here
#batch_std = np.zeros((num_batches, features))
#print(num_batches, batch_size, sequence_length, features)
#new_bx_train = np.zeros(bx_train.shape)
#
#for i in range(0,2):#range(num_batches):
#    new_bx_train[i,:,:,:], batch_means[i], batch_std[i] = normalize_batch(np.squeeze(bx_train[i,:,:,:]))

11830 16 20 31
(16, 31)
(31,)
[-2.70616862e-16  1.38777878e-17 -3.05311332e-16 -1.38777878e-17
 -6.59194921e-17  1.17961196e-16  2.77555756e-17 -1.80411242e-16
  1.66533454e-16 -1.11022302e-16  0.00000000e+00  2.77555756e-17
  4.85722573e-17 -2.56739074e-16  1.38777878e-17  1.38777878e-17
  2.77555756e-17  1.73472348e-16  9.71445147e-17  1.52655666e-16
 -2.77555756e-17  2.77555756e-17 -1.24900090e-16 -6.93889390e-17
  2.77555756e-17 -4.85722573e-17  2.77555756e-17  2.77555756e-17
  2.08166817e-16 -2.77555756e-17             nan]
[-4.23272528e-17 -5.68989300e-17 -2.95596880e-16  4.92661467e-17
 -2.98372438e-17  3.78169718e-17 -5.55111512e-18 -1.01307851e-16
  6.21031004e-17 -5.55111512e-18 -1.90819582e-17 -5.55111512e-18
 -5.82867088e-17 -3.74700271e-17  1.59594560e-17 -3.05311332e-17
 -5.55111512e-18  3.95516953e-17  5.06539255e-17  5.03069808e-18
  7.45931095e-17 -5.55111512e-18 -3.46944695e-17  1.74513182e-16
 -2.77555756e-17 -4.19803081e-17 -5.55111512e-18  5.27355937e-17
  9.471590

ValueError: operands could not be broadcast together with shapes (16,20,31) (16,31) 

In [57]:
# Find average norm from training data and use on dev/test
#def norm_other_batch(batch,mu,std):
#    norm_batch = ((batch-mu)/std)
#    return norm_batch

#train_mu = np.mean(batch_means,axis=0)
#train_std = np.mean(batch_std,axis=0)
#print(train_mu.shape,train_std.shape)

#dev_batches,_,_,_ = bx_dev.shape
#test_batches,_,_,_ = bx_test.shape

#for i in range(dev_batches):
#    bx_dev[i,:,:,:] = norm_other_batch(bx_dev[i,:,:,:], train_mu, train_std)

#for i in range(test_batches):
#    bx_test[i,:,:,:] = norm_other_batch(bx_test[i,:,:,:], train_mu, train_std)

#print(bx_test.shape)

(31,) (31,)
(3943, 16, 20, 31)


In [None]:


# reshape to correct size and then break into batches (batch num, batch_size, features) for X and Y (batch num, pred)
#num_samples, features = data.shape
#num_batches = int(num_samples/batch_size)
#batches = # (file_name, batch_start_idx) 
#data = # (minibatch num, batch_size, features)
# Shuffle these big lists of batch data and split to train/dev/test
# For each mini batch in train normalize and save mean/variance
# Compute avg mean and variance and apply to dev/test data



# randomly assign batches to train/dev/test
# pull out y values
# normalize to training batches
# reshape_sequences



#print(x_all.shape)

#x_train = data[10:1010,10:20] # (1000,10)
#data_size, features = x_train.shape

# need to get the data from the files, split by sequence length and 

In [12]:
#def normalize_data(data):
#    mu = np.mean(data,axis=0) # compute the mean along axis = 0 (num_samples for raw data)
#    cov = np.std(data,axis=0) # using std instead of variance seems to be best
#    return mu, cov # returning the normalizations for the data

#x_mu, x_cov = normalize_data(x_train) # want x_train to be (data_size, input_size)
#y_mu, y_cov = normalize_data(y_train)

#X_train = ((x_train - x_mu)/x_cov) # still in (data examples, input_size)

# have (# data examples, input_size)
# want (# data examples - num_steps - 1, num_steps, input_size)
# given the index for prediction data (y_ind), creates prediction vector Y
#def reshape_sequences(data, sequence_length, y_ind):
 #   data_size, features = data.shape
#    out_data = np.zeros((data_size-sequence_length-1, sequence_length, features))
#    out_y = np.zeros(data_size-sequence_length-1)
 #   for i in range(out_data.shape[0]):
#        out_data[i,:,:] = data[i:sequence_length+i,:]
 #       out_y[i] = data[sequence_length+i+1,y_ind]
  #  return out_data, out_y

#sequence_length = 20
#y_ind = 3
#X_train, Y_train = reshape_sequences(x_train, sequence_length, y_ind)

#print(Y_train.shape)
#print(X_train.shape)
#print(np.mean(X_train[:,1]))
#X_dev = ((x_dev - x_mu)/x_cov).T # Use same distrib for others --> don't use "future" data
#X_test = ((x_test - x_mu)/x_cov).T

(979,)
(979, 20, 10)


In [6]:
def initialize_parameters(num_hid_layers, size_hid_layers, n_x, output_size):
    """Create weight and bias variables for a fully connected stack.

    There are ``num_hid_layers + 1`` layers. Layer ``l`` gets a weight
    ``'w<l>'`` of shape (rows, cols) with Xavier initialisation (seed 1) and a
    zero-initialised bias ``'b<l>'`` of shape (rows, 1):

    * the first layer maps ``n_x`` inputs to ``size_hid_layers`` units,
    * the last layer maps ``size_hid_layers`` units to ``output_size`` outputs,
    * every layer in between is ``size_hid_layers`` x ``size_hid_layers``.

    With ``num_hid_layers == 0`` the single layer counts as "first", so it is
    sized (size_hid_layers, n_x) and ``output_size`` is not used.

    Returns
    -------
    dict
        Maps ``'w1', 'b1', 'w2', 'b2', ...`` to the created variables.
    """
    total_layers = num_hid_layers + 1
    parameters = {}

    for layer in range(1, total_layers + 1):
        is_first = layer == 1
        is_last = layer == total_layers
        # The "first layer" rule wins when a layer is both first and last.
        rows = output_size if (is_last and not is_first) else size_hid_layers
        cols = n_x if is_first else size_hid_layers

        w_name = 'w' + str(layer)
        b_name = 'b' + str(layer)
        parameters[w_name] = tf.get_variable(
            w_name, [rows, cols],
            initializer=tf.contrib.layers.xavier_initializer(seed=1))
        parameters[b_name] = tf.get_variable(
            b_name, [rows, 1],
            initializer=tf.zeros_initializer())

    return parameters

In [7]:
def forward_prop(X,parameters):
    """Run ``X`` through the dense stack described by ``parameters``.

    ``parameters`` holds one ``'w<l>'`` / ``'b<l>'`` pair per layer. Each layer
    computes ``Z = w @ A_prev + b`` followed by a ReLU, starting from
    ``A_0 = X``.

    Returns
    -------
    The pre-activation ``Z`` of the final layer (the last ReLU is built but
    its result is not what is returned).
    """
    depth = len(parameters) // 2
    pre_activation = {}
    activation = {0: X}

    for layer in range(1, depth + 1):
        weights = parameters['w' + str(layer)]
        bias = parameters['b' + str(layer)]
        pre_activation[layer] = tf.matmul(weights, activation[layer - 1]) + bias
        activation[layer] = tf.nn.relu(pre_activation[layer])
        #activation[layer] = tf.nn.dropout(activation[layer],keep_prob)

    return pre_activation[depth]

In [159]:
def model(X_train, Y_train, learning_rate, num_epochs, num_hid_layers, size_hid_layers, minibatch_size, print_interval=10):
    """Train a two-layer stacked LSTM regressor with an L1 loss.

    The output of the last time step of the LSTM stack is passed through one
    linear layer producing a single value per sample. Training uses Adam.

    Parameters
    ----------
    X_train : ndarray, shape (num_batches, batch_size, seq_length, n_x)
        Pre-batched input windows.
    Y_train : ndarray, shape (num_batches, batch_size)
        Pre-batched targets.
    learning_rate : float
        Adam learning rate.
    num_epochs : int
        The loop runs ``num_epochs + 1`` passes over the data (unchanged from
        the original loop bounds).
    num_hid_layers : int
        Currently unused; the LSTM stack is fixed at two layers.
    size_hid_layers : int
        Number of units in each LSTM layer.
    minibatch_size : int
        Currently unused; the batch size comes from the shape of ``X_train``.
    print_interval : int, optional
        Approximate number of loss print-outs over the whole run.

    Returns
    -------
    list
        Loss of the last mini-batch of every epoch (``None`` for an epoch when
        there are no batches).
    """
    ops.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep consistent results
    (num_batches, m, seq_length, n_x) = X_train.shape # m: samples per batch, n_x: features per time step
    losses = []                                       # To keep track of the cost

    O = 1 # size of the final linear output
    w1_shape = [O, size_hid_layers]
    b1_shape = [O,1]

    w1 = tf.get_variable("w1", w1_shape, initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b1 = tf.get_variable("b1", b1_shape, initializer = tf.zeros_initializer())

    X = tf.placeholder(tf.float32,[None, seq_length, n_x]) # inputs to LSTM are (# data examples, num_steps, input_size)
    Y = tf.placeholder(tf.float32,[None])

    # Two stacked LSTM cells of equal width. The loop variable is deliberately
    # not named size_hid_layers, so the argument is not shadowed.
    rnn_layers = [tf.nn.rnn_cell.LSTMCell(size_hid_layers) for _ in range(2)]

    # create a RNN cell composed sequentially of a number of RNNCells
    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)

    outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, inputs=X, dtype=tf.float32)
    # outputs is (samples, seq_length, size_hid_layers); only the last time
    # step feeds the linear read-out, giving Z1 of shape (O, samples).
    Z1 = tf.matmul(w1,tf.transpose(outputs[:,-1,:]))+b1 # fully connected layer
    out = Z1

    #parameters = initialize_parameters(num_hid_layers, size_hid_layers, n_x, O)
    #out = forward_prop(X, parameters)

    #loss = tf.reduce_mean(tf.squared_difference(out, Y)) # L2 loss --> not good for our problem
    # Squeeze only the output axis so a batch of one sample keeps shape (1,).
    loss = tf.reduce_mean(tf.losses.absolute_difference(Y,tf.squeeze(out, axis=0))) # L1 loss

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss) # Optimizer, change the learning rate here

    # Integer print period, at least 1: avoids the float modulus (which almost
    # never hits exactly 0) and the division by zero for num_epochs == 0 or
    # print_interval == 0.
    print_every = max(1, num_epochs // max(1, print_interval))

    init = tf.global_variables_initializer() # When init is run later (session.run(init)),
    with tf.Session() as sess: # starting tf session --> all computation on tf graph in this with struct
        sess.run(init)
        for epoch in range(num_epochs+1):
            loss_val = None  # stays None if there are no batches
            for batch_idx in range(num_batches):
                # Index the batch axis directly; np.squeeze would also drop any
                # other size-1 axis and break the placeholder shapes.
                _, loss_val = sess.run([optimizer, loss], feed_dict={X: X_train[batch_idx], Y: Y_train[batch_idx]})
            losses.append(loss_val)
            if epoch % print_every == 0:
                print("Loss: ",loss_val)

    return losses
            

In [None]:
# --- Training hyper-parameters -------------------------------------------------
learning_rate = 0.008    # previously tried: 0.0005
num_epochs = 1           # total number of epochs to iterate through
print_interval = 1       # number of prints per total run
minibatch_size = 10
num_hid_layers = 3
size_hid_layers = 128

# Train on the pre-batched training split.
model(
    X_train=bx_train,
    Y_train=by_train,
    learning_rate=learning_rate,
    num_epochs=num_epochs,
    num_hid_layers=num_hid_layers,
    size_hid_layers=size_hid_layers,
    minibatch_size=minibatch_size,
    print_interval=print_interval,
)