In [1]:
import numpy as np
import math
import matplotlib as mpl
from matplotlib.image import imread
from random import randint

import keras
import pandas

import tensorflow as tf
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Input, Dense, GRU, Embedding
from tensorflow.python.keras.optimizers import RMSprop
from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import SGD, adam
from keras.layers import Embedding
from keras.layers import LSTM
from keras.wrappers.scikit_learn import KerasClassifier
from keras import optimizers
import keras.utils
import keras.layers
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
import copy
import csv
import os

Using TensorFlow backend.


In [2]:
mpl.use('Agg')
import matplotlib.pyplot as plt

ROOT = os.getcwd()
DATASET_PATH = os.path.join(ROOT, "Datasets")
print(DATASET_PATH)

/data2/Datasets


In [3]:
#Set y values of data to lie between 0 and 1
def normalize_data(dataset, data_min, data_max):
    data_std = (dataset - data_min) / (data_max - data_min)
    test_scaled = data_std * (np.amax(data_std) - np.amin(data_std)) + np.amin(data_std)
    return test_scaled

In [4]:
#Import and pre-process data for future applications
def import_data(train_dataframe, test_dataframe):
    dataset = train_dataframe.values
    dataset = dataset.astype('float32')

    #Include all 12 initial factors (Year ; Month ; Hour ; Day ; Cloud Coverage ; Visibility ; Temperature ; Dew Point ;
    #Relative Humidity ; Wind Speed ; Station Pressure ; Altimeter
    max_test = np.max(dataset[:,6])
    min_test = np.min(dataset[:,6])
    scale_factor = max_test - min_test
    max = np.empty(7)
    min = np.empty(7)

    #Create training dataset
    for i in range(0,6):
        min[i] = np.amin(dataset[:,i],axis = 0)
        max[i] = np.amax(dataset[:,i],axis = 0)
        dataset[:,i] = normalize_data(dataset[:, i], min[i], max[i])

    train_data = dataset[:,0:7]
    train_labels = dataset[:,6]


    # Create test dataset
    dataset = test_dataframe.values
    dataset = dataset.astype('float32')

    for i in range(0, 6):
        dataset[:, i] = normalize_data(dataset[:, i], min[i], max[i])

    test_data = dataset[:, 0:7]
    test_labels = dataset[:, 6]

    return train_data, train_labels, test_data, test_labels, scale_factor


In [5]:
#Construt and return Keras RNN model
def build_model(init_type = 'glorot_uniform', optimizer = 'adam', num_features = 7):
    
    model = Sequential()
    layers = [num_features, 64, 64, 1, 1]
    model.add(keras.layers.LSTM(
        layers[0],
        input_shape = (None, num_features),
        return_sequences=True))
    model.add(keras.layers.Dropout(0.2))

    model.add(keras.layers.LSTM(
        layers[1],
        kernel_initializer = init_type,
        return_sequences=True
        #bias_initializer = 'zeros'
    ))
    model.add(keras.layers.Dropout(0.2))

    model.add(Dense(
        layers[2], activation='tanh',
        kernel_initializer=init_type,
        input_shape = (None, 1)
        ))
    model.add(Dense(
        layers[3]))

    model.add(Activation("softmax"))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    

    #Alternative parameters:
    #momentum = 0.8
    #learning_rate = 0.1
    #epochs = 100
    #decay_rate = learning_rate / 100
    #sgd = keras.optimizers.SGD(lr=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)
    #model.compile(loss="binary_crossentropy", optimizer=sgd)
    rms = keras.optimizers.RMSprop(lr=0.002, rho=0.9, epsilon=1e-08, decay=0.01)
    model.compile(loss="mean_squared_error", optimizer=optimizer)
    

    return model

In [6]:
#Save output predictions for graphing and inspection
def write_to_csv(prediction, filename):
    print("Writing to CSV...")
    with open(filename, 'w') as file:
        for i in range(prediction.shape[0]):
            file.write("%.5f" % prediction[i][0][0])
            file.write('\n')
    print("...finished!")

In [7]:
#Return MSE error values of all three data sets based on a single model
def evaluate(model, X_train, Y_train, scale_factor):
    scores = model.evaluate(X_train, Y_train, verbose = 0) 
    print("train: ", model.metrics_names, ": ", scores)

#Calculate MSE between two arrays of values 
def mse(predicted, observed):
    return np.sum(np.multiply((predicted - observed),(predicted - observed)))/predicted.shape[0]

In [8]:
    mpl.use('Agg')

    #Import test data (6027, 13)
    train_dataframe = pandas.read_csv(DATASET_PATH + '/SolarPrediction.csv', sep=",", engine='python', header = None)
    traincsv_data, testcsv_data = train_test_split(train_dataframe, test_size=0.33, random_state = 0)
    train_data, train_labels,test_data, test_labels, scale_factor = import_data(traincsv_data, testcsv_data)
    #X_train, X_test, Y_train, Y_test = train_test_split(train_data, train_labels, test_size=0.33, random_state = 42)
    #X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
    #trainX_data = import_data(train)
    #trainY_labels = train 
    #testX_data = test
    #testY_labels = test 


    time_steps = 1    
    assert(train_data.shape[0] % time_steps == 0)

    X_train = np.reshape(train_data, (train_data.shape[0] // time_steps, time_steps, train_data.shape[1]))
    X_test = np.reshape(test_data, (test_data.shape[0] // time_steps, time_steps, test_data.shape[1]))
    Y_train = np.reshape(train_labels, (train_labels.shape[0] // time_steps, time_steps, 1))
    Y_test = np.reshape(test_labels, (test_labels.shape[0] // time_steps, time_steps, 1)) 


    model = build_model('glorot_uniform', 'adam')

    #Standard vanilla LSTM model

    model_fit_epochs = 100
    print("X_train shape: ",X_train.shape, " Y_train shape: ",Y_train.shape)

    model.fit(
        X_train, Y_train,
        batch_size = 16, epochs = model_fit_epochs)
    trainset_predicted = model.predict(X_train)


    print("Train MSE: ", mse(trainset_predicted, X_train) * scale_factor * scale_factor)
 

    write_to_csv(trainset_predicted,'TrainNewData_prediction.csv')

W0829 19:43:16.968211 139879217338112 deprecation_wrapper.py:119] From /data/anaconda/envs/py36/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0829 19:43:16.984912 139879217338112 deprecation_wrapper.py:119] From /data/anaconda/envs/py36/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0829 19:43:16.987200 139879217338112 deprecation_wrapper.py:119] From /data/anaconda/envs/py36/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0829 19:43:17.167032 139879217338112 deprecation_wrapper.py:119] From /data/anaconda/envs/py36/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.p

X_train shape:  (21899, 1, 7)  Y_train shape:  (21899, 1, 1)
Epoch 1/100
 3248/21899 [===>..........................] - ETA: 23s - loss: 38.5189

KeyboardInterrupt: 

In [29]:
# Export the model

import os
import sys

ROOT = os.getcwd()
OUT = os.path.join(ROOT, 'out')

receiver_tensors = {
    'radiation_ids': tf.placeholder(dtype=tf.float32, shape=[None, 256], name='radiation_ids'),
    'temperature_ids': tf.placeholder(dtype=tf.float32, shape=[None, 256], name='temperature_ids'),
    'pressure_mask': tf.placeholder(dtype=tf.float32, shape=[None, 256], name='pressure_mask'),
    'humidity_ids': tf.placeholder(dtype=tf.float32, shape=[None, 256], name='humidity_ids'),
    'winddirec_mask': tf.placeholder(dtype=tf.float32, shape=[None, 256], name='winddirc_mask'),
    'speed_mask': tf.placeholder(dtype=tf.float32, shape=[None, 256], name='speed_mask')
}

from keras.models import Model
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import tag_constants, signature_constants, signature_def_utils_impl
import keras as k
import numpy as np
from keras import backend as K

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8, allow_growth=False)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

K.set_session(sess)

prediction_signature = tf.saved_model.signature_def_utils.predict_signature_def({"features": receiver_tensors}, {"prediction":model.output})

builder = saved_model_builder.SavedModelBuilder(os.path.join("Model4"))
legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')

init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init_op)

# Add the meta_graph and the variables to the builder
builder.add_meta_graph_and_variables(
      sess, [tag_constants.SERVING],
      signature_def_map={
           signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
               prediction_signature,
      },
      )
# save the graph      
builder.save()     

W0827 21:09:14.954859 140304533006080 deprecation.py:323] From /data/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/saved_model/signature_def_utils_impl.py:201: build_tensor_info (from tensorflow.python.saved_model.utils_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


AttributeError: 'dict' object has no attribute 'dtype'

In [34]:
from keras import metrics

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

score, acc = model.evaluate(X_test, Y_test)
print("Test Score:", score * scale_factor)
print("Test accuracy:", acc * scale_factor * scale_factor)


Test Score: -3401.497068211711
Test accuracy: 0.0
