In [None]:
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

Using TensorFlow backend.


In [None]:
# read the test batches from files created by the Preprocessing_NN_Data notebook
def read_batches(filepath):
    '''Read consecutively numbered batch files into a list of dataframes.

    Files are looked up as <filepath>_0.csv, <filepath>_1.csv, ... and reading
    stops at the first number for which no file exists.  Every file is read
    with its first column as the index and with date parsing switched on.

    filepath: path prefix shared by all batch files (without the '_<n>.csv' part)
    returns: list of dataframes in file-number order (empty when file 0 is missing)
    '''
    frames = []
    file_number = 0
    keep_reading = True
    while keep_reading:
        csv_name = ''.join((filepath, '_', str(file_number), '.csv'))
        try:
            frames.append(pd.read_csv(csv_name, index_col=0, parse_dates=True))
        except FileNotFoundError:
            # the first missing number marks the end of the batch sequence
            keep_reading = False
        else:
            file_number += 1
    return frames
            
# load every consecutively numbered test batch (data/test/batch_0.csv, data/test/batch_1.csv, ...)
test_batches = read_batches('data/test/batch')

In [None]:
# convert the dataframes in a list of batches to the correct input and output shape (and make it arrays)
def convert_to_keras_input(batches):
    '''Split every batch dataframe into a model input array and a target array.

    For each batch the input is made of all columns except the first one, with
    a length-1 axis inserted in the middle: shape (rows, 1, n_features).  The
    target is the 'cl_kadij_out' column as a 1-D array of shape (rows,).
    NOTE(review): the input is selected by position and the target by name, so
    this assumes 'cl_kadij_out' is the first column of every batch - confirm.

    batches: iterable of dataframes that contain a 'cl_kadij_out' column
    returns: tuple (input_batches, output_batches) of two equally long lists
    '''
    input_batches = []
    output_batches = []
    for batch in batches:
        input_df = batch.iloc[:, 1:]
        output_df = batch['cl_kadij_out']
        # .values replaces as_matrix(), which was removed in pandas 1.0
        input_batches.append(input_df.values[:, np.newaxis, :])
        output_batches.append(output_df.values)
    return (input_batches, output_batches)

# split the loaded test batches into per-batch model inputs and expected outputs
(input_test_batches, output_test_batches) = convert_to_keras_input(test_batches)

In [None]:
# load the trained model from a file
model_filename = 'models/2layer_lstm_11cells'

model = keras.models.load_model(model_filename + '.h5')

# read the description stored next to the model; the context manager closes
# the file even when reading raises
with open(model_filename + '_info.txt') as info_file:
    info = info_file.read()
print('------- info of read model --------\n')
print(info)

In [None]:
# run the model on every test batch and collect, per data point and per batch end:
#   - the model output
#   - the expected (actual) output
#   - the basic prediction, taken from the first feature of the model input
output_model, actual_value, basic_prediction = [], [], []
output_model_last, actual_value_last, basic_prediction_last = [], [], []
for batch_input, batch_actual in zip(input_test_batches, output_test_batches):
    # the whole batch is predicted in a single pass
    batch_output = model.predict(batch_input, batch_size = len(batch_input))
    point_indices = range(len(batch_output))
    output_model.extend(batch_output[j].item() for j in point_indices)
    actual_value.extend(batch_actual[j].item() for j in point_indices)
    basic_prediction.extend(batch_input[j,0,0] for j in point_indices)
    # keep the final data point of the batch separately
    output_model_last.append(batch_output[-1].item())
    actual_value_last.append(batch_actual[-1].item())
    basic_prediction_last.append(batch_input[-1,0,0])

In [None]:
# load the scaler data from a file (one row per scaler name, read as the index) and
# define a function that retransforms the cl_kadij data to the actual values
scaler_data = pd.read_csv('data/scaler_data.csv', index_col=0)

def rescale_data(data, scaler_name, scaler_table=None):
    '''Undo the scaling of data with the stored parameters of one scaler.

    data: sequence or array of scaled values
    scaler_name: row label of the scaler to use
    scaler_table: optional dataframe indexed by scaler name with the columns
                  'data_range_' and 'data_min_'; when omitted the notebook-level
                  scaler_data table is used, so existing calls keep working
    returns: numpy array holding data * data_range_ + data_min_
    '''
    if scaler_table is None:
        # resolved at call time so the table only has to exist when it is needed
        scaler_table = scaler_data
    data_range = scaler_table.loc[scaler_name, 'data_range_']
    data_min = scaler_table.loc[scaler_name, 'data_min_']
    converted = np.array(data) * data_range + data_min
    return converted
    
def convert_cl_kadij_output(output):
    '''Convert scaled cl_kadij values back to actual values.

    The values are first rescaled with the 'cl_kadij_scaler' parameters and the
    result is then exponentiated (presumably undoing a log transform applied
    during preprocessing - confirm against the preprocessing notebook).

    output: sequence or array of scaled values
    returns: numpy array of converted values
    '''
    return np.exp(rescale_data(output, 'cl_kadij_scaler'))

In [None]:
# convert every collected series (all points and batch-final points) back to original values
(output_model_scaled,
 actual_value_scaled,
 basic_prediction_scaled,
 output_model_last_scaled,
 actual_value_last_scaled,
 basic_prediction_last_scaled) = [
    convert_cl_kadij_output(series)
    for series in (output_model, actual_value, basic_prediction,
                   output_model_last, actual_value_last, basic_prediction_last)
]

In [None]:
# define some functions to print analysis of test results
def print_MSE(predicted, actual):
    '''calculates the MSE and prints it, returns the MSE

       predicted, actual: arrays (or sequences) of values to compare
       inputs that hold the same number of values in different shapes, such as
       an (n, 1) column against an (n,) vector, are compared element by element'''
    predicted = np.asarray(predicted)
    actual = np.asarray(actual)
    if predicted.shape != actual.shape and predicted.size == actual.size:
        # without this, (n, 1) - (n,) would broadcast to an (n, n) matrix and
        # silently average over every pairing instead of the n matching pairs
        predicted = predicted.ravel()
        actual = actual.ravel()
    errors = np.abs(predicted - actual)
    squared_errors = errors**2
    MSE = np.average(squared_errors)
    print('MSE:', MSE)
    return MSE

def print_prediction_skill(predicted, actual, basic_predicted, verbose=False):
    '''prints and returns the prediction skill: 1 - MSE(model) / MSE(basic method)

       the basic method is the reference prediction given in basic_predicted
       (using the same value as yesterday)
       the model MSE is obtained through print_MSE, so that MSE is printed too
       with verbose=True the MSE of the basic method is printed as well'''
    model_mse = print_MSE(predicted, actual)
    baseline_mse = np.average(np.abs(actual - basic_predicted)**2)
    skill = 1 - model_mse / baseline_mse
    if verbose:
        print('MSE basic method:', baseline_mse)
    print('prediction skill:', skill)
    return skill

In [None]:
# report MSE and prediction skill twice: over all test points, and over the last point of each batch
for heading, predicted, actual, basic in (
    ('When analysing prediction on all test data:',
     output_model_scaled, actual_value_scaled, basic_prediction_scaled),
    ('When analysing prediction only on last value of each test batch:',
     output_model_last_scaled, actual_value_last_scaled, basic_prediction_last_scaled),
):
    print(heading)
    print_prediction_skill(predicted, actual, basic, True)

In [None]:
# inspect a single test batch: plot prediction against actual data and print its prediction skill
batch_index = 10
batch_input = input_test_batches[batch_index]
print('batch size', len(test_batches[batch_index]))
batch_output_model = model.predict(batch_input, batch_size=len(batch_input))
batch_output_actual_converted = convert_cl_kadij_output(output_test_batches[batch_index])
# flatten the model output so it lines up element by element with the 1-D actual
# values (an (n, 1) column would otherwise broadcast against them)
batch_output_model_converted = convert_cl_kadij_output(batch_output_model).ravel()
# basic prediction for this batch: first feature of the model input, the same
# baseline that is used for the analysis over all test batches
batch_basic_prediction_converted = convert_cl_kadij_output(batch_input[:, 0, 0])
plt.plot(test_batches[batch_index].index.values, batch_output_actual_converted, 'b--', label='actual data')
plt.plot(test_batches[batch_index].index.values, batch_output_model_converted, 'y', label='predicted data')
# NOTE(review): 250 looks like a fixed threshold for cl_kadij - confirm its meaning and unit
plt.axhline(250, color='red')
plt.legend();

print('\nWhen analysing each data point in this batch:')
# the baseline has to be passed as third argument; verbose=True is the fourth
print_prediction_skill(batch_output_model_converted, batch_output_actual_converted,
                       batch_basic_prediction_converted, True);
if (len(batch_output_actual_converted) >= 2):
    print('\nWhen analysing only the last data point in this batch:')
    prediction_SE = (batch_output_model_converted[-1] - batch_output_actual_converted[-1])**2
    print('prediction SE:', prediction_SE)
    # here the basic prediction is the previous actual value of the batch
    basic_SE = (batch_output_actual_converted[-1] - batch_output_actual_converted[-2])**2
    print('basic prediction SE:', basic_SE)
    print('prediction skill based on this single point:', 1 - prediction_SE / basic_SE)

In [None]:
# list the positions of all test batches with more than 100 rows;
# any of them can be used as batch_index in the cell above
batch_sizes = list(map(len, test_batches))
print(np.where(np.asarray(batch_sizes) > 100))