In [1]:
import common_functions
import pandas as pd
from matplotlib import pyplot
import numpy
from sklearn.metrics import mean_squared_error
from math import sqrt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, GRU
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
#loading of data
level_readings = pd.read_csv("processed_level.csv", header=0, index_col=0, squeeze=True)
level_readings.fillna(0, inplace=True)
feq = pd.read_csv('del_freq.csv', index_col=0)

  mask |= (ar1 == a)


In [None]:
#configuration variables

test_data_percent = 0.25  #75% of data is used for training and 25% for testing
print_predictions = False

In [11]:
#helper functions

def fit_lstm(train, batch_size, nb_epoch, neurons):
    
    #defining the data and the label
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    
    #constructing the model
    model = Sequential()
    model.add(GRU(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    #epochs
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size, shuffle=False)
        model.reset_states()
    return model

def train(vessel_id, model_name):
    
    #loading the data
    a = level_readings[level_readings['VESSEL_ID'] == vessel_id].sort_values(by=['ON_DATE_TIME'])
    series = a[['ON_DATE_TIME', 'INST_PRODUCT_AMOUNT' ]]
    series.index = series['ON_DATE_TIME']
    series.drop('ON_DATE_TIME', axis=1, inplace=True)
    
    raw_values = series.values
    raw_values = numpy.squeeze(raw_values)
    diff_values = common_functions.difference(raw_values, 1)
    supervised = common_functions.timeseries_to_supervised(diff_values, 1)
    supervised_values = supervised.values
    
    # test and train data separation
    test_data_num = int(supervised_values.shape[0] * test_data_percent)
    print("Total number of data: %d, trainig_data = %d, testing data %d" %(supervised_values.shape[0],(supervised_values.shape[0]-test_data_num), test_data_num ))
    train, test = supervised_values[0:-test_data_num], supervised_values[-test_data_num:]
    scaler, train_scaled, test_scaled = common_functions.scale(train, test)
    
    # fit the model
    lstm_model = fit_lstm(train_scaled, 1, 5 , 4) 
    
    #saving the model
    lstm_model.save('%s.h5' %(model_name))
    print("saving model in the name of %s.h5" %(model_name))
    
    
    # Vlaidating the model using the test data
    predictions = list()
    for i in range(len(test_scaled)):
        if i == 0:
            # make one-step forecast
            X, y = numpy.array([[0]]), test_scaled[i, -1]
        else: 
            X, y = numpy.array([[predictions[-1]]]), test_scaled[i, -1]
        yhat = common_functions.forecast_lstm(lstm_model, 1, X)
        # invert scaling
        yhat = common_functions.invert_scale(scaler, X, yhat)
        # invert differencing
        yhat = common_functions.inverse_difference(raw_values, yhat, len(test_scaled)+1-i)
        predictions.append(yhat)
        expected = raw_values[len(train) + i]
        if print_predictions:
            print('Predicted=%f, Expected=%f' % (yhat, expected))

    # report performance
    rmse = sqrt(mean_squared_error(raw_values[-test_data_num:], predictions))

    print('Test RMSE: %.3f' % rmse)

In [12]:
#grouping the tanks based on their delivery frequncy value

feq_lis = list (feq.groupby('DELIVERY_FREQUENCY').groups.keys())
feq_dict  = {i : feq[feq['DELIVERY_FREQUENCY'] == i].VESSEL_ID.tolist() for i in feq_lis}
feq_dict_keys = list(feq_dict.keys())
feq_dict_keys = feq_dict_keys[1:21]  #for time being we select the first 21 delivery frequency

In [13]:

# for each of the 21 delivery frequency we create a model
for freq in feq_dict_keys:
    vessel_id = random.choice(feq_dict[freq])
    model_name = 'freq%d_%s' %(freq, vessel_id)
    train(vessel_id, model_name)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Total number of data: 10688, trainig_data = 8016, testing data 2672
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq1_US-7455.h5
Test RMSE: 22549.338
Total number of data: 21987, trainig_data = 16491, testing data 5496
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq2_SP-121.h5
Test RMSE: 3332.752
Total number of data: 23207, trainig_data = 17406, testing data 5801
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq3_UK-28848.h5
Test RMSE: 3327.785
Total number of data: 17571, trainig_data = 13179, testing data 4392
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq4_UK-3689.h5
Test RMSE: 14540.566
Total number of data: 5692, trainig_data = 4269, testing data 1423
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq5_CN-156811.h5
Test RMSE: 5218.658
Total 

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq15_SP-30560.h5
Test RMSE: 1976.421
Total number of data: 10642, trainig_data = 7982, testing data 2660
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq16_US-219119.h5
Test RMSE: 4926.720
Total number of data: 5542, trainig_data = 4157, testing data 1385
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq17_BR-218540.h5
Test RMSE: 784.205
Total number of data: 7553, trainig_data = 5665, testing data 1888
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq18_CN-154371.h5
Test RMSE: 1063.939
Total number of data: 23262, trainig_data = 17447, testing data 5815
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name of freq19_UK-6417.h5
Test RMSE: 1364.001
Total number of data: 8130, trainig_data = 6098, testing data 2032
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
saving model in the name o