In [None]:
import pandas as pd
import numpy as np
import os
from math import sqrt
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.callbacks import EarlyStopping, ModelCheckpoint
import datetime as dt


In [None]:
class LSTM_model:
    """Stacked two-layer LSTM regressor wrapped around a Keras ``Sequential`` model.

    The network is LSTM(64, returning sequences) -> LSTM(32) -> Dense(output_shape,
    linear), compiled with a mean-squared-error loss and the Adam optimizer.

    Attributes:
        input_shape: Shape of one input sample, passed to the first LSTM layer.
        output_shape: Number of units in the final Dense layer.
        model: The compiled Keras model returned by ``build_model``.
    """

    def __init__(self, input_shape, output_shape):
        """Store the shapes and build the compiled model immediately."""
        self.input_shape = input_shape
        self.output_shape = output_shape
        self.model = self.build_model()

    def build_model(self):
        """Create and compile the network.

        Returns:
            A compiled ``Sequential`` model (MSE loss, Adam optimizer).
        """
        model = Sequential()
        # The first LSTM must return the full sequence so the second LSTM
        # receives one vector per time step.
        model.add(LSTM(units=64, input_shape=self.input_shape, return_sequences=True))
        model.add(LSTM(units=32, return_sequences=False))
        model.add(Dense(units=self.output_shape, activation='linear'))
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model

    def train(self, train_X, train_y, test_X, test_y, num_epochs, batch_size,
              save_dir='your_path'):
        """Fit the model and keep the best checkpoint on disk.

        Args:
            train_X: Training inputs.
            train_y: Training targets.
            test_X: Inputs used as validation data during fitting.
            test_y: Targets used as validation data during fitting.
            num_epochs: Maximum number of epochs.
            batch_size: Mini-batch size.
            save_dir: Directory for the checkpoint file; created if missing.
                Defaults to the placeholder ``'your_path'`` - update it or
                pass a real directory.

        Returns:
            The history object returned by ``model.fit``.
        """
        os.makedirs(save_dir, exist_ok=True)
        timestamp = dt.datetime.now().strftime('%d%m%Y-%H%M%S')
        save_fname = os.path.join(save_dir, '%s-e%s.h5' % (timestamp, str(num_epochs)))

        # Early stopping only fires after 50 epochs without val_loss improvement;
        # the checkpoint callback writes the model each time val_loss improves.
        callbacks = [EarlyStopping(monitor='val_loss', patience=50),
                     ModelCheckpoint(filepath=save_fname, monitor='val_loss', save_best_only=True)]
        history = self.model.fit(train_X, train_y, epochs=num_epochs, batch_size=batch_size, verbose=1,
                                 validation_data=(test_X, test_y), callbacks=callbacks)
        # Do not overwrite the best-epoch checkpoint with the last-epoch model.
        # Save only as a fallback when the callback never wrote a file.
        if not os.path.exists(save_fname):
            self.model.save(save_fname)
        return history

    def evaluate(self, test_X, test_y):
        """Return whatever ``model.evaluate`` reports for the given inputs and targets."""
        loss = self.model.evaluate(test_X, test_y)
        return loss

    def predict(self, test_X):
        """Return the model's predictions for ``test_X``."""
        y_pred = self.model.predict(test_X)
        return y_pred


In [None]:
# Raw dataset: inputs.csv and outputs.csv are created with test3.py.
# Both files are read without a header row; the outputs table is transposed
# right after loading.
X_i = pd.read_csv('inputs.csv', sep=',', header=None)
y_i = pd.read_csv('outputs.csv', sep=',', header=None).transpose()

In [None]:
# Cut the flat input table into non-overlapping windows of `look_back` rows,
# pair each window with one target value, then split the windows in order
# (first 70% for training, the rest for testing; no shuffling).
look_back = 5
num_samples = X_i.shape[0] - look_back
# Only complete windows are kept. Clamp at zero so a table shorter than
# look_back yields empty arrays rather than a negative dimension.
num_samples_groups = max(num_samples // look_back, 0)
num_features = 13

X_ = np.zeros((num_samples_groups, look_back, num_features))
y_ = np.zeros((num_samples_groups, 1))

# Iterate over window indices, not row offsets: range(0, num_samples, look_back)
# produces one extra index whenever num_samples is not a multiple of look_back,
# which overruns X_ and raises IndexError.
for idx in range(num_samples_groups):
    start = idx * look_back
    X_[idx, :, :] = X_i.iloc[start:start + look_back, :].to_numpy()
    # Take the scalar explicitly: assigning a one-element Series into a single
    # array cell relies on array-to-scalar conversion, which NumPy deprecates.
    # NOTE(review): assumes y_i has a single column after transposition - confirm.
    y_[idx, 0] = y_i.iloc[idx, 0]


print('Num input samples : ', num_samples + look_back)

train_size = int(0.7 * num_samples_groups)
train_X, train_y = X_[:train_size], y_[:train_size]
test_X, test_y = X_[train_size:], y_[train_size:]

In [None]:
# Sanity check: print the shapes of the train/test inputs and targets on one line.
print(*(split.shape for split in (train_X, train_y, test_X, test_y)))

In [None]:
# Build the network: one sample is a (look_back, num_features) window and the
# final layer has a single output unit.
model = LSTM_model(input_shape=(look_back, num_features), output_shape=1)

In [None]:
# Training hyper-parameters: maximum number of epochs and mini-batch size.
epochs, batch_size = 10, 64

In [None]:
# Fit the model; the test split is passed as the validation data.
history = model.train(
    train_X=train_X,
    train_y=train_y,
    test_X=test_X,
    test_y=test_y,
    num_epochs=epochs,
    batch_size=batch_size,
)

In [None]:
# Run the trained model on the held-out inputs.
predictions = model.predict(test_X=test_X)

In [None]:
# Evaluate the model
# The model is compiled with a mean-squared-error loss and no extra metrics,
# so evaluate() reports that loss. It is not a percentage, and a single value
# has no meaningful standard deviation, so it is printed as the plain MSE.
loss = model.evaluate(test_X, test_y)
# np.ravel(...)[0] also copes with a list return, whose first entry is the loss.
print("Test loss (MSE): %.4f" % float(np.ravel(loss)[0]))

In [None]:
# Plot history: training and validation loss recorded by fit() for each epoch.
plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
# Label the axes so the figure is readable on its own, like the other plots
# in this notebook. The loss is the mean squared error the model is compiled with.
plt.xlabel('Epoch')
plt.ylabel('Loss (MSE)')
plt.legend()
plt.show()

In [None]:
# Root-mean-squared error between the held-out targets and the predictions.
test_mse = mean_squared_error(test_y, predictions)
rmse = sqrt(test_mse)
print('Test RMSE: %.3f' % rmse)

In [None]:
# Absolute error of every prediction, with its mean and median.
difference = abs(test_y - predictions)
median_value = np.median(difference)
mean_value = np.mean(difference)

# Plot difference, mean, median
plt.plot(difference, label='Difference')
plt.axhline(mean_value, color='r', linestyle='--', label='Difference Mean Value')
plt.axhline(median_value, color='g', linestyle='--', label='Difference Median')
plt.xlabel('Stop_order')
plt.ylabel('Difference')
plt.legend()

# Annotate both reference lines at x=0. The mean previously used the format
# spec ' .3f', whose space sign flag put a second blank after "Mean:"; both
# annotations now share the same '.3f' format.
plt.text(0, mean_value, f'Mean: {mean_value:.3f}', color='r', ha='right', va='bottom')
plt.text(0, median_value, f'Median: {median_value:.3f}', color='g', ha='right', va='top')
plt.show()


In [None]:

# Overlay the held-out targets and the model's predictions on one axis.
for series, series_label in ((test_y, 'Test Data'), (predictions, 'Predictions')):
    plt.plot(series, label=series_label)

plt.xlabel('Stop_order')
plt.ylabel('Ridership')
plt.legend()

plt.show()