In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Example of LSTM

In [None]:
df = pd.read_csv('./international-airline-passengers.csv', sep=';', usecols=[1], engine='python', skipfooter=3)

In [None]:
df.head()

In [None]:
plt.plot(df)
plt.show()

In [None]:
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
# fix random seed for reproducibility
np.random.seed(7)

In [None]:
dataset = df.values
dataset = dataset.astype('float32')

In [None]:
dataset

In [None]:
scaler = MinMaxScaler(feature_range=(0, 1))
datasetScaled = scaler.fit_transform(dataset)

In [None]:
datasetScaled

In [None]:
# split into train and test sets
train_size = int(len(datasetScaled) * 0.67)
test_size = len(datasetScaled) - train_size
train, test = datasetScaled[0:train_size,:], datasetScaled[train_size:len(datasetScaled),:]
print(len(train), len(test))

In [None]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

In [None]:
# test on the original dataset no scale
X, Y = create_dataset(dataset, 2)
print(X[0:5], Y[0:5])

In [None]:
# prepare the train and test datasets for modeling
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [None]:
# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [None]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)

In [None]:
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# shift train predictions for plotting
trainPredictPlot = np.empty_like(datasetScaled)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = np.empty_like(datasetScaled)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(datasetScaled)-1, :] = testPredict
# plot baseline and predictions
plt.plot(scaler.inverse_transform(datasetScaled))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

# showing the original dataset in blue, 
# the predictions for the training dataset in green, 
# and the predictions on the unseen test dataset in red.

# Apply in VeloVformatted.sample.csv

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
data = pd.read_csv('./VeloVformatted.sample.csv', sep=';')

In [None]:
data.head()

In [None]:
data['ID'].unique()

# Apply in VeloV1001formatted.csv

In [None]:
data = pd.read_csv('./VeloV1001formatted.csv', sep=';')

In [None]:
data

## Apply in just ID Station

In [None]:
data1 = data.copy(deep=True)

In [None]:
data1 = data1.sort_values(['ID', 'time-stamp'])

In [None]:
data1

In [None]:
data1[(data1['ID'] == 12002)].count()

In [None]:
idStation = 1001
_t = data1[(data1['ID'] == idStation)]
_t

### Use 'availabled-bikes'

In [None]:
_t = _t.drop(['ID', 'time-stamp', 'hour', 'day-of-week', 'available-bike-stands'], axis=1)
_t

In [None]:
dataset1 = _t.values
dataset1 = dataset1.astype('float32')
dataset1

In [None]:
scaler = MinMaxScaler(feature_range=(0, 1))
dataset1Scaled = scaler.fit_transform(dataset1)

In [None]:
dataset1Scaled

In [None]:
train_size = int(len(dataset1Scaled) * 0.67)
test_size = len(dataset1Scaled) - train_size
train, test = dataset1Scaled[0:train_size,:], dataset1Scaled[train_size:len(dataset1Scaled),:]
print(len(train), len(test))

In [None]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

In [None]:
# test on original dataset1
X, Y = create_dataset(dataset1)
print('X: ', X[0:5])
print('Y: ', Y[0:5])

In [None]:
# prepare the train and test datasets for modeling
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [None]:
print(trainX.shape, testX.shape)

In [None]:
# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

In [None]:
print(trainX.shape, testX.shape)

### Version 1

In [None]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(5, input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=500, verbose=2, validation_data=(testX, testY), shuffle=False)

In [None]:
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

In [None]:
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])

In [None]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

In [None]:
from matplotlib.pyplot import figure
figure(num=None, figsize=(20, 20), dpi=80, facecolor='w', edgecolor='k')
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset1Scaled[:1000])
trainPredictPlot[:, :] = np.nan
trainPredictPlot[0:len(trainPredict[:1000])+look_back, :] = trainPredict[:1000]
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset1Scaled[:1000])
testPredictPlot[:, :] = np.nan
testPredictPlot[0:len(trainPredict[:1000])+look_back, :] = testPredict[:1000]
# plot baseline and predictions
plt.plot(scaler.inverse_transform(dataset1Scaled[:1000]))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

# showing the original dataset in blue, 
# the predictions for the training dataset in green, 
# and the predictions on the unseen test dataset in red.

In [None]:
testPredictPlot[len(trainPredict[:1000])+(look_back*2)+1:len(dataset1Scaled[:1000])-1, :]

## Test file .py

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

Using TensorFlow backend.


In [127]:
def build_model(train, test):
    look_back = 5
    batch_size = 5
    
    # prepare the train and test datasets for modeling
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    # reshape input to be [samples, time steps, features]
    trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
    testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

    # create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(5, input_shape=(look_back, 1)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=50, batch_size = batch_size, verbose=2)
    
    scaler = MinMaxScaler(feature_range=(0, 1))
    testPredict = model.predict(testX)
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
    print('Test Score: %.2f RMSE' % (testScore))

    return model

def build_model2(dataset):
    look_back = 5
    batch_size = 5
    
    # prepare the train and test datasets for modeling
    trainX, trainY = create_dataset(dataset, look_back)

    # reshape input to be [samples, time steps, features]
    trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))

    # create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(5, input_shape=(look_back, 1)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=50, batch_size = batch_size, verbose=2)

    return model

def preprocessing(df, station):
    _t = df[(df['ID'] == station)]
    _t = _t.drop(['ID', 'time-stamp', 'hour', 'day-of-week', 'available-bike-stands'], axis=1)
    dataset = _t.values
    dataset = dataset.astype('float32')
    return dataset

def train_test_split(dataset):
    scaler = MinMaxScaler(feature_range=(0, 1))
    datasetScaled = scaler.fit_transform(dataset)
    train_size = int(len(datasetScaled) * 0.67)
    test_size = len(datasetScaled) - train_size
    train, test = datasetScaled[0:train_size,:], datasetScaled[train_size:len(datasetScaled), :]
    return train, test

def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)


def main():
    file = './VeloV1001formatted201502.csv'
    df = pd.read_csv(file, sep=';')
    stations = df['ID'].unique()
    dfCopied = df.copy(deep = True)
    dfCopied = dfCopied.sort_values(['ID', 'time-stamp'])

    d = {}
    for i, station in enumerate(stations):
        print('STATION {}'.format(station))
        _tdf = dfCopied.copy(deep=True)
        dataset = preprocessing(_tdf, station)
    
        train, test = train_test_split(dataset)
        model = build_model(train, test)
        
        #model = build_model2(dataset)
        d = {station: model}
    #print(d)
    return d

In [128]:
d = main()

STATION 1001
Epoch 1/50
 - 7s - loss: 0.0524
Epoch 2/50
 - 5s - loss: 0.0060
Epoch 3/50
 - 5s - loss: 0.0054
Epoch 4/50
 - 5s - loss: 0.0048
Epoch 5/50
 - 5s - loss: 0.0041
Epoch 6/50
 - 5s - loss: 0.0035
Epoch 7/50
 - 5s - loss: 0.0032
Epoch 8/50
 - 5s - loss: 0.0030
Epoch 9/50
 - 5s - loss: 0.0030
Epoch 10/50
 - 5s - loss: 0.0029
Epoch 11/50
 - 5s - loss: 0.0029
Epoch 12/50
 - 5s - loss: 0.0029
Epoch 13/50
 - 5s - loss: 0.0029
Epoch 14/50
 - 5s - loss: 0.0029
Epoch 15/50
 - 5s - loss: 0.0028
Epoch 16/50
 - 5s - loss: 0.0028
Epoch 17/50
 - 5s - loss: 0.0028
Epoch 18/50
 - 5s - loss: 0.0028
Epoch 19/50
 - 5s - loss: 0.0028
Epoch 20/50
 - 5s - loss: 0.0028
Epoch 21/50
 - 5s - loss: 0.0028
Epoch 22/50
 - 5s - loss: 0.0028
Epoch 23/50
 - 5s - loss: 0.0028
Epoch 24/50
 - 5s - loss: 0.0028
Epoch 25/50
 - 5s - loss: 0.0028
Epoch 26/50
 - 5s - loss: 0.0028
Epoch 27/50
 - 5s - loss: 0.0028
Epoch 28/50
 - 5s - loss: 0.0028
Epoch 29/50
 - 5s - loss: 0.0028
Epoch 30/50
 - 5s - loss: 0.0028
Epoch 

NotFittedError: This MinMaxScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.

In [77]:
d

{1001: <keras.engine.sequential.Sequential at 0x1fd9561b780>}

In [78]:
#Prediction
model = d[1001]

In [89]:
t = np.array([[8,8,9,8,10]])
t = np.reshape(t, (t.shape[0], t.shape[1], 1))
t

array([[[ 8],
        [ 8],
        [ 9],
        [ 8],
        [10]]])

In [107]:
np.ceil(model.predict(t)[0][0])

11.0

In [91]:
file = './VeloV1001formatted201502.csv'
df = pd.read_csv(file, sep=';')
df

Unnamed: 0,ID,time-stamp,hour,day-of-week,available-bike-stands,available-bikes
0,1001,2015-02-01 00:01:58+00:00,0,Sunday,2,14
1,1001,2015-02-01 00:02:51+00:00,0,Sunday,3,13
2,1001,2015-02-01 00:07:33+00:00,0,Sunday,3,13
3,1001,2015-02-01 00:08:22+00:00,0,Sunday,4,12
4,1001,2015-02-01 00:12:01+00:00,0,Sunday,3,13
5,1001,2015-02-01 00:22:05+00:00,0,Sunday,3,13
6,1001,2015-02-01 00:32:09+00:00,0,Sunday,3,13
7,1001,2015-02-01 00:37:15+00:00,0,Sunday,4,12
8,1001,2015-02-01 00:38:29+00:00,0,Sunday,4,12
9,1001,2015-02-01 00:38:48+00:00,0,Sunday,5,11


In [122]:
table = [16,13,4,7,13]
start = np.array([table])
for i in range(19):
    t = start
    t = np.reshape(start, (start.shape[0], start.shape[1], 1))
    yhat = round(model.predict(t)[0][0])
    table.append(yhat)
    start = np.array([table[-5:]])

r = np.array(table, dtype=int)
r

array([16, 13,  4,  7, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
       13, 13, 13, 13, 13, 13, 13])

In [118]:
table = [3,3,2,8,3,7,8,9]
table[-5:]

[8, 3, 7, 8, 9]

In [123]:
pas = 600000
timeprevision = []
timestamp = 1549979100000
for i in range(19):
    timeprevision.append(timestamp+i*pas)
    


In [124]:
timeprevision

[1549979100000,
 1549979700000,
 1549980300000,
 1549980900000,
 1549981500000,
 1549982100000,
 1549982700000,
 1549983300000,
 1549983900000,
 1549984500000,
 1549985100000,
 1549985700000,
 1549986300000,
 1549986900000,
 1549987500000,
 1549988100000,
 1549988700000,
 1549989300000,
 1549989900000]