In [11]:
import pandas as pd
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from pandas import Series
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import numpy
from math import sqrt
from sklearn.metrics import mean_squared_error

In [14]:
# Funciones útiles

def timeseries_to_supervised(data, lag=1):
    """Frame a sequence as a supervised-learning table.

    The result holds one shifted copy of ``data`` per lag step
    (shift 1 first, then shift 2, ... up to ``lag``), followed by the
    unshifted data as the last column(s). Cells left empty by the
    shifting are filled with 0.

    Args:
        data: anything ``DataFrame`` accepts as its data argument.
        lag: number of shifted copies to prepend (default 1).

    Returns:
        DataFrame with the lagged columns followed by the original ones.
    """
    frame = DataFrame(data)
    pieces = []
    for step in range(1, lag + 1):
        pieces.append(frame.shift(step))
    pieces.append(frame)
    # Shifting introduces NaN at the top of each lagged column; replace with 0.
    return concat(pieces, axis=1).fillna(0)

def parser(x):
    """Parse a date label by prefixing '190' and reading it as year-month.

    Args:
        x: string that, once prefixed with '190', matches ``%Y-%m``
            (for example '1-01' becomes '1901-01').

    Returns:
        The parsed pandas timestamp.
    """
    # The format must be passed by keyword: the second positional parameter
    # of pd.to_datetime is `errors`, not `format`.
    return pd.to_datetime('190' + x, format='%Y-%m')

def difference(dataset, interval=1):
    """Return the differenced series ``dataset[i] - dataset[i - interval]``.

    Args:
        dataset: indexable sequence of values supporting subtraction.
        interval: distance between the two elements being subtracted.

    Returns:
        Series with ``len(dataset) - interval`` differences, in order.
    """
    deltas = [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]
    return Series(deltas)

def scale(train, test):
    """Scale train and test arrays to the range (-1, 1).

    The scaler is fitted on ``train`` only and then applied to both arrays.

    Args:
        train: 2-D array (its ``shape[0]`` and ``shape[1]`` are read).
        test: 2-D array transformed with the scaler fitted on ``train``.

    Returns:
        Tuple ``(scaler, train_scaled, test_scaled)``.
    """
    # Fit on the training data only.
    fitted = MinMaxScaler(feature_range=(-1, 1)).fit(train)
    # Reshape each array to (rows, columns) before transforming.
    train_2d = train.reshape(train.shape[0], train.shape[1])
    train_scaled = fitted.transform(train_2d)
    test_2d = test.reshape(test.shape[0], test.shape[1])
    test_scaled = fitted.transform(test_2d)
    return fitted, train_scaled, test_scaled

def forecast_lstm(model, batch_size, X):
    """Run a one-step forecast for a single input row.

    Args:
        model: object exposing ``predict(X, batch_size=...)``.
        batch_size: batch size forwarded to ``model.predict``.
        X: 1-D array of input features for one sample.

    Returns:
        Element ``[0, 0]`` of the prediction array.
    """
    # Present the row as one sample with one time step and len(X) features.
    sample = X.reshape(1, 1, len(X))
    return model.predict(sample, batch_size=batch_size)[0, 0]

def invert_scale(scaler, X, value):
    """Undo the scaling of a single forecast value.

    The value is appended to its input features to rebuild a full row,
    the row is passed through ``scaler.inverse_transform``, and the last
    element of the inverted row is returned.

    Args:
        scaler: object exposing ``inverse_transform``.
        X: iterable of input features for the row.
        value: the scaled value to invert.

    Returns:
        The last element of the inverse-transformed row.
    """
    row = numpy.array([*X, value])
    # inverse_transform is given a single-row 2-D array.
    restored = scaler.inverse_transform(row.reshape(1, len(row)))
    return restored[0, -1]

def inverse_difference(history, yhat, interval=1):
    """Undo differencing by adding back an earlier observation.

    Args:
        history: indexable sequence of original (undifferenced) values.
        yhat: differenced value to restore.
        interval: how far from the end of ``history`` the base value sits.

    Returns:
        ``yhat`` plus ``history[-interval]``.
    """
    base = history[-interval]
    return yhat + base

In [None]:
# Load the data: first column becomes the (date-parsed) index, and the
# remaining single column is squeezed into a Series.
filename = './shampoo.csv'
series = read_csv(filename, header=0, parse_dates=[0], index_col=0).squeeze('columns')
print(series.head())

# Quick look at the raw series
series.plot()
pyplot.show()

In [35]:
# Make the series stationary with first-order differencing
raw_values = series.values
diff_values = difference(raw_values, 1)

# Frame as a supervised problem: lagged value(s) first, target in the last column
supervised = timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values

# Hold out the last 12 rows as the test set
train, test = supervised_values[0:-12], supervised_values[-12:]

# Scale both splits to (-1, 1); the scaler is fitted on the training rows only
scaler, train_scaled, test_scaled = scale(train, test)

# Build the training inputs/targets from the SCALED rows. Forecasting feeds
# test_scaled to the model and inverts the scaling of its output, so the
# model has to be trained in the same scaled space.
X, y = train_scaled[:, 0:-1], train_scaled[:, -1]
# LSTM input layout: (samples, time steps, features), with one time step
X = X.reshape(X.shape[0], 1, X.shape[1])

In [37]:
# Network hyper-parameters
batch_size = 1
neurons = 4

# Stateful LSTM layer followed by a single-unit dense output. The batch
# input shape is fixed to (batch_size, time steps, features) taken from X.
model = Sequential([
    LSTM(
        neurons,
        batch_input_shape=(batch_size, X.shape[1], X.shape[2]),
        stateful=True,
    ),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
epochs = 30

# The LSTM is stateful, so its internal state persists across batches until
# reset_states() is called. Run one pass over the training data per fit()
# call and clear the state before each pass, so the state reached at the end
# of the series is not carried into the start of the next pass.
# shuffle=False keeps the samples in their original order.
# The state left by the final pass is intentionally kept (no reset afterwards).
for epoch in range(epochs):
    model.reset_states()
    model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)

In [None]:
# Walk through the test rows, forecasting one step at a time
predictions = list()
for i in range(len(test_scaled)):
    # Input features of this test row (everything except the last column).
    # A dedicated name is used so the training arrays X / y are not
    # overwritten, which keeps the training cell re-runnable.
    X_step = test_scaled[i, 0:-1]
    # Use the batch size the model was built with
    yhat = forecast_lstm(model, batch_size, X_step)
    # invert scaling
    yhat = invert_scale(scaler, X_step, yhat)
    # invert differencing: the offset selects the raw observation just
    # before the one being forecast
    yhat = inverse_difference(raw_values, yhat, len(test_scaled)+1-i)
    # store forecast
    predictions.append(yhat)
    expected = raw_values[len(train) + i + 1]
    print('Mes=%d, Predicción=%f, Esperado=%f' % (i+1, yhat, expected))

In [None]:
# Compare the forecasts with the matching tail of the raw series. The tail
# length is taken from the forecasts rather than repeating the literal 12.
n_test = len(predictions)
observed = raw_values[-n_test:]
rmse = sqrt(mean_squared_error(observed, predictions))
print('Test RMSE: %.3f' % rmse)

# Observed vs. predicted, labelled so the two curves can be told apart
fig, ax = pyplot.subplots()
ax.plot(observed, label='Esperado')
ax.plot(predictions, label='Predicción')
ax.set_xlabel('Mes')
ax.set_title('Esperado vs. Predicción')
ax.legend()
pyplot.show()