In [5]:
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

Using Theano backend.


In [2]:
# Seed NumPy's global random generator with a fixed value.
np.random.seed(seed=7)

In [16]:
def read_data(path):
    """Load a sales CSV and return it with cleaned-up price columns.

    Parameters
    ----------
    path : str or file-like
        Passed straight to ``pandas.read_csv``. The file must contain the
        columns ``FECHA``, ``' CPRECIO '`` and ``' COSTOPESOS '`` (the last
        two with the surrounding spaces in their header names).

    Returns
    -------
    pandas.DataFrame
        ``FECHA`` parsed as dates; the padded price columns replaced by
        ``CPRECIO`` (numeric, unparseable values become NaN) and
        ``COSTOPESOS`` appended at the end; the third-from-last column
        renamed to ``YEAR``.
    """
    # `infer_datetime_format` is omitted: it only affected parsing speed and
    # is deprecated in current pandas.
    df = pd.read_csv(path, parse_dates=['FECHA'])

    # Prices are text with padding and "," characters (presumably thousands
    # separators). `astype(str)` keeps the cleanup from failing on cells that
    # were not read as strings (e.g. missing values).
    price_text = df[' CPRECIO '].astype(str).str.strip().str.replace(",", "")
    # `convert_objects(convert_numeric=True)` is deprecated/removed;
    # `to_numeric(errors='coerce')` gives the same NaN-on-failure conversion.
    df['CPRECIO'] = pd.to_numeric(price_text, errors='coerce')
    df['COSTOPESOS'] = df[' COSTOPESOS ']
    df = df.drop([' CPRECIO ', ' COSTOPESOS '], axis=1)

    # Build a fresh list rather than writing into `df.columns.values`, which
    # is the Index's own backing array and must not be mutated in place.
    cols = df.columns.tolist()
    cols[-3] = "YEAR"
    df.columns = cols
    return df

def read_test_data(path):
    """Load a test-set sales CSV and tidy its column names.

    Same layout handling as the training loader, except that ``' CPRECIO '``
    is copied as-is (no text cleanup or numeric conversion).

    Parameters
    ----------
    path : str or file-like
        Passed straight to ``pandas.read_csv``. The file must contain the
        columns ``FECHA``, ``' CPRECIO '`` and ``' COSTOPESOS '``.

    Returns
    -------
    pandas.DataFrame
        ``FECHA`` parsed as dates; the padded price columns replaced by
        ``CPRECIO`` and ``COSTOPESOS`` appended at the end; the
        third-from-last column renamed to ``YEAR``.
    """
    # `infer_datetime_format` is omitted: it only affected parsing speed and
    # is deprecated in current pandas.
    df = pd.read_csv(path, parse_dates=['FECHA'])
    df['CPRECIO'] = df[' CPRECIO ']
    df['COSTOPESOS'] = df[' COSTOPESOS ']
    df = df.drop([' CPRECIO ', ' COSTOPESOS '], axis=1)

    # Build a fresh list rather than writing into `df.columns.values`, which
    # is the Index's own backing array and must not be mutated in place.
    cols = df.columns.tolist()
    cols[-3] = "YEAR"
    df.columns = cols
    return df

def calculate_extra_cols(df):
    """Add a ``total_price`` column to ``df`` in place and return ``df``.

    ``total_price`` is the element-wise product of the ``CPRECIO``,
    ``#UNIDADES`` and ``CTIPOCAM01`` columns.
    """
    price = df['CPRECIO']
    units = df['#UNIDADES']
    factor = df['CTIPOCAM01']  # presumably an exchange rate -- TODO confirm
    df['total_price'] = price.mul(units).mul(factor)
    return df

In [None]:
# Read the sales CSV and append the derived total_price column.
df = calculate_extra_cols(read_data('./BASEVENTAS2010A2015.csv'))

# Strip surrounding whitespace from the text key columns.
for text_col in ("MARCA", "IDPRODUCTO"):
    df[text_col] = df[text_col].map(lambda raw: raw.strip())

In [18]:
# Keep only the rows of product "25967" and index them by date.
df = df.loc[df["IDPRODUCTO"] == "25967"].set_index("FECHA")

# Weekly mean of units; missing weekly means are replaced by 0, and only
# the part of the series from 2013 onwards is kept.
ts = df["#UNIDADES"].resample("W").mean().sort_index().fillna(0)["2013":]

In [275]:
# Rescale the series to the [0, 1] range, as a single-feature column.
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split below -- confirm this is acceptable for the evaluation.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(ts.values.reshape(-1, 1))
dataset = scaler.transform(ts.values.reshape(-1, 1))

In [276]:
# The first 67% of the rows form the training set; the remainder is the test set.
train_size = int(0.67 * len(dataset))
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

In [281]:
def create_dataset(dataset, look_back=1):
    """Turn a column of values into (input window, target window) pairs.

    Sample ``i`` takes ``look_back`` consecutive values of column 0 starting
    at row ``i + 2`` as input, and the following ``look_back`` values as
    target. Windows therefore start at row 2, and the last target window
    ends on the final row.

    Parameters
    ----------
    dataset : 2-D array
        Only column 0 is read.
    look_back : int
        Length of both the input and the target window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Inputs and targets, one row per sample. Both are empty when the
        array is too short to yield a sample.
    """
    n_samples = len(dataset) - 2 * look_back - 1
    first_rows = range(2, n_samples + 2)
    inputs = [dataset[row:row + look_back, 0] for row in first_rows]
    targets = [dataset[row + look_back:row + 2 * look_back, 0] for row in first_rows]
    return np.array(inputs), np.array(targets)

# Window the series: X holds `look_back` steps, Y the `look_back` steps that follow.
look_back = 4
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# Append a feature axis: [samples, time steps] -> [samples, time steps, features=1].
trainX = trainX[:, :, np.newaxis]
testX = testX[:, :, np.newaxis]

In [None]:
# Create and fit a two-layer stateful LSTM that maps a window of `look_back`
# steps to the next `look_back` steps.
batch_size = 1
n_epochs = 200
model = Sequential()
model.add(LSTM(5, batch_input_shape=(batch_size, look_back, 1), stateful=True, return_sequences=True))
model.add(LSTM(5, batch_input_shape=(batch_size, look_back, 1), stateful=True))
# One output per target step; trainY rows are `look_back` long.
model.add(Dense(look_back))
model.compile(loss='mean_squared_error', optimizer='adam')

# A stateful network carries its cell state from one batch to the next, so
# samples must be presented in order (shuffle=False) and the state must be
# cleared explicitly at the end of every pass over the data.
for epoch in range(n_epochs):
    model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()

In [None]:
# The loss is a mean squared error on the MinMax-scaled series. It cannot be
# passed through `scaler.inverse_transform` (that would add the data offset to
# a squared quantity). Instead take the square root and undo only the scaling
# factor: the scaler maps x -> x * scale_ + min_, so an error of size e in
# scaled units corresponds to e / scale_ in original units.
units_per_scaled = 1.0 / scaler.scale_[0]

# Start each evaluation from a clean state (the network is stateful).
model.reset_states()
trainScore = model.evaluate(trainX, trainY, batch_size=batch_size, verbose=0)
model.reset_states()
print('Train Score: %.2f RMSE' % (np.sqrt(trainScore) * units_per_scaled))

testScore = model.evaluate(testX, testY, batch_size=batch_size, verbose=0)
model.reset_states()
print('Test Score: %.2f RMSE' % (np.sqrt(testScore) * units_per_scaled))

In [None]:
trainPredict = model.predict(trainX, batch_size=batch_size)
model.reset_states()
testPredict = model.predict(testX, batch_size=batch_size)
model.reset_states()

# Each prediction row holds `look_back` future steps, while the plotting
# arrays have a single column, so only the first predicted step of every
# sample is plotted. With the windowing used by create_dataset, sample i's
# first target is row i + look_back + 2 of the array it was built from.
first_target = look_back + 2

# Place train predictions at the rows they forecast; everything else stays NaN.
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[first_target:first_target + len(trainPredict), 0] = trainPredict[:, 0]

# Same for test predictions, offset by the length of the training segment.
test_start = len(train) + first_target
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[test_start:test_start + len(testPredict), 0] = testPredict[:, 0]

In [None]:
# Draw the scaled series (semi-transparent) with train and test predictions on top.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(dataset, alpha=0.5)
ax.plot(trainPredictPlot)
ax.plot(testPredictPlot)

In [None]:
# Toy single-column series 0..21 for checking the windowing by eye.
mydata = np.arange(22)[:, np.newaxis]
def create_dataset(dataset, look_back=1):
    """Tracing variant of the windowing helper.

    Builds the same (input window, target window) pairs as the silent
    definition earlier in the notebook and prints the number of samples
    followed by one line per sample, so the indexing can be checked by eye.
    Defining it rebinds the name ``create_dataset`` for every later cell.

    Sample ``i`` takes ``look_back`` values of column 0 starting at row
    ``i + 2`` as input and the following ``look_back`` values as target.

    Returns a tuple ``(inputs, targets)`` of numpy arrays, one row per sample.
    """
    dataX, dataY = [], []
    start = len(dataset) - look_back - 1 - look_back
    # Single-argument print calls emit the same text on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print(start)
    for i in range(start):
        a = dataset[i + 2:(i + look_back + 2), 0]
        print("i %s i+lb %s" % (i, i + look_back))
        dataX.append(a)
        dataY.append(dataset[i + look_back + 2: (i + look_back*2) + 2, 0])
    return np.array(dataX), np.array(dataY)
# Window the toy series one step at a time (mydata is already a single column).
# Note: this rebinds trainX / trainY.
trainX, trainY = create_dataset(mydata, look_back=1)

In [None]:
# Split the toy series 67% / 33%.
# Note: this rebinds train_size, test_size, train and test.
train_size = int(len(mydata) * 0.67)
test_size = len(mydata) - train_size
# The upper bound must come from `mydata` itself; bounding the slice by
# len(dataset) (a different array) could truncate the test part or fail
# when `dataset` is not defined.
train, test = mydata[0:train_size, :], mydata[train_size:len(mydata), :]
print(train)

In [None]:
# Window both toy segments with a window length of 2 ...
trainX, trainY = create_dataset(train, look_back=2)
testX, testY = create_dataset(test, look_back=2)
# ... and append a feature axis: [samples, time steps] -> [samples, time steps, 1].
trainX = trainX[:, :, np.newaxis]
testX = testX[:, :, np.newaxis]

In [None]:
# Show the windowed inputs and targets. Single-argument print calls behave
# the same on Python 2 and 3; the print statements failed on Python 3.
print(trainX)
print(trainY)