# Goals
1. predict each store individually
- make the target stationary by differencing: yes/no?
- z-score normalization on the training data
- predict the next 16 values directly or recursively?
2. predict each store individually, but with every store/family pair as a parameter
- needs z-score normalization
- stationary target: yes/no?
3. predict all store/family pairs simultaneously
- z-score normalization? maybe not needed
- stationary target?

Features:
1. time features:
- linear timestamp
- sin/cos encoding of the yearly cycle; also check whether a weekly/monthly pattern is present
- encoding of the weekday, maybe also of the month
2. oil/holidays/location should be ok


In [None]:
import tensorflow as tf
tf.random.set_seed(42)
import numpy as np
np.random.seed(42)

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from datetime import datetime
import plotly.offline as pyo
from plotly import subplots
import plotly.graph_objects as go
import statsmodels.api as sm
import statsmodels.tsa.api as smt

from statsmodels.tsa.arima.model import ARIMA
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_log_error


from keras.models import Model
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import LSTM, Dense,Input,concatenate
from tensorflow import keras
from tensorflow.keras.layers import *
from tensorflow.keras import regularizers


from baseFunctions import *
from data_helpers import processData6

In [None]:
# Load the prepared dataset through the project helper. Besides the main table
# it returns two lookup objects -- presumably the category encodings and their
# inverses (TODO confirm against data_helpers.processData6).
data, propDicts, flippedPropDicts = processData6()

# feature engineering

Aggregated data:
- there is some linear trend

In [None]:
# Collapse the table to one value per date: the sum of `sales` over all rows
# sharing that date.
# NOTE(review): if `data` also contains rows without sales (e.g. a forecast
# period), those dates sum to 0 -- confirm that this is intended.
dailyData = data.groupby('date')['sales'].sum()

# Additive decomposition with a period of 12 observations, drawn with the
# result's own plot method; `dec` holds whatever that plot call returns.
# NOTE(review): the series is daily, so a period of 12 looks arbitrary --
# confirm the intended seasonality.
decomposition = sm.tsa.seasonal_decompose(dailyData, model='additive', period=12)
dec = decomposition.plot()
plt.show()

In [None]:
# Periodogram of the aggregated daily sales.
# NOTE(review): 365 is presumably the reference time span in days and
# n_domFreq the number of dominant frequencies to report -- confirm in
# baseFunctions.plot_periodogram.
plot_periodogram(dailyData, 365, n_domFreq=30)

# Strong frequencies noted from the plot, in cycles per 365 days, with the
# corresponding period length (365 / frequency):
#   52  (weekly)        365/52  ~ 7 days
#   24  (semi-monthly)  365/24  ~ 15 days (half a month)
#   104 (half-weekly)   365/104 ~ 3.5 days
#   12  (monthly)       365/12  ~ 30 days
#   6   (bimonthly)     365/6   ~ 61 days
#   4   (quarterly)     365/4   ~ 91 days
#   3   (thirds)        365/3   ~ 122 days
#   2   (half-yearly)   365/2   ~ 182 days
#   1   (yearly)        365/1   = 365 days

In [None]:
# Work on a copy so the table loaded earlier is left untouched.
data1 = data.copy()

# Linear trend feature: days elapsed since the earliest date in the table,
# counted from 1. The earliest date is taken with min() instead of from the
# first row, so the feature does not depend on the row order of `data`.
data1['linear_time'] = (data1['date'] - data1['date'].min()).dt.days + 1
data1['day_of_year'] = data1['date'].dt.day_of_year

# Fourier features of the day of year, one helper call per frequency
# (cycles per 365 days). The frequencies are processed in this exact order
# because `data1` is threaded through every call and `periodicfeat` keeps only
# the value returned by the last call (frequency 52).
# NOTE(review): day_of_year reaches 366 in leap years while the reference
# time span is fixed at 365 -- confirm addFourierFeature copes with that.
for fourier_frequency in (1, 2, 3, 4, 6, 12, 104, 24, 52):
    data1, periodicfeat = addFourierFeature(
        data1,
        n_splits=6,
        frequency=fourier_frequency,
        feature='day_of_year',
        referenceTimespan=365,
    )

# Plain calendar features taken from the date column.
data1['weekday'] = data1['date'].dt.weekday
data1['month'] = data1['date'].dt.month


# individual prediction

In [None]:
# Covariate columns fed to the model. Not included: store_nbr, family, sales,
# dataT, city, state, type, cluster.
_fourier_frequencies = (1, 2, 3, 4, 6, 12, 104, 24, 52)
# Column-name suffixes of the Fourier features (presumably phase offsets in
# degrees, six per frequency -- TODO confirm against addFourierFeature).
_fourier_suffixes = (0, 60, 120, 180, 240, 300)

trainF = [
    'onpromotion',
    'dcoilwtico', 'holidayType',
    'description', 'transferred',
    'linear_time', 'day_of_year',
]
# Fourier columns, grouped by frequency and, within a frequency, by suffix.
trainF += [
    f'day_of_year_f{frequency}_{suffix}'
    for frequency in _fourier_frequencies
    for suffix in _fourier_suffixes
]
trainF += ['weekday', 'month']

# Same covariates plus the target itself as the last column.
train0 = [*trainF, 'sales']

In [None]:
# Series under study: the training rows of store 1, family 3.
# NOTE(review): the windows below treat row position as time, so the rows are
# assumed to be in chronological order with one row per date -- confirm.
train = data1.loc[(data1.dataT == 'train') & (data1.store_nbr == 1) & (data1.family == 3)] # family 18

n_predictedValues = 16  # forecast horizon, in rows
look_back = 100         # history length, in rows

# Select the columns once instead of on every loop iteration.
history_values = train[train0].to_numpy()   # covariates + sales
future_values = train[trainF].to_numpy()    # covariates only
sales_values = train['sales'].to_numpy()

# A window starting at row i needs rows i .. i + look_back + n_predictedValues - 1,
# so the last valid start is N - look_back - n_predictedValues. The "+ 1"
# makes the count include that start, i.e. the window ending on the final row.
n_windows = train.shape[0] - look_back - n_predictedValues + 1
if n_windows < 1:
    raise ValueError(
        f'series has {train.shape[0]} rows but at least '
        f'{look_back + n_predictedValues} are needed to build one window'
    )

# sequence0: history windows; sequence1: covariates of the rows to predict;
# labels: the sales of those rows.
sequence0 = np.array([
    history_values[start:start + look_back]
    for start in range(n_windows)
])
sequence1 = np.array([
    future_values[start + look_back:start + look_back + n_predictedValues]
    for start in range(n_windows)
])
labels = np.array([
    sales_values[start + look_back:start + look_back + n_predictedValues]
    for start in range(n_windows)
])



# Seed the random generators again right before the layers are created.
tf.keras.utils.set_random_seed(42)

n_features = len(train0)


def _regularized_lstm(keep_sequence):
    """Return a 64-unit ReLU LSTM layer with an L2 (0.001) kernel penalty.

    keep_sequence is forwarded as return_sequences.
    """
    return LSTM(
        64,
        activation='relu',
        return_sequences=keep_sequence,
        kernel_regularizer=regularizers.l2(0.001),
    )


# input1: history window with all train0 columns (covariates + sales).
# input2: rows to predict, covariates only, hence one feature fewer.
input1 = Input(shape=(look_back, n_features))
input2 = Input(shape=(n_predictedValues, n_features - 1))

# Two stacked LSTM layers per input. Creation is interleaved across the two
# branches (first layer of each, then second layer of each); keep that order,
# since seeded weight initialisation may depend on it.
lstm1 = _regularized_lstm(True)(input1)
lstm2 = _regularized_lstm(True)(input2)

lstm1 = _regularized_lstm(False)(lstm1)
lstm2 = _regularized_lstm(False)(lstm2)

# Merge both branch summaries and map them to one value per predicted row.
x = tf.keras.layers.concatenate([lstm1, lstm2])
x = Dense(128, activation='relu')(x)
output = Dense(n_predictedValues, activation='relu')(x)

# Define the model
model = Model(inputs=[input1, input2], outputs=output)

# Mean squared logarithmic error as loss, mean absolute error as extra metric.
model.compile(optimizer='adam', loss=tf.keras.losses.MSLE, metrics=['mae'])

n_splits = 10
tscv = TimeSeriesSplit(n_splits=n_splits)

# Time-ordered cross-validation: every fold validates on windows that come
# after its training windows. The same model instance is fitted in every fold,
# so training continues from the previous fold's weights instead of restarting.
# NOTE(review): consecutive windows share all but one of their target rows, so
# the last training window and the first validation window of a fold overlap in
# targets; consider TimeSeriesSplit(..., gap=n_predictedValues - 1).
for train_index, test_index in tscv.split(sequence0):
    X_train = [sequence0[train_index], sequence1[train_index]]
    X_test = [sequence0[test_index], sequence1[test_index]]
    y_train, y_test = labels[train_index], labels[test_index]

    model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

# Score the final model on the arrays left over from the last fold.
# mean_squared_log_error expects (y_true, y_pred), so the labels go first.
forecast = model.predict(X_train, verbose=False)
rmsleTrain = np.sqrt(mean_squared_log_error(y_train, forecast))
forecast = model.predict(X_test, verbose=False)
rmsleTest = np.sqrt(mean_squared_log_error(y_test, forecast))
print('errors:  ', rmsleTrain, rmsleTest,X_train[0].shape)

In [None]:
# Recompute the train/validation RMSLE of the current model on the arrays of
# the last cross-validation fold.
# mean_squared_log_error expects (y_true, y_pred), so the labels go first.
forecast = model.predict(X_train, verbose=False)
rmsleTrain = np.sqrt(mean_squared_log_error(y_train, forecast))
forecast = model.predict(X_test, verbose=False)
rmsleTest = np.sqrt(mean_squared_log_error(y_test, forecast))
print('errors:  ', rmsleTrain, rmsleTest, X_train[0].shape)

# predict in one big dataframe

In [None]:
# Wide layout of the training rows: one row per date, with the remaining
# columns spread over every (store_nbr, family) combination.
# A trailing transpose was left disabled by the author.
is_train_row = data1['dataT'] == 'train'
grouped = data1[is_train_row].pivot(index='date', columns=['store_nbr', 'family'])