Make use of text in the item names.  This notebook does not adjust for seasonality.

In [None]:
import pandas as pd
import os

# Folder that holds the competition CSV files.
PATH_CSV = '/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast'

# Read the three CSV tables: sales records, items and restaurants.
df_sales = pd.read_csv(os.path.join(PATH_CSV,"sales_train.csv"))
df_items = pd.read_csv(os.path.join(PATH_CSV,"items.csv"))
df_resturant = pd.read_csv(os.path.join(PATH_CSV,"resturants.csv"))

# Parse the date column; errors='coerce' turns unparseable values into NaT
# instead of raising.
df_sales.date = pd.to_datetime(df_sales.date, errors='coerce') 

## Load the Glove Embeddings

In [None]:
# Fetch the GloVe 6B archive (-c resumes a partially downloaded file) and
# unpack it into the current working directory.
!wget -c "https://nlp.stanford.edu/data/glove.6B.zip"
!unzip glove.6B.zip

In [None]:
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec

# Use the 300-dimensional GloVe file extracted by the previous cell.
glove_file = 'glove.6B.300d.txt'
# Convert the GloVe file to word2vec format in a temporary file, then load
# the converted file as gensim KeyedVectors. The converter's return value is
# not needed, so it is discarded.
tmp_file = get_tmpfile("test_word2vec.txt")
_ = glove2word2vec(glove_file, tmp_file)
w2vec_model = KeyedVectors.load_word2vec_format(tmp_file)

## NLP Demo

In [None]:
# Analogy query: words most similar to "woman" + "king" - "man".
w2vec_model.most_similar(positive=['woman', 'king'], negative=['man'])

In [None]:
# Shape of the vector stored for a single word.
w2vec_model['dog'].shape

Build an embedding for each item name by averaging the GloVe vectors of its words.

In [None]:
def process_title(model, name):
  """Turn an item name into a single embedding vector.

  The name is split on single spaces and lower-cased. Two menu spellings are
  mapped to words expected to be in the vocabulary ('vegi' -> 'vegetable',
  'smoothy' -> 'malt'). The vectors of all tokens found in `model` are summed
  and the sum is divided by the total number of tokens in the name, including
  tokens that have no vector.

  Args:
    model: Word-vector lookup supporting `word in model` and `model[word]`
      (e.g. gensim KeyedVectors). It must expose `vector_size` for the
      fallback case where no token is found.
    name: Item name as a string.

  Returns:
    A new numpy array holding the embedding. If none of the tokens is in the
    vocabulary, a zero vector of length `model.vector_size` is returned
    (previously this case raised a TypeError from dividing None).
  """
  import numpy as np  # local import: numpy is only imported in a later cell

  replacements = {'vegi': 'vegetable', 'smoothy': 'malt'}
  total = None
  token_count = 0
  for token in name.split(' '):
    token = token.lower()
    token = replacements.get(token, token)
    token_count += 1
    if token not in model:
      continue
    if total is None:
      # Copy so the in-place arithmetic below never touches the model's data.
      total = model[token].copy()
    else:
      total += model[token]
  if total is None:
    # No token had an embedding: fall back to a neutral all-zero vector.
    return np.zeros(model.vector_size, dtype=np.float32)
  total /= token_count
  return total

# Map every item id to the embedding computed from that item's name.
item_lookup = {
  item_id: process_title(w2vec_model, item_name)
  for item_id, item_name in zip(df_items['id'], df_items['name'])
}

#r = process_title(w2vec_model, 'breaded fish with vegetables meal')
#print(r)
print(len(item_lookup))

Utility function to create sequences.

In [None]:
def series_to_supervised(data, window=1, lag=1, dropnan=True):
    """Reframe a DataFrame as shifted copies laid side by side.

    The result contains, left to right: `data` shifted down by `window`,
    ..., 1 rows (columns named '<col>(t-k)'), then `data` itself
    ('<col>(t)'), then `data` shifted up by `lag` rows ('<col>(t+lag)').

    Args:
        data: Source DataFrame.
        window: Number of past rows to include before the current row.
        lag: How many rows ahead the target columns are taken from.
        dropnan: When true, rows containing any NaN are removed.

    Returns:
        The combined DataFrame.
    """
    source_cols = list(data.columns)
    past_steps = range(window, 0, -1)

    # Oldest history first, then the current row, then the future target.
    frames = [data.shift(step) for step in past_steps]
    frames += [data, data.shift(-lag)]

    labels = [('%s(t-%d)' % (col, step)) for step in past_steps for col in source_cols]
    labels += [('%s(t)' % (col)) for col in source_cols]
    labels += [('%s(t+%d)' % (col, lag)) for col in source_cols]

    combined = pd.concat(frames, axis=1)
    combined.columns = labels
    return combined.dropna() if dropnan else combined

Join the items and sales tables so that we can look up the store id for each item.

In [None]:
# Keep only the item id and its store id, then attach the store to each sale.
df_items2 = df_items[['id','store_id']]
df_train = df_sales.merge(df_items2,left_on='item_id',right_on='id')
# NOTE(review): the next line is a bare expression whose result is discarded
# (it is not the last statement of the cell, so nothing is displayed).
df_train[['date','item_id','item_count','store_id']]

# One row per (item, store, date) holding the mean item_count, with the
# columns renamed to short names.
df_train = df_train.sort_values('date').groupby(['item_id', 'store_id', 'date'], as_index=False)
df_train = df_train.agg({'item_count':['mean']})
df_train.columns = ['item', 'store', 'date', 'sales']
df_train.head()

In [None]:
# Derive day-of-week and day-of-year columns from the date.
df_train = df_train.assign(
    dow=df_train['date'].dt.dayofweek,
    doy=df_train['date'].dt.dayofyear,
)

df_train

Build the sequence data.

In [None]:
# Number of past rows fed to the model (in addition to the current row) and
# how many rows ahead the target is taken from.
window = 29
future_span = 30
# The date column is dropped before shifting. The shift is applied to the
# whole frame by row position, so a window can straddle two different
# items/stores; such rows are filtered out in the next cell.
series = series_to_supervised(df_train.drop('date', axis=1), window=window, lag=future_span)
series.head()

Remove sequences that did not have enough data.

In [None]:

# Remove edge cases, where there were not enough values to complete a series.
# The shifts are positional, so a row is only valid when the oldest row of its
# window AND the future target row belong to the same item and store as row t.
last_item = 'item(t-%d)' % window
last_store = 'store(t-%d)' % window
last_dow = 'dow(t-%d)' % window
last_doy = 'doy(t-%d)' % window
future_item = 'item(t+%d)' % future_span
future_store = 'store(t+%d)' % future_span

same_history = (series['store(t)'] == series[last_store]) & (series['item(t)'] == series[last_item])
# Without this check the label could be the sales of a different item/store
# that happens to sit future_span rows further down the table.
same_future = (series['store(t)'] == series[future_store]) & (series['item(t)'] == series[future_item])
series = series[same_history & same_future]
#series = series[(series['dow(t)'] == series[last_dow])]
#series = series[(series['doy(t)'] == series[last_doy])]

We will predict with sales, and our engineered features.

In [None]:
import numpy as np
from keras.utils.np_utils import to_categorical   

def drop_column(df, col):
  """Remove every time-step column of feature `col` from `df`, in place.

  Covers '<col>(t+future_span)', '<col>(t-window)' ... '<col>(t-1)' and
  '<col>(t)', using the notebook-level `window` and `future_span`. Columns
  that are not present are silently skipped. Returns None.
  """
  doomed = [('%s(t+%d)' % (col, future_span))]
  doomed.extend(('%s(t-%d)' % (col, step)) for step in range(window, 0, -1))
  doomed.append(f"{col}(t)")
  df.drop(doomed, axis=1, inplace=True, errors='ignore')

def cat_seq(df, col):
  """Return the values of column `col` of `df` passed through to_categorical."""
  raw_values = df[col].values
  return to_categorical(raw_values)

# Label: the sales value future_span rows ahead. Once it is extracted, all
# (t+future_span) columns are removed from `series` in place.
# NOTE(review): because of the in-place drops this cell cannot be re-run
# without rebuilding `series` first.
labels_col = 'sales(t+%d)' % future_span
labels = series[labels_col]
series.drop(labels_col, axis=1, inplace=True)
series.drop('item(t+%d)' % future_span, axis=1, inplace=True)
series.drop('store(t+%d)' % future_span, axis=1, inplace=True)
series.drop('dow(t+%d)' % future_span, axis=1, inplace=True)
series.drop('doy(t+%d)' % future_span, axis=1, inplace=True)

# Sales sequences: drop every other feature from a copy so only the sales
# columns remain.
series2 = series.copy()
drop_column(series2, "item")
drop_column(series2, "store")
drop_column(series2, "dow")
drop_column(series2, "doy")
sales_series = series2.values

# Day-of-week sequences, obtained the same way.
series2 = series.copy()
drop_column(series2, "item")
drop_column(series2, "store")
drop_column(series2, "doy")
drop_column(series2, "sales")
dow_series = series2.values

# Day-of-year sequences, obtained the same way.
series2 = series.copy()
drop_column(series2, "item")
drop_column(series2, "store")
drop_column(series2, "dow")
drop_column(series2, "sales")
doy_series = series2.values

# Append a trailing axis of length 1 to each array so the three features can
# be concatenated along that axis below.
t1 = sales_series.reshape(sales_series.shape + (1,))
t2 = dow_series.reshape(dow_series.shape + (1,)) 
t3 = doy_series.reshape(doy_series.shape + (1,))

# Create predictors (x)
# Length of one word vector, read from an arbitrary word's vector.
vec_size = w2vec_model['test'].shape[0]

# Look up the name embedding of each sequence's item, using the item id found
# in the (t-1) column.
lst = []
for item in list(series['item(t-1)']):
  lst.append(item_lookup[item])

# x1: sales/dow/doy joined on the last axis. x2: one embedding row per sequence.
x1 = np.concatenate([t1,t2,t3],axis=2)
x2 = np.concatenate(lst).reshape((series.shape[0],vec_size))

# The model takes both inputs, in this order.
x = [x1,x2]

In [None]:
# Sanity check: shapes of the sales, day-of-week and day-of-year arrays.
print(t1.shape)
print(t2.shape)
print(t3.shape)

Split the predictors (x sequences) and the labels (future sales) into training and validation sets.

In [None]:
TEST_SIZE = 0.4  # expected fraction of sequences held out for validation
SEED = 42  # fixed seed so a fresh "Restart & Run All" reproduces the same split

# Draw one uniform number per sequence; True marks a validation row.
rng = np.random.default_rng(SEED)
mask = rng.random(size=x[0].shape[0])<TEST_SIZE

# Apply the same mask to every model input and to the labels so rows stay aligned.
X_train = [subx[~mask] for subx in x]
X_valid = [subx[mask] for subx in x]

Y_train = labels.values[~mask]
Y_valid = labels.values[mask]

print('Train set shape x1:', X_train[0].shape)
print('Train set shape x2:', X_train[1].shape)
print('Validation set shape x1:', X_valid[0].shape)
print('Validation set shape x2:', X_valid[1].shape)

Construct the neural network.

In [None]:
import tensorflow as tf 
from keras.models import Sequential, Model
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed, Flatten, Dropout, concatenate, Input
import keras

# Training hyper-parameters.
epochs = 500
batch = 256
lr = 0.0003
adam = tf.keras.optimizers.Adam(lr)

# Branch A: 1-D convolution over the time-step sequence of
# (sales, day-of-week, day-of-year) features.
A1 = Input(shape=(X_train[0].shape[1], X_train[0].shape[2]),name='A1')
A2 = Conv1D(filters=64, kernel_size=8, activation='relu')(A1)
A3 = MaxPooling1D(pool_size=2)(A2)
A4 = Flatten()(A3)
A5 = Dense(50, activation='relu')(A4)
A6 = Dropout(0.2)(A5)

# Branch B: dense projection of the item-name embedding.
# The input shape is given as a tuple, the form Keras documents for `shape`.
B1 = Input(shape=(X_train[1].shape[1],),name='B1')
B2 = Dense(16, activation='relu',name='B2')(B1)

# Merge both branches and regress a single value (the future sales).
M1 = concatenate([A6,B2])
M2 = Dense(1,name='M2')(M1)

# The unused `model = Sequential()` placeholder was removed; the functional
# Model below is the only model that was ever trained.
model = Model(inputs=[A1, B1],outputs=[M2])
model.compile(loss='mse', optimizer=adam)
model.summary()

Fit the neural network.

In [None]:
from keras.callbacks import EarlyStopping

# Stop training once val_loss has failed to improve by at least 1e-3 for 5
# epochs in a row, and restore the weights of the best epoch.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, 
        verbose=1, mode='auto', restore_best_weights=True)

# NOTE(review): `batch` is defined in the model cell but is not passed as
# batch_size here, so Keras' default batch size is used. Confirm this is intended.
cnn_history = model.fit(X_train, Y_train, callbacks=[monitor],
    validation_data=(X_valid, Y_valid), epochs=epochs, verbose=2)

Predict and evaluate the validation data.

In [None]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Predict on both splits, then report the root of the mean squared error for each.
cnn_train_pred = model.predict(X_train)
cnn_valid_pred = model.predict(X_valid)
print('Train rmse:', np.sqrt(mean_squared_error(Y_train, cnn_train_pred)))
print('Validation rmse:', np.sqrt(mean_squared_error(Y_valid, cnn_valid_pred)))

Plot the training curve.

In [None]:
import matplotlib.pyplot as plt

# Training and validation loss per epoch, drawn on one explicit Axes.
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'Train loss'), ('val_loss', 'Validation loss')):
    ax.plot(cnn_history.history[history_key], label=curve_label)
fig.legend()
fig.suptitle('CNN')
ax.set_xlabel("Epochs")
ax.set_ylabel("MSE")

plt.show()