# Preprocessing
---  

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
# from keras.preprocessing.sequence import TimeseriesGenerator
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.python.keras.utils.data_utils import Sequence
from tensorflow.keras.model.sequence import TimeseriesGenerator
from sklearn.decomposition import PCA

ModuleNotFoundError: No module named 'tensorflow.keras.model'

In [2]:
plt.style.use('ggplot')
plt.rc('patch', force_edgecolor=True,edgecolor='black')
plt.rc('hist', bins='auto')

In [3]:
train = pd.read_csv('wrangled_data/training_post_wrangle.csv')

In [4]:
features = ['date_block_num', 'mean_item', 'mode_item', 'item_name', 'item_category_id', 'item_category_name', 'shop_name']

target = ['item_cnt_month']

In [5]:
train.columns

Index(['shop_id', 'item_id', 'year', 'month', 'item_cnt_month',
       'date_block_num', 'mean_item', 'mode_item', 'item_name',
       'item_category_id', 'item_category_name', 'shop_name'],
      dtype='object')

In [6]:
train['date'] = pd.to_datetime(train[['year', 'month']].assign(DAY=1)).apply(lambda x: x.strftime('%Y-%m'))
train.drop(columns=['year', 'month'], inplace=True)
train.set_index(['shop_id', 'item_id', 'date'], inplace=True)
train.sort_index(inplace=True)

In [7]:
ind = train.index
X = train[features]
y = train[target]

In [8]:
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
y = scaler.fit_transform(y)

In [9]:
TimeseriesGenerator(X, y, length=2, sampling_rate=1, batch_size=1)[0]

(array([[[0.        , 0.00215582, 0.        , 0.00135336, 0.48192771,
          0.48192771, 0.        ],
         [0.03030303, 0.00215582, 0.        , 0.00135336, 0.48192771,
          0.48192771, 0.        ]]]),
 array([[0.00967033]]))

In [10]:
X_train, X_tune, y_train, y_tune = train_test_split(X, y, test_size=0.20, random_state=123, shuffle=False)

In [58]:
win_length = 8
batch_size = 4
num_features = len(features)

train_generator = tf.keras.preprocessing.sequence.TimeseriesGenerator(X_train, y_train, length=win_length, sampling_rate=1, batch_size=batch_size)
tune_generator = tf.keras.preprocessing.sequence.TimeseriesGenerator(X_tune, y_tune, length=win_length, sampling_rate=1, batch_size=batch_size)

In [59]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(128, input_shape=(win_length, num_features), return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.LSTM(128, return_sequences=True))
model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.LSTM(64, return_sequences=False))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(1))

In [60]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 8, 128)            69632     
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 8, 128)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 8, 128)            131584    
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 8, 128)            0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 8, 128)            0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)               

In [None]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                     patience=2,
                                                     mode='min')

model.compile(loss=tf.losses.MeanSquaredError(),
             optimizer=tf.optimizers.Adam(),
             metrics=[tf.metrics.MeanAbsoluteError()])

history = model.fit_generator(train_generator, epochs=50,
                             validation_data=tune_generator,
                             shuffle=False,
                             callbacks=[early_stopping])

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 2882335 steps, validate for 720583 steps
Epoch 1/50
  14873/2882335 [..............................] - ETA: 58:38:23 - loss: 6.1322e-07 - mean_absolute_error: 3.6419e-04

In [None]:
model.evaluate_generator(tune_generator, verbose=0)

In [None]:
predictions = model.predict_generator(tune_generator)

predictions.shape[0]