In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import pandas as pd
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from keras.utils import to_categorical
import tensorflow.keras.layers as k
from scipy import stats

Using TensorFlow backend.


In [3]:
# The demand table was produced in R; read the RDS file through rpy2 and
# convert the resulting R object into a pandas DataFrame.
pandas2ri.activate()
readRDS = robjects.r['readRDS']
demand_rds = readRDS('demand_wide.RDS')
data = pandas2ri.ri2py(demand_rds)

In [198]:
batchSize = 128   # number of sampled series per training batch
windowSize = 52   # length of the history window fed to the encoder
testSize = 12     # length of the forecast horizon fed to the decoder

In [209]:
def custom_data_gen(data, batchSize, probs, windowSize, testSize):
    """Endlessly yield randomly sampled (inputs, target) batches.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide table with one row per series. Columns whose name starts with
        '2' are treated as the ordered weekly demand columns; the columns
        "store_number", "biz_cd_int", "scale_factor" and
        "sampling_probability" are also read.
    batchSize : int
        Number of rows drawn (with replacement) per batch.
    probs : sequence of float or None
        Per-row sampling weights. They are normalised to sum to 1. When
        None, the "sampling_probability" column of `data` is used.
    windowSize : int
        Number of consecutive weekly columns used as history.
    testSize : int
        Number of consecutive weekly columns, directly after the history
        window, used as target.

    Yields
    ------
    ([x_data, biz_cd, stores, weeks, scale_factor, y_data], y_data)
        x_data       : (batchSize, windowSize, 1) demand divided by scale_factor
        biz_cd       : (batchSize, windowSize) biz_cd_int repeated over the window
        stores       : (batchSize, windowSize) store_number repeated over the window
        weeks        : (batchSize, windowSize) last two digits of each column name
        scale_factor : (batchSize,)
        y_data       : (batchSize, testSize, 1) unscaled demand

    Raises
    ------
    ValueError
        On first iteration, if there are fewer than windowSize + testSize
        weekly columns.

    NOTE(review): the same, unshifted y_data is yielded both as the last
    model input and as the target. If that input drives a teacher-forced
    decoder it probably should be lagged by one step - confirm.
    """
    # Weekly demand columns are recognised by their leading '2' (e.g. '201601').
    all_cols = [col for col in data if col.startswith('2')]

    # Number of valid window start positions. The last valid start is
    # len(all_cols) - windowSize - testSize, hence the +1.
    n_starts = len(all_cols) - windowSize - testSize + 1
    if n_starts < 1:
        raise ValueError(
            "need at least windowSize + testSize = %d weekly columns, found %d"
            % (windowSize + testSize, len(all_cols))
        )

    # Honour the caller-supplied weights (previously ignored) and normalise
    # them so np.random.choice accepts them.
    if probs is None:
        probs = data["sampling_probability"]
    probs = np.asarray(probs, dtype=float)
    probs = probs / probs.sum()

    while True:
        # Sample rows according to their scale: the larger the scale, the
        # higher the probability of sampling from that observation.
        rows = np.random.choice(data.shape[0], size=batchSize, p=probs)

        # One random window position is shared by the whole batch.
        # randint's upper bound is exclusive, so n_starts itself is never drawn.
        start = np.random.randint(0, n_starts)
        train_cols = all_cols[start:start + windowSize]
        test_cols = all_cols[start + windowSize:start + windowSize + testSize]

        # Select the sampled rows once instead of re-indexing for every field.
        batch = data.iloc[rows]

        # Store number and subclass will be passed to embedding layers, so
        # each value is repeated across the whole window.
        stores_train = np.repeat(batch["store_number"].tolist(), windowSize)
        stores_train = np.reshape(stores_train, (batchSize, windowSize))

        biz_cd_train = np.repeat(batch["biz_cd_int"].tolist(), windowSize)
        biz_cd_train = np.reshape(biz_cd_train, (batchSize, windowSize))

        # Scale factor is used to scale the training data and is also passed
        # into the model so the network output can be rescaled.
        scale_factor = batch["scale_factor"].tolist()

        # Week index = last two characters of the column name. A list
        # comprehension (rather than map) gives real integers on Python 3 too.
        weeks = [int(week[-2:]) for week in train_cols]
        week_train = np.repeat([weeks], batchSize, axis=0)
        week_train = np.reshape(week_train, (batchSize, windowSize))

        # Divide positionally so repeated row labels cannot affect alignment.
        x_data = batch[train_cols].values / np.asarray(scale_factor)[:, None]
        x_data = np.reshape(x_data, (batchSize, windowSize, 1))

        y_data = batch[test_cols].values
        y_data = np.reshape(y_data, (batchSize, testSize, 1))

        yield [x_data, np.array(biz_cd_train), np.array(stores_train),
               np.array(week_train), np.array(scale_factor), y_data], y_data

In [200]:
# Preview the first five rows of the converted demand table.
data.head(n=5)

Unnamed: 0,product_id,store_number,biz_cd,201601,201602,201603,201604,201605,201606,201607,...,201721,201722,201723,201724,201725,201726,total_sales,scale_factor,sampling_probability,biz_cd_int
1,1967548.0,667.0,260.15.1,0.0,2.0,3.0,0.0,2.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,131.0,2.039683,8.287731e-07,255
2,1967550.0,195.0,260.15.5,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,55.0,1.436508,5.836885e-07,259
3,1967724.0,7.0,260.10.2,0.0,0.0,0.0,2.0,2.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,119.0,1.944444,7.900755e-07,239
4,1967724.0,23.0,260.10.2,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,130.0,2.031746,8.255483e-07,239
5,1967724.0,31.0,260.10.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,43.0,1.34127,5.449909e-07,239


array([[52,  1,  2],
       [52,  1,  2]])

In [162]:
def custom_loss(y_true, y_pred):
    """Negative log-likelihood of the targets under a NegativeBinomial.

    Parameters
    ----------
    y_true : tensor
        Only channel 0 of the last axis (``y_true[:, :, 0]``) is used, as the
        observed values.
    y_pred : tensor
        Read as three channels on the last axis: ``[:, :, 0]`` -> mu,
        ``[:, :, 1]`` -> alpha, ``[:, :, 2]`` -> scale.

    Returns
    -------
    Scalar tensor: the negated sum of log-probabilities over all flattened
    (batch, step) positions.
    """
    # Small offset on every predicted channel (presumably to keep the sqrt
    # and the distribution parameters away from exactly zero).
    y_pred = y_pred + 1e-7

    # Split the prediction into its channels and flatten each to 1-D.
    mu = tf.reshape(y_pred[:, :, 0], [-1])
    alpha = tf.reshape(y_pred[:, :, 1], [-1])
    scale = tf.reshape(y_pred[:, :, 2], [-1])

    # The targets are evaluated as-is. They were previously shifted by 1e-7,
    # which only moved them off the integer support of the distribution.
    y_true = tf.reshape(y_true[:, :, 0], [-1])

    # Rescale mu and alpha with the per-series scale factor.
    mu = mu * scale
    alpha = alpha / tf.sqrt(scale)

    # Keywords make the parameterisation explicit: the first positional
    # argument of NegativeBinomial is total_count and the second is logits.
    # NOTE(review): the names suggest mu/alpha were meant as mean/dispersion;
    # that would be total_count=1/alpha, logits=log(alpha * mu). Left as-is so
    # training stays consistent with any existing sampling code - confirm.
    distribution = tfp.distributions.NegativeBinomial(total_count=mu, logits=alpha)
    log_likelihood = distribution.log_prob(y_true)

    return -tf.reduce_sum(log_likelihood, axis=-1)

In [201]:
# Input layers and embeddings for the encoder side of the network.
# Every categorical input spans the full history window so that its embedding
# can be concatenated with the demand series step by step.
input_demand = k.Input(shape=(windowSize, 1), name='demand')

# NOTE(review): input_dim=53 admits week indices 0..52 only - confirm that no
# column suffix parses to 53.
input_week = k.Input(shape=(windowSize,), name='week')
week = k.Embedding(input_dim=53, output_dim=5)(input_week)

# Vocabulary size = largest id in the data + 1.
n_store_ids = int(data['store_number'].max() + 1)
input_store = k.Input(shape=(windowSize,), name='store')
store = k.Embedding(input_dim=n_store_ids, output_dim=25)(input_store)

n_biz_cd_ids = int(data['biz_cd_int'].max() + 1)
input_biz_cd = k.Input(shape=(windowSize,), name='biz_cd')
biz_cd = k.Embedding(input_dim=n_biz_cd_ids, output_dim=25)(input_biz_cd)

# Inputs to the decoder: y_data during training, and samples during prediction.
decoder_input = k.Input(shape=(testSize, 1))

# The scale factor bypasses the recurrent layers; it is repeated once per
# forecast step so it can be appended to the output.
input_scale_factor = k.Input(shape=(1,), name='scale_factor')
scale_factor = k.RepeatVector(testSize)(input_scale_factor)

# Merge demand and embeddings along the feature axis for the encoder.
merged = k.concatenate([input_demand, store, biz_cd, week])

In [202]:
# Encoder: two stacked LSTMs over the merged inputs. The first returns the
# full sequence; the second additionally returns its final hidden and cell
# states, which are kept to initialise the decoder.
encoder = k.LSTM(128, activation='tanh', return_sequences=True)(merged)
encoder, state_h, state_c = k.LSTM(units=128, activation='tanh', return_state=True)(encoder)
encoder_states = [state_h, state_c]

In [203]:
# Decoder: an LSTM seeded with the encoder's final states, followed by a
# second LSTM and two per-step dense layers.
decoder_lstm = k.LSTM(128, return_sequences=True, return_state=True)
decoder, _, _ = decoder_lstm(decoder_input, initial_state=encoder_states)

predictions = k.LSTM(128, activation='tanh', return_sequences=True)(decoder)
predictions = k.TimeDistributed(k.Dense(256, activation='relu'))(predictions)
# Two softplus outputs per forecast step.
predictions = k.TimeDistributed(k.Dense(2, activation='softplus'))(predictions)

# The repeated scale factor is appended as a third channel of the output.
output = k.concatenate([predictions, scale_factor])

In [206]:
# Assemble the full training model. Its output carries the scale factor as
# the last channel so the loss function can use it during training.
model = tf.keras.models.Model(
    [
        input_demand,
        input_biz_cd,
        input_store,
        input_week,
        input_scale_factor,
        decoder_input,
    ],
    outputs=output,
)
model.compile(optimizer='adam', loss=custom_loss)

In [210]:
# Both generators sample from the same table with the same per-row weights;
# they differ only in batch size (full batches for training, one series at a
# time for inspection).
sampling_probs = data["sampling_probability"].tolist()

train_gen = custom_data_gen(
    data=data,
    batchSize=batchSize,
    probs=sampling_probs,
    testSize=testSize,
    windowSize=windowSize,
)
test_gen = custom_data_gen(
    data=data,
    batchSize=1,
    probs=sampling_probs,
    testSize=testSize,
    windowSize=windowSize,
)

In [211]:
# steps_per_epoch must be a whole number of batches. `/` yields a float on
# Python 3, so use floor division, and run at least one step even when the
# table has fewer rows than one batch.
steps_per_epoch = max(1, data.shape[0] // batchSize)
model.fit_generator(generator=train_gen, steps_per_epoch=steps_per_epoch, epochs=1)

Epoch 1/1

KeyboardInterrupt: 

In [138]:
# Draw a single-series batch, run the model on its inputs and display the
# corresponding target values for comparison.
test = next(test_gen)
test_inputs, check = test
pred = model.predict(test_inputs)
check

array([[[22.],
        [ 2.],
        [ 1.],
        [ 8.],
        [ 9.],
        [ 4.],
        [ 8.],
        [ 1.],
        [13.],
        [ 8.],
        [ 4.],
        [ 0.]]])

In [139]:
# Apply the same rescaling as the loss function: channel 2 of the prediction
# is the scale factor, channels 0 and 1 are the raw mu and alpha outputs.
pred_scale = pred[:, :, 2]
mu = (pred[:, :, 0] * pred_scale).reshape(-1)
alpha = (pred[:, :, 1] / np.sqrt(pred_scale)).reshape(-1)

In [141]:
# Display the flattened, rescaled `mu` values, one per forecast step.
mu

array([3.6005523e+00, 2.4367089e+00, 1.8613114e+00, 6.0634184e+00,
       5.8134408e+00, 4.6657891e+00, 6.6204052e+00, 1.6582885e+00,
       7.4597311e+00, 6.3818827e+00, 4.4113550e+00, 7.8171300e-04],
      dtype=float32)

In [143]:
# Draw one sample per forecast step from the predicted distribution.
# The first positional argument of NegativeBinomial is total_count and the
# second is logits; they are spelled out here.
# NOTE(review): `mu`/`alpha` read like mean/dispersion but are used as
# total_count/logits - confirm this matches how the model was trained.
outcomes = tfp.distributions.NegativeBinomial(total_count=mu, logits=alpha)
sess = tf.Session()
sample_op = outcomes.sample()
sess.run(sample_op)

array([33.,  1.,  2.,  9.,  9., 11., 16.,  4.,  4., 21.,  6.,  0.],
      dtype=float32)

In [146]:
# Stand-alone encoder for inference: maps the history inputs to the final
# LSTM states. The states depend on all four encoder inputs (demand, biz_cd,
# store and week are concatenated into `merged`), so every one of them must
# be listed as a model input, and the states must be passed as `outputs`
# rather than inside the inputs list.
encoder_model = tf.keras.models.Model(
    inputs=[input_demand, input_biz_cd, input_store, input_week],
    outputs=encoder_states,
)