In [1]:
# import initializer
import kernel_init

In [2]:
# import packages
from django.db.models import Sum, Q
from datetime import timedelta, datetime
import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from purchasing.models import *
from purchasing.serializers import *

tf.random.set_seed(123)

from tensorflow.python.client import device_lib
print([x.name for x in device_lib.list_local_devices()])
print([gpu for gpu in tf.config.experimental.list_physical_devices('GPU')])
print(tf.config.get_visible_devices())


['/device:CPU:0']
[]
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [3]:
# query requested dates and total daily costs
qs = Purchase.objects.filter(~Q(requested=None)).values('requested').order_by('requested').annotate(daily_total=Sum('total_cost'))

# extract daily total costs values
dtc = [float(row['daily_total']) for row in qs]
dates = [d['requested'] for d in qs]

# sum each value n with sum(n-1) values
dt = []
running_total = 0
for i in range(len(dtc)):
    running_total += dtc[i]
    dt.append(running_total)


# normalize between 0 --> 1
#dt_norm = (dt - np.min(dt)) / (np.max(dt) - np.min(dt))

# split into training/validation datasets
split = int(len(dt) * 0.8)

dt_train = dt[:split]
dt_val = dt[split:-1]

print(f"Training samples: {len(dt_train)}")
print(f"Validation samples: {len(dt_val)}")

Training samples: 1405
Validation samples: 351


In [4]:
WINDOW = 30  # look-back
HORIZON = 1  # look-forward

def create_sequences(data, window, horizon):
    """
    Generate data sequences.
    """
    x, y = [], []

    for i in range(len(data)):
        x_end = i + window

        if x_end > len(data) - 1:
            break

        x.append(data[i:x_end])
        y.append(data[x_end])
    
    x = np.expand_dims(x, axis=1)

    print(f"x shape= {np.shape(x)}  y shape= {np.shape(y)}")
    return x, np.asarray(y)

x_train, y_train = create_sequences(dt_train, WINDOW, HORIZON)
x_val, y_val = create_sequences(dt_val, WINDOW, HORIZON)

x shape= (1375, 1, 30)  y shape= (1375,)
x shape= (321, 1, 30)  y shape= (321,)


In [5]:
# add checkpoint callback
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    './purchase_lstm_forecast_model.h5',
    verbose=0,
    save_best=True,
    save_weights_only=False,
    save_best_only=True,
    mode='min'
)

In [6]:
# construct the LSTM model
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=(1, WINDOW), activation='relu'),
    #tf.keras.layers.LSTM(128, activation='relu'),
    #tf.keras.layers.Dense(64, activation='relu'),
    #tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(HORIZON)
])

# compile model
model.compile(loss='mae', optimizer='adam', metrics=['mae', 'mse'])

model.summary()

history = model.fit(x_train,
                    y_train,
                    epochs=500,
                    batch_size=32,
                    validation_data=(x_val, y_val),
                    callbacks=[checkpoint])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               81408     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 81,537
Trainable params: 81,537
Non-trainable params: 0
_________________________________________________________________
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Ep

In [7]:
trained_model = tf.keras.models.load_model('./purchase_lstm_forecast_model.h5')

predictions = []
print(np.shape(x_train[0]))
# predict on training data
for val in tqdm(x_train):
    yhat = trained_model.predict(np.expand_dims(val, axis=0), verbose=0)
    predictions.append(yhat)

# predict on validation data
for val in tqdm(x_val):
    yhat = trained_model.predict(np.expand_dims(val, axis=0), verbose=0)
    predictions.append(yhat)

predictions = [p[0][0] for p in predictions]

(1, 30)


100%|██████████| 1375/1375 [00:56<00:00, 24.36it/s]
100%|██████████| 321/321 [00:12<00:00, 24.97it/s]


In [None]:
# create forecast
forecast = []
forecast_dates = []

# predict on the last values in validation data, generate initial forecast
for i in tqdm(range(WINDOW)):

    xvals = x_val[-1][0][i:]

    for j in range(len(forecast)):
        xvals = np.insert(xvals, j, forecast[j])
    xvals = np.expand_dims(xvals, axis=0)

    yhat = trained_model.predict(np.expand_dims(xvals, axis=0), verbose=0)
    forecast.append(yhat[0,0])

forecast_dates.append(dates[-1] + timedelta(days=1))
for i in range(WINDOW-1):
    forecast_dates.append(forecast_dates[-1] + timedelta(days=1))

for i in tqdm(range(365)):
    # take last N previous forecast values
    prev_vals = forecast[-WINDOW:]
    prev_vals = np.expand_dims(prev_vals, axis=0)


    yhat = trained_model.predict(np.expand_dims(prev_vals, axis=0), verbose=0)
    forecast.append(yhat[0,0])
    forecast_dates.append(forecast_dates[-1] + timedelta(days=1))

In [None]:
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

fig = go.Figure()
fig.add_trace(go.Scatter(x=dates, y=dt, name='Actual', line=dict(color='#1f77b4')))
fig.add_trace(
    go.Scatter(x=dates[WINDOW:], y=predictions, name='Predicted', line=dict(color='#d62728', dash="dash")))
fig.add_trace(
    go.Scatter(x=forecast_dates, y=forecast, name='Forecast', line=dict(color='#2ca02c', dash="dash")))

fig.update_layout(
    template='plotly_dark',
    title={'text': "TSW Cumulative Spending", 'y':0.9,'x':0.5,'xanchor': 'center','yanchor': 'top'},
    xaxis_title="Date",
    yaxis_title="Total Spent ($)")

fig.show()

In [None]:
# calculate RMSE
actual = np.asarray(np.concatenate((y_train, y_val)))
predicted = np.asarray(predictions)

MSE = np.mean((predicted - actual)**2)
RMSE = np.sqrt(MSE)
print('mse -->', MSE)
print('rmse -->', RMSE)

In [None]:
# export the saved model
import os
import tempfile

MODEL_DIR = tempfile.gettempdir()
VERSION = 1

export_path = os.path.join(MODEL_DIR, f"purchase_lstm_model_v{VERSION}")
print(f"Export model to ---> {export_path}")

tf.keras.models.save_model(
    trained_model,
    export_path,
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None
)
