# Prep


In [None]:
!pip install nbeats-keras

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nbeats-keras
  Downloading nbeats_keras-1.8.0-py3-none-any.whl (7.3 kB)
Collecting protobuf<=3.20
  Downloading protobuf-3.20.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
Collecting keract
  Downloading keract-4.5.1-py3-none-any.whl (12 kB)
Collecting tensorflow
  Downloading tensorflow-2.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (588.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m588.3/588.3 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<=3.20
  Downloading protobuf-3.19.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m53.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorboar

In [None]:
import warnings
import numpy as np
import pandas as pd
from nbeats_keras.model import NBeatsNet as NBeatsKeras

In [None]:
# Read the raw sales export and build `tire_sales`, a frame indexed by DATE.
# temp = pd.read_csv("drive/MyDrive/final_sales.csv", parse_dates=["DATE"])
temp = pd.read_csv("final_sales.csv", parse_dates=["DATE"])

# Drop the unnamed first column (presumably a saved index) and exact duplicate rows.
temp_dropped = temp.drop(['Unnamed: 0'], axis=1).drop_duplicates()
temp_dropped["DATE"] = pd.to_datetime(temp_dropped["DATE"])

cols = temp_dropped.columns.to_list()

# Count how often every full row occurs; each count must be 1 after de-duplication.
# NOTE(review): groupby() excludes rows with NaN in any key column by default
# (dropna=True), so rows with missing values never reach `tire_sales` — confirm
# that this is intended.
temp_grouped = temp_dropped.groupby(cols).size().reset_index(name="count")

# Fail loudly instead of printing: a printed message would leave `tire_sales`
# undefined (or stale from an earlier run) and later cells would break far away
# from the real cause.
if not temp_grouped.query("count > 1").empty:
    raise ValueError("ERROR - df contains duplicates")

# store final df as tire_sales
tire_sales = temp_grouped.drop(["count"], axis=1).set_index("DATE")  # drop count col

In [None]:
tire_sales.head(3)

Unnamed: 0_level_0,STORE_ID,TRAN_ID,ARTICLE_ID,INDIV_ID,VEHICLE_ID,UNITS,SALES
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-08-27,27,880109810,97980,251674389.0,420737946,4.0,508.52
2015-08-29,27,880109830,97793,255394873.0,420739446,4.0,438.6
2017-02-13,27,880111400,97759,272208431.0,420738203,2.0,206.4


In [None]:
# Make sure ARTICLE_ID is an integer column; the ids are interpolated into
# query strings and used as dictionary keys further down.
tire_sales = tire_sales.assign(ARTICLE_ID=tire_sales["ARTICLE_ID"].astype("int"))

In [None]:
# Distinct article ids (in order of first appearance) drive the per-article loops;
# the set copy is kept for membership checks while testing.
aIDs = pd.unique(tire_sales["ARTICLE_ID"])
saIDs = {article_id for article_id in aIDs}

In [None]:
# One single-column ("UNITS") frame per article, keyed by the article id as a string.
dfs_split_units = {}

for article in aIDs:
    article_rows = tire_sales.query(f"ARTICLE_ID == {article}")
    # several transactions can share a date, so collapse them to one total per DATE
    daily_units = article_rows["UNITS"].groupby("DATE").sum()
    dfs_split_units[f"{article}"] = daily_units.to_frame()

In [None]:
# Daily unit totals of one article as a plain 1-D array, used for the manual split below.
sample = dfs_split_units["97759"]["UNITS"].to_numpy(copy=True)

In [None]:
tire_sales

Unnamed: 0_level_0,STORE_ID,TRAN_ID,ARTICLE_ID,INDIV_ID,VEHICLE_ID,UNITS,SALES
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-08-27,27,880109810,97980,251674389.0,420737946,4.0,508.52
2015-08-29,27,880109830,97793,255394873.0,420739446,4.0,438.60
2017-02-13,27,880111400,97759,272208431.0,420738203,2.0,206.40
2017-07-11,27,880111790,15505,286667070.0,420738774,1.0,110.07
2017-11-11,27,880112220,1408,257299213.0,420737715,4.0,324.16
...,...,...,...,...,...,...,...
2018-10-28,794381,990794910,96161,560751647.0,979462746,1.0,54.99
2018-10-28,794381,990794920,96161,560761006.0,979462748,1.0,54.99
2018-10-29,794381,990794990,92302,560830933.0,980863213,4.0,175.96
2018-10-29,794381,990795000,7099717,560833263.0,980863218,2.0,121.98


In [None]:
len(dfs_split_units)

435

# nbeats-keras

In [None]:
sample.size

1298

In [None]:
# Window geometry: `input_seq_length` past values are used to predict the next
# `output_seq_length` values.
input_seq_length = 30
output_seq_length = 1

# Every `step_size`-th window is held out for testing; the rest are used for training.
step_size = 21

x_train, y_train, x_test, y_test = [], [], [], []

# Slide a window over the series one position at a time.
n_windows = len(sample) - input_seq_length - output_seq_length
for i in range(n_windows):
    end = i + input_seq_length
    sequence = sample[i:end]
    output = sample[end:end + output_seq_length]

    # Pick the destination lists once, then append the (input, target) pair.
    if i % step_size == 0:
        inputs, targets = x_test, y_test
    else:
        inputs, targets = x_train, y_train
    inputs.append(sequence)
    targets.append(output)

# Lists of windows -> 2-D numpy arrays
x_train, y_train = np.array(x_train), np.array(y_train)
x_test, y_test = np.array(x_test), np.array(y_test)

# Report the resulting shapes
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)

x_train shape: (1206, 30)
y_train shape: (1206, 1)
x_test shape: (61, 30)
y_test shape: (61, 1)


In [None]:
def train_test_split(data, input_seq_length, output_seq_length, num_test_samples):
    """Cut a series into sliding windows and hold out evenly spaced ones for testing.

    Every window consists of ``input_seq_length`` consecutive items (the model
    input) followed by ``output_seq_length`` items (the target). Windows whose
    start offset is a multiple of the computed step size go to the test set
    until ``num_test_samples`` have been collected; all other windows go to
    the training set.

    Args:
        data: Anything supporting ``len()`` and positional slicing
            (``data[a:b]``). This notebook passes single-column DataFrames,
            which produces 3-D arrays.
        input_seq_length: Number of items in each input window.
        output_seq_length: Number of items in each target window.
        num_test_samples: Maximum number of windows to hold out for testing.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` of numpy arrays.

    Note:
        Window starts run over ``range(len(data) - input_seq_length -
        output_seq_length)``, so the last window that would still fit is not
        generated. This is kept as-is so existing split sizes do not change.
    """
    x_train, y_train, x_test, y_test = [], [], [], []

    n_windows = len(data) - input_seq_length - output_seq_length

    # Spread the test windows over the series. The step is clamped to at least 1:
    # a step of 0 (series too short to space the test windows out) previously
    # raised ZeroDivisionError in the modulo below, and num_test_samples == 1
    # divided by zero right here.
    gaps = num_test_samples - 1
    if gaps == 0:
        step_size = 1
    else:
        step_size = max(1, (n_windows - num_test_samples) // gaps)

    for start in range(n_windows):
        end = start + input_seq_length
        sequence = data[start:end]
        output = data[end:end + output_seq_length]

        # Hold the window out only while the test quota is not yet filled.
        if len(x_test) < num_test_samples and start % step_size == 0:
            x_test.append(sequence)
            y_test.append(output)
        else:
            x_train.append(sequence)
            y_train.append(output)

    return (np.array(x_train), np.array(y_train), np.array(x_test), np.array(y_test))

In [None]:
# Settings shared by the N-BEATS cells: backcast length and forecast length,
# plus the number of held-out windows currently stored in `x_test`.
time_steps, output_steps = 30, 1
test_size = len(x_test)

In [None]:
import random

# random.sample() requires a sequence: passing the dict view directly is deprecated
# since Python 3.9 and raises TypeError from Python 3.11, so materialise the items
# into a list first.
sample_10 = dict(random.sample(list(dfs_split_units.items()), 10))

since Python 3.9 and will be removed in a subsequent version.
  sample_10 = dict(random.sample(dfs_split_units.items(), 10))


In [None]:
# Train one N-BEATS model per article and store the summed test-window forecasts.
#
# NOTE(review): train_test_split() holds out windows at evenly spaced offsets over
# the whole series (starting at offset 0), not the final 61 days. The "first 30" /
# "last 31" sums below are labelled September / October — confirm that the held-out
# windows really correspond to those months.
time_steps, output_steps = 30, 1   # backcast / forecast lengths (loop-invariant)
num_test_samples = 61              # 30 + 31 held-out windows per article
sep_window_count = 30              # predictions [0:30] -> sep, [30:] -> oct

# Smallest number of windows for which train_test_split() can space out all test
# samples with a step size of at least 1. Shorter series used to abort the whole
# loop, so they are skipped with a warning instead.
min_windows = 2 * num_test_samples - 1

sep_predictions = {}
oct_predictions = {}
for article, series in dfs_split_units.items():
    n_windows = len(series) - time_steps - output_steps
    if n_windows < min_windows:
        warnings.warn(
            f"Skipping article {article}: only {len(series)} observations, "
            f"too few for {num_test_samples} test windows"
        )
        continue

    x_train, y_train, x_test, y_test = train_test_split(
        series,
        input_seq_length=time_steps,
        output_seq_length=output_steps,
        num_test_samples=num_test_samples,
    )
    # Print the shapes of the arrays
    print("x_train shape:", x_train.shape)
    print("y_train shape:", y_train.shape)
    print("x_test shape:", x_test.shape)
    print("y_test shape:", y_test.shape)

    model_keras = NBeatsKeras(backcast_length=time_steps, forecast_length=output_steps,
                              stack_types=(NBeatsKeras.GENERIC_BLOCK, NBeatsKeras.GENERIC_BLOCK),
                              nb_blocks_per_stack=2, thetas_dim=(4, 4), share_weights_in_stack=True,
                              hidden_layer_units=64)
    model_keras.compile(loss='mae', optimizer='adam')
    # verbose=0: the per-epoch logs of hundreds of models overflow the cell output
    model_keras.fit(x_train, y_train, epochs=20, batch_size=128, verbose=0)
    predictions_keras_forecast = model_keras.predict(x_test, verbose=0)

    # Check against this loop's own test-set size rather than a global computed
    # from a different split in an earlier cell.
    np.testing.assert_equal(
        predictions_keras_forecast.shape,
        (num_test_samples, model_keras.forecast_length, output_steps),
    )

    # Save the first 30 predictions sum to sep_predictions
    sep_sum = np.sum(predictions_keras_forecast[:sep_window_count])
    sep_predictions[article] = sep_sum

    # Save the last 31 predictions sum to oct_predictions
    oct_sum = np.sum(predictions_keras_forecast[sep_window_count:])
    oct_predictions[article] = oct_sum

x_train shape: (1206, 30, 1)
y_train shape: (1206, 1, 1)
x_test shape: (61, 30, 1)
y_test shape: (61, 1, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
x_train shape: (1205, 30, 1)
y_train shape: (1205, 1, 1)
x_test shape: (61, 30, 1)
y_test shape: (61, 1, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
x_train shape: (1206, 30, 1)
y_train shape: (1206, 1, 1)
x_test shape: (61, 30, 1)
y_test shape: (61, 1, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 1



x_train shape: (885, 30, 1)
y_train shape: (885, 1, 1)
x_test shape: (61, 30, 1)
y_test shape: (61, 1, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 20/20
x_train shape: (400, 30, 1)
y_train shape: (400, 1, 1)
x_test shape: (61, 30, 1)
y_test shape: (61, 1, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
x_train shape: (734, 30, 1)
y_train shape: (734, 1, 1)
x_test shape: (61, 30, 1)
y_test shape: (61, 1, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
x_train shape: (1184, 30, 1)
y_train shape: (1184, 1, 1)
x_test shape: (61, 30, 1)
y_test shape: (61, 1, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 

In [None]:
import json

In [None]:
len(sep_predictions)

435

In [None]:
sep_predictions

{'97980': 3223.4924,
 '97793': 3126.5078,
 '97759': 8858.812,
 '15505': 6365.607,
 '1408': 2814.8015,
 '15284': 1954.9436,
 '7099616': 3445.8794,
 '26784': 2466.5781,
 '136043': 554.7965,
 '123888': 9202.781,
 '1374': 990.39087,
 '146107': 1823.1649,
 '826': 4641.247,
 '114674': 9426.5,
 '136094': 4773.78,
 '95872': 2209.7559,
 '114572': 7192.3457,
 '85876': 6041.001,
 '3429': 5583.159,
 '114589': 4535.081,
 '136128': 1462.3864,
 '3945': 4052.9614,
 '97725': 5357.7134,
 '7099618': 6476.786,
 '122817': 2265.19,
 '831': 2455.9946,
 '122630': 6923.9688,
 '106497': 2833.2808,
 '830': 1989.1594,
 '11680': 1054.7239,
 '189701': 803.5467,
 '97946': 6708.9116,
 '2832': 3185.2776,
 '3431': 7802.179,
 '123905': 3763.6714,
 '98099': 2773.564,
 '452': 653.8242,
 '140344': 2109.5427,
 '3724': 6061.565,
 '92319': 3885.0645,
 '136060': 2133.0337,
 '189752': 1879.108,
 '92302': 3988.1025,
 '122596': 8109.4937,
 '95804': 1726.0015,
 '106310': 1299.5598,
 '3438': 4035.7815,
 '4013': 2487.5535,
 '817': 2

In [None]:
# The stored sums are NumPy scalars (np.sum results); cast them to built-in floats
# so that json can serialise them.
sep_predictions_float = dict(
    zip(sep_predictions.keys(), map(float, sep_predictions.values()))
)

# Persist the September totals as JSON
with open("sep_predictions_nbeats_v1.json", "w") as json_file:
    json.dump(sep_predictions_float, json_file)

In [None]:
# The stored sums are NumPy scalars (np.sum results); cast them to built-in floats
# so that json can serialise them.
oct_predictions_float = dict(
    zip(oct_predictions.keys(), map(float, oct_predictions.values()))
)

# Persist the October totals as JSON
with open("oct_predictions_nbeats_v1.json", "w") as json_file:
    json.dump(oct_predictions_float, json_file)

In [None]:
len(oct_predictions)

420

In [None]:
# Round-trip check: load the saved September totals back from disk.
with open("sep_predictions_nbeats_v1.json", "r") as json_file:
    data = json.loads(json_file.read())

# Show the reloaded dictionary
data

{'97980': 3175.0419921875,
 '97793': 3620.4296875,
 '97759': 8750.13671875,
 '15505': 6172.67626953125,
 '1408': 2864.09521484375,
 '15284': 1764.736083984375,
 '7099616': 3371.05126953125,
 '26784': 2424.91943359375,
 '136043': 631.087890625,
 '123888': 9467.1630859375,
 '1374': 807.6353149414062,
 '146107': 1861.336669921875,
 '826': 4776.60302734375,
 '114674': 9565.505859375,
 '136094': 4985.3505859375,
 '95872': 1997.447509765625,
 '114572': 7060.14453125,
 '85876': 5881.130859375,
 '3429': 5579.31005859375,
 '114589': 4839.1923828125,
 '136128': 1521.0672607421875,
 '3945': 4139.58251953125,
 '97725': 5074.072265625,
 '7099618': 6622.1494140625,
 '122817': 2283.767578125,
 '831': 2403.85400390625,
 '122630': 7256.2392578125,
 '106497': 2897.533203125,
 '830': 2091.6025390625,
 '11680': 961.3426513671875,
 '189701': 779.7874145507812,
 '97946': 6741.4677734375,
 '2832': 3125.68798828125,
 '3431': 8260.0361328125,
 '123905': 4332.3701171875,
 '98099': 2774.463134765625,
 '452': 683

In [None]:
# Standalone N-BEATS model: two generic stacks of two blocks each, with weights
# shared inside a stack.
model_keras = NBeatsKeras(
    backcast_length=time_steps,
    forecast_length=output_steps,
    stack_types=(NBeatsKeras.GENERIC_BLOCK, NBeatsKeras.GENERIC_BLOCK),
    nb_blocks_per_stack=2,
    thetas_dim=(4, 4),
    share_weights_in_stack=True,
    hidden_layer_units=64,
)

In [None]:
model_keras.compile(loss='mae', optimizer='adam')

In [None]:
# NOTE(review): when the notebook is run top to bottom, x_train / y_train are
# whatever the per-article loop assigned last — confirm this is the series the
# standalone model is meant to be trained on.
model_keras.fit(x_train, y_train, epochs=20, batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fb2fc3e97c0>

In [None]:
model_keras.save('n_beats_model.h5')

In [None]:
predictions_keras_forecast = model_keras.predict(x_test)



In [None]:
np.testing.assert_equal(predictions_keras_forecast.shape, (test_size, model_keras.forecast_length, output_steps))

In [None]:
predictions_keras_forecast

array([[[360.4608 ]],

       [[301.23315]],

       [[342.2871 ]],

       [[306.05032]],

       [[317.39606]],

       [[557.7794 ]],

       [[397.83755]],

       [[378.71625]],

       [[226.26947]],

       [[179.89369]],

       [[161.55035]],

       [[257.53595]],

       [[379.7172 ]],

       [[274.90842]],

       [[345.9582 ]],

       [[246.55396]],

       [[289.24838]],

       [[218.2739 ]],

       [[289.46457]],

       [[473.06818]],

       [[298.99887]],

       [[549.31683]],

       [[253.34076]],

       [[354.4953 ]],

       [[270.3095 ]],

       [[301.37143]],

       [[304.81256]],

       [[246.66466]],

       [[291.22574]],

       [[267.18283]],

       [[402.1438 ]],

       [[275.60788]],

       [[362.5596 ]],

       [[320.01453]],

       [[246.164  ]],

       [[290.6402 ]],

       [[268.6595 ]],

       [[176.276  ]],

       [[249.12073]],

       [[350.38022]],

       [[269.11707]],

       [[322.07446]],

       [[279.81506]],

       [[31