In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

In [6]:
df = pd.read_excel('state_month_overdose.xlsx')
df['Deaths'] = df['Deaths'].apply(lambda x: 0 if x == 'Suppressed' else int(x))
df['Month'] = pd.to_datetime(df['Month'])
df = df.groupby(['Month']).agg({'Deaths': 'sum'}).reset_index()

  df['Month'] = pd.to_datetime(df['Month'])


In [7]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back):
        a = dataset.iloc[i:(i+look_back)].values
        dataX.append(a)
        dataY.append(dataset.iloc[i + look_back]) 
    return np.array(dataX), np.array(dataY)

In [8]:
def ci_overlap_percentage(pred1, pred2, ci1, ci2):
    overlap_count = 0
    for i in range(len(pred1)):
        if (pred1[i] - ci1[i] <= pred2[i] + ci2[i]) and (pred1[i] + ci1[i] >= pred2[i] - ci2[i]):
            overlap_count += 1
    return (overlap_count / len(pred1)) * 100

In [9]:
validation_periods = [
    ('2019-11-01', '2020-01-01'), #if training will be up until 2019-12-01
    ('2019-09-01', '2020-01-01'),
    ('2019-07-01', '2020-01-01'),
    ('2019-01-01', '2020-01-01'),
    ('2018-07-01', '2020-01-01'),
    ('2018-01-01', '2020-01-01')
]

In [10]:
look_back_periods = range(1, 12, 2)  # 1, 3, 5, ..., 11 months look-back

In [11]:
def generate_forecast(model, initial_sequence, look_back, num_predictions=12):
    predictions = []
    for_model = initial_sequence
    
    for _ in range(num_predictions):
        # Generate the next prediction
        pred = model.predict(for_model)
        predictions.append(pred[0][0])
        
        # Update the input for the next prediction
        # We need to construct a new input array of the same shape as the original input
        new_input = np.append(for_model[:, 1:], pred[0][0])  # Shift and append the new prediction
        for_model = new_input.reshape((1, look_back, 1))

    return np.array(predictions)

---

In [12]:
val_start, val_end = validation_periods[0]
print(val_start)
print(val_end)

look_back = 1

2019-11-01
2020-01-01


In [13]:
train = df[df['Month'] <= val_start] #<
val = df[(df['Month'] >= val_start) & (df['Month'] <= val_end)]
test = df[df['Month'] >= '2020-01-01'] #>

In [14]:
trainX, trainY = create_dataset(train['Deaths'], look_back)
valX, valY = create_dataset(val['Deaths'], look_back)
testX, testY = create_dataset(test['Deaths'], look_back)

In [15]:
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1)
valX = valX.reshape(valX.shape[0], valX.shape[1], 1)
testX = testX.reshape(testX.shape[0], testX.shape[1], 1)

LSTM

In [16]:
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=1)

I0000 00:00:1731613820.977810   24750 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-11-14 14:50:21.346261: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
  super().__init__(**kwargs)


Epoch 1/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 13077976.0000
Epoch 2/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 12357675.0000
Epoch 3/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 10448189.0000
Epoch 4/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 7794471.5000
Epoch 5/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 6114251.5000
Epoch 6/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4242643.5000
Epoch 7/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2467637.0000
Epoch 8/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1360500.7500
Epoch 9/100
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 666493.1875
Epoch 10/100
[1m58/58[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7fa4c207d960>

In [17]:
trainPredict = model.predict(trainX)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step


In [18]:
val_initial_sequence = np.array(train['Deaths'][-look_back:]).reshape((look_back, 1))
val_initial_sequence = np.array([val_initial_sequence])

valPredict = generate_forecast(model, val_initial_sequence, look_back, num_predictions = len(valX))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step


In [19]:
valPredict

array([4590.4517, 4621.0967], dtype=float32)

In [20]:
val_initial_sequence

array([[[4560]]])

In [21]:
test_initial_sequence = np.array([[valPredict[-1]]])
test_initial_sequence = np.array([test_initial_sequence])

In [22]:
test_initial_sequence

array([[[4621.0967]]], dtype=float32)

In [23]:
testPredict = generate_forecast(model, test_initial_sequence, look_back, num_predictions = len(testX))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


In [24]:
len(trainPredict) + len(valPredict) + len(testPredict)

71

In [25]:
len(trainX) + len(valX) + len(testX)

71

In [26]:
lstm_mape = mean_absolute_percentage_error(testY, testPredict)
lstm_mse = mean_squared_error(testY, testPredict)
lstm_rmse = np.sqrt(lstm_mse)

In [27]:
combined_array = [0] * look_back + trainPredict.flatten().tolist() + valPredict.flatten().tolist() + testPredict.flatten().tolist()

In [28]:
len(combined_array)

72

In [29]:
df['LSTM Predictions'] = combined_array[:len(df)]

In [30]:
df.head()

Unnamed: 0,Month,Deaths,LSTM Predictions
0,2015-01-01,2771,0.0
1,2015-02-01,2627,2790.106445
2,2015-03-01,2907,2645.193115
3,2015-04-01,2822,2926.96875
4,2015-05-01,2850,2841.429688


SARIMA

In [31]:
sarima_model = SARIMAX(train['Deaths'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12),
                               enforce_stationarity=False,
                               enforce_invertibility=False)
sarima_result = sarima_model.fit(disp=False)



In [32]:
sarima_predictions = sarima_result.predict(start=0, end=df.shape[0]-1, dynamic=False)

In [33]:
len(sarima_predictions)

72

In [34]:
sarima_predictions

0        0.000000
1     3287.408524
2     2713.683959
3     3013.761134
4     2823.915542
         ...     
67    5233.423121
68    5108.951026
69    5371.184387
70    5370.918004
71    5661.691398
Name: predicted_mean, Length: 72, dtype: float64

In [35]:
df['SARIMA Predictions'] = sarima_predictions

In [36]:
df.head()

Unnamed: 0,Month,Deaths,LSTM Predictions,SARIMA Predictions
0,2015-01-01,2771,0.0,0.0
1,2015-02-01,2627,2790.106445,3287.408524
2,2015-03-01,2907,2645.193115,2713.683959
3,2015-04-01,2822,2926.96875,3013.761134
4,2015-05-01,2850,2841.429688,2823.915542


In [37]:
sarimaTestPredict = df[df['Month'] > '2020-01-01']['SARIMA Predictions']

In [38]:
len(sarimaTestPredict)

11

In [39]:
len(testY)

11

In [40]:
testPredict

array([4651.936 , 4682.9707, 4714.202 , 4745.632 , 4777.2607, 4809.09  ,
       4841.121 , 4873.356 , 4905.7944, 4938.439 , 4971.2905],
      dtype=float32)

In [41]:
sarimaTestPredict

61    4606.981610
62    5160.839908
63    4822.279130
64    4988.292756
65    4998.173961
66    5167.440608
67    5233.423121
68    5108.951026
69    5371.184387
70    5370.918004
71    5661.691398
Name: SARIMA Predictions, dtype: float64

In [42]:
sarima_mape = mean_absolute_percentage_error(testY, sarimaTestPredict)
sarima_mse = mean_squared_error(testY, sarimaTestPredict)
sarima_rmse = np.sqrt(sarima_mse)

In [43]:
def calculate_confidence_intervals(predictions, alpha=0.05):
    # Calculate mean and standard deviation
    mean_pred = np.mean(predictions)
    std_pred = np.std(predictions)
    
    # Calculate the z-score for the confidence level
    z_score = 1.96  # for 95% confidence
    margin_of_error = z_score * (std_pred / np.sqrt(len(predictions)))
    
    lower_bound = predictions - margin_of_error
    upper_bound = predictions + margin_of_error
    
    return lower_bound, upper_bound

# Calculate confidence intervals
lower_bound_test, upper_bound_test = calculate_confidence_intervals(testPredict)
lower_bound_sarima, upper_bound_sarima = calculate_confidence_intervals(sarimaTestPredict)

In [44]:
def calculate_overlap(lower1, upper1, lower2, upper2):
    # Initialize overlap count
    overlap_count = 0

    for l1, u1, l2, u2 in zip(lower1, upper1, lower2, upper2):
        # Check for overlap
        if u1 >= l2 and l1 <= u2:
            overlap_count += 1

    # Calculate percent overlap
    percent_overlap = (overlap_count / len(lower1)) * 100
    return percent_overlap

# Calculate percent overlap
percent_overlap = calculate_overlap(lower_bound_test, upper_bound_test, lower_bound_sarima, upper_bound_sarima)

print(f'Percent Overlap: {percent_overlap:.2f}%')

Percent Overlap: 18.18%


---

In [45]:
look_back_periods[0]

1

In [47]:
for val_start, val_end in validation_periods:
    for look_back in [look_back_periods[0]]:
        df = df.copy()
        
        # Split data
        train = df[df['Month'] <= val_start]
        val = df[(df['Month'] >= val_start) & (df['Month'] <= val_end)]
        test = df[df['Month'] >= '2020-01-01']
        
        # Create datasets for LSTM
        trainX, trainY = create_dataset(train['Deaths'], look_back)
        valX, valY = create_dataset(val['Deaths'], look_back)
        testX, testY = create_dataset(test['Deaths'], look_back)
        
        trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1)
        valX = valX.reshape(valX.shape[0], valX.shape[1], 1)
        testX = testX.reshape(testX.shape[0], testX.shape[1], 1)
        
        # Build and train initial LSTM model on the training data
        model = Sequential()
        model.add(LSTM(50, activation='relu', input_shape=(look_back, 1)))
        model.add(Dense(1))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)
        
        val_initial_sequence = np.array(train['Deaths'][-look_back:]).reshape((look_back, 1))
        val_initial_sequence = np.array([val_initial_sequence])
        valPredict = generate_forecast(model, val_initial_sequence, look_back, num_predictions=len(valX))
        
        # Retrain the LSTM model on both training and validation data
        combined_train_val = pd.concat([train, val], axis=0)
        combinedX, combinedY = create_dataset(combined_train_val['Deaths'], look_back)
        combinedX = combinedX.reshape(combinedX.shape[0], combinedX.shape[1], 1)
        model.fit(combinedX, combinedY, epochs=100, batch_size=1, verbose=0)
        
        test_initial_sequence = np.array([[valPredict[-1]]])
        test_initial_sequence = np.array([test_initial_sequence])
        testPredict = generate_forecast(model, test_initial_sequence, look_back, num_predictions=len(testX))
        
        # LSTM metrics
        lstm_mape = mean_absolute_percentage_error(testY, testPredict)
        lstm_mse = mean_squared_error(testY, testPredict)
        lstm_rmse = np.sqrt(lstm_mse)
        
        combined_array = [0] * look_back + trainPredict.flatten().tolist() + valPredict.flatten().tolist() + testPredict.flatten().tolist()
        
        df['LSTM Predictions'] = combined_array[:len(df)]
        
        # SARIMA model retraining on combined training + validation data
        sarima_model = SARIMAX(combined_train_val['Deaths'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12),
                               enforce_stationarity=False, enforce_invertibility=False)
        sarima_result = sarima_model.fit(disp=False)
        
        sarima_predictions = sarima_result.predict(start=0, end=df.shape[0]-1, dynamic=False)
        df['SARIMA Predictions'] = sarima_predictions
        sarimaTestPredict = df[df['Month'] > '2020-01-01']['SARIMA Predictions']
        
        # SARIMA metrics
        sarima_mape = mean_absolute_percentage_error(testY, sarimaTestPredict)
        sarima_mse = mean_squared_error(testY, sarimaTestPredict)
        sarima_rmse = np.sqrt(sarima_mse)
        
        # Calculate CI overlap
        lower_bound_test, upper_bound_test = calculate_confidence_intervals(testPredict)
        lower_bound_sarima, upper_bound_sarima = calculate_confidence_intervals(sarimaTestPredict)
        
        ci_overlap = calculate_overlap(lower_bound_test, upper_bound_test, lower_bound_sarima, upper_bound_sarima)
        
        results.append({
            'Validation Period': f"{val_start} to {val_end}",
            'Look-back': look_back,
            'LSTM MAPE': lstm_mape,
            'LSTM MSE': lstm_mse,
            'LSTM RMSE': lstm_rmse,
            'SARIMA MAPE': sarima_mape,
            'SARIMA MSE': sarima_mse,
            'SARIMA RMSE': sarima_rmse,
            'CI Overlap %': ci_overlap
        })

results_df = pd.DataFrame(results)
print(results_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m-1s[0m -761199us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


In [48]:
results_df

Unnamed: 0,Validation Period,Look-back,LSTM MAPE,LSTM MSE,LSTM RMSE,SARIMA MAPE,SARIMA MSE,SARIMA RMSE,CI Overlap %
0,2019-11-01 to 2020-01-01,1,0.191844,1843045.0,1357.58803,0.114675,975519.6,987.683959,9.090909
1,2019-09-01 to 2020-01-01,1,0.219031,2250785.0,1500.261569,0.155781,1324804.0,1151.001302,9.090909
2,2019-07-01 to 2020-01-01,1,0.190362,1823551.0,1350.389158,0.156734,1337892.0,1156.672851,27.272727
3,2019-01-01 to 2020-01-01,1,0.2616,2979196.0,1726.034696,0.169823,1521660.0,1233.555764,0.0
4,2018-07-01 to 2020-01-01,1,0.124625,976211.3,988.034061,0.169374,1530380.0,1237.085348,0.0
5,2018-01-01 to 2020-01-01,1,0.16394,1504679.0,1226.653689,0.171116,1597333.0,1263.856518,81.818182


In [49]:
for val_start, val_end in validation_periods:
    for look_back in [look_back_periods[0]]:
        print(val_start, val_end, look_back)

2019-11-01 2020-01-01 1
2019-09-01 2020-01-01 1
2019-07-01 2020-01-01 1
2019-01-01 2020-01-01 1
2018-07-01 2020-01-01 1
2018-01-01 2020-01-01 1
