In [133]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

In [134]:
df = pd.read_excel('state_month_overdose.xlsx')
df['Deaths'] = df['Deaths'].apply(lambda x: 0 if x == 'Suppressed' else int(x))
df['Month'] = pd.to_datetime(df['Month'])
df = df.groupby(['Month']).agg({'Deaths': 'sum'}).reset_index()

  df['Month'] = pd.to_datetime(df['Month'])


In [135]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back):
        a = dataset.iloc[i:(i+look_back)].values
        dataX.append(a)
        dataY.append(dataset.iloc[i + look_back]) 
    return np.array(dataX), np.array(dataY)

In [136]:
def ci_overlap_percentage(pred1, pred2, ci1, ci2):
    overlap_count = 0
    for i in range(len(pred1)):
        if (pred1[i] - ci1[i] <= pred2[i] + ci2[i]) and (pred1[i] + ci1[i] >= pred2[i] - ci2[i]):
            overlap_count += 1
    return (overlap_count / len(pred1)) * 100

In [137]:
validation_periods = [
    ('2019-07-01', '2020-01-01'), #if x will be up until 2019-12-01
    ('2019-01-01', '2020-01-01'),
    ('2018-07-01', '2020-01-01'),
    ('2018-01-01', '2020-01-01')
]

In [138]:
look_back_periods = range(1, 12, 2)  # 1, 3, 5, ..., 11 months look-back

In [140]:
def generate_forecast(model, initial_sequence, look_back, num_predictions=12):
    predictions = []
    for_model = initial_sequence
    
    for _ in range(num_predictions):
        # Generate the next prediction
        pred = model.predict(for_model)
        predictions.append(pred[0][0])
        
        # Update the input for the next prediction
        # We need to construct a new input array of the same shape as the original input
        new_input = np.append(for_model[:, 1:], pred[0][0])  # Shift and append the new prediction
        for_model = new_input.reshape((1, look_back, 1))

    return np.array(predictions)

---

In [9]:
val_start, val_end = validation_periods[0]
print(val_start)
print(val_end)

look_back = 1

2019-07-01
2020-01-01


In [10]:
train = df[df['Month'] <= val_start] #<
val = df[(df['Month'] >= val_start) & (df['Month'] <= val_end)]
test = df[df['Month'] >= '2020-01-01'] #>

In [11]:
trainX, trainY = create_dataset(train['Deaths'], look_back)
valX, valY = create_dataset(val['Deaths'], look_back)
testX, testY = create_dataset(test['Deaths'], look_back)

In [12]:
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1)
valX = valX.reshape(valX.shape[0], valX.shape[1], 1)
testX = testX.reshape(testX.shape[0], testX.shape[1], 1)

LSTM

In [13]:
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=1)

I0000 00:00:1730824698.362692    1206 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-11-05 11:38:18.727157: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
  super().__init__(**kwargs)


Epoch 1/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 13261591.0000
Epoch 2/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 10364362.0000
Epoch 3/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 8576221.0000
Epoch 4/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6467400.0000
Epoch 5/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4206611.5000
Epoch 6/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2119505.0000
Epoch 7/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1036148.1250
Epoch 8/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 515551.5312
Epoch 9/100
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 173375.3594
Epoch 10/100
[1m54/54[0m [32m━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f8954709240>

In [14]:
trainPredict = model.predict(trainX)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step


In [15]:
val_initial_sequence = np.array(train['Deaths'][-look_back:]).reshape((look_back, 1))
val_initial_sequence = np.array([val_initial_sequence])

valPredict = generate_forecast(model, val_initial_sequence, look_back, num_predictions = len(valX))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 340ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


In [16]:
valPredict

array([4291.1777, 4286.3623, 4281.554 , 4276.753 , 4271.9585, 4267.171 ],
      dtype=float32)

In [17]:
val_initial_sequence

array([[[4296]]])

In [18]:
test_initial_sequence = np.array([[valPredict[-1]]])
test_initial_sequence = np.array([test_initial_sequence])

In [19]:
test_initial_sequence

array([[[4267.171]]], dtype=float32)

In [20]:
testPredict = generate_forecast(model, test_initial_sequence, look_back, num_predictions = len(testX))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


In [21]:
len(trainPredict) + len(valPredict) + len(testPredict)

71

In [103]:
len(trainX) + len(valX) + len(testX)

71

In [22]:
lstm_mape = mean_absolute_percentage_error(testY, testPredict)
lstm_mse = mean_squared_error(testY, testPredict)
lstm_rmse = np.sqrt(lstm_mse)

In [86]:
combined_array = [0] * look_back + trainPredict.flatten().tolist() + valPredict.flatten().tolist() + testPredict.flatten().tolist()

In [87]:
len(combined_array)

72

In [88]:
df['LSTM Predictions'] = combined_array[:len(df)]

In [89]:
df.head()

Unnamed: 0,Month,Deaths,LSTM Predictions
0,2015-01-01,2771,0.0
1,2015-02-01,2627,2768.382324
2,2015-03-01,2907,2624.590576
3,2015-04-01,2822,2904.185547
4,2015-05-01,2850,2819.30835


SARIMA

In [90]:
sarima_model = SARIMAX(train['Deaths'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12),
                               enforce_stationarity=False,
                               enforce_invertibility=False)
sarima_result = sarima_model.fit(disp=False)



In [146]:
sarima_predictions = sarima_result.predict(start=0, end=df.shape[0]-1, dynamic=False)

In [147]:
len(sarima_predictions)

72

In [148]:
sarima_predictions

0        0.000000
1     3630.673241
2     2858.607115
3     3149.680874
4     2864.502013
         ...     
67    3901.206374
68    3874.325643
69    3735.902648
70    3794.419578
71    4068.913517
Name: predicted_mean, Length: 72, dtype: float64

In [110]:
df['SARIMA Predictions'] = sarima_predictions

In [111]:
df.head()

Unnamed: 0,Month,Deaths,LSTM Predictions,SARIMA Predictions
0,2015-01-01,2771,0.0,0.0
1,2015-02-01,2627,2768.382324,3658.063885
2,2015-03-01,2907,2624.590576,2867.387808
3,2015-04-01,2822,2904.185547,3161.099411
4,2015-05-01,2850,2819.30835,2867.412151


In [125]:
sarimaTestPredict = df[df['Month'] > '2020-01-01']['SARIMA Predictions']

In [126]:
len(sarimaTestPredict)

11

In [127]:
len(testY)

11

In [128]:
testPredict

array([4262.3906, 4257.617 , 4252.8506, 4248.091 , 4243.338 , 4238.592 ,
       4233.8525, 4229.12  , 4224.3945, 4219.676 , 4214.964 ],
      dtype=float32)

In [129]:
sarimaTestPredict

61    -6666.380236
62    -7366.392136
63    -8988.150941
64   -10002.051857
65   -11116.787159
66   -11970.138650
67   -15326.935577
68   -18776.622331
69   -21580.325838
70   -24952.238144
71   -27448.493486
Name: SARIMA Predictions, dtype: float64

In [130]:
sarima_mape = mean_absolute_percentage_error(testY, sarimaTestPredict)
sarima_mse = mean_squared_error(testY, sarimaTestPredict)
sarima_rmse = np.sqrt(sarima_mse)

In [131]:
def calculate_confidence_intervals(predictions, alpha=0.05):
    # Calculate mean and standard deviation
    mean_pred = np.mean(predictions)
    std_pred = np.std(predictions)
    
    # Calculate the z-score for the confidence level
    z_score = 1.96  # for 95% confidence
    margin_of_error = z_score * (std_pred / np.sqrt(len(predictions)))
    
    lower_bound = predictions - margin_of_error
    upper_bound = predictions + margin_of_error
    
    return lower_bound, upper_bound

# Calculate confidence intervals
lower_bound_test, upper_bound_test = calculate_confidence_intervals(testPredict)
lower_bound_sarima, upper_bound_sarima = calculate_confidence_intervals(sarimaTestPredict)

In [132]:
def calculate_overlap(lower1, upper1, lower2, upper2):
    # Initialize overlap count
    overlap_count = 0

    for l1, u1, l2, u2 in zip(lower1, upper1, lower2, upper2):
        # Check for overlap
        if u1 >= l2 and l1 <= u2:
            overlap_count += 1

    # Calculate percent overlap
    percent_overlap = (overlap_count / len(lower1)) * 100
    return percent_overlap

# Calculate percent overlap
percent_overlap = calculate_overlap(lower_bound_test, upper_bound_test, lower_bound_sarima, upper_bound_sarima)

print(f'Percent Overlap: {percent_overlap:.2f}%')

Percent Overlap: 0.00%


---

In [152]:
look_back_periods[0]

1

In [156]:
results = []

for val_start, val_end in validation_periods:
    for look_back in [look_back_periods[0]]:
        df = df.copy()
        
        # Split data
        train = df[df['Month'] <= val_start] #<
        val = df[(df['Month'] >= val_start) & (df['Month'] <= val_end)]
        test = df[df['Month'] >= '2020-01-01'] #>
        
        # Create datasets for LSTM
        trainX, trainY = create_dataset(train['Deaths'], look_back)
        valX, valY = create_dataset(val['Deaths'], look_back)
        testX, testY = create_dataset(test['Deaths'], look_back)
        
        trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1)
        valX = valX.reshape(valX.shape[0], valX.shape[1], 1)
        testX = testX.reshape(testX.shape[0], testX.shape[1], 1)
        
        # Build and train LSTM model
        model = Sequential()
        model.add(LSTM(50, activation='relu', input_shape=(look_back, 1)))
        model.add(Dense(1))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)
        
        trainPredict = model.predict(trainX)
        
        # Forecast predictions using recursive LSTM model forecasting
        val_initial_sequence = np.array(train['Deaths'][-look_back:]).reshape((look_back, 1))
        val_initial_sequence = np.array([val_initial_sequence])
        
        # Forecast validation predictions
        valPredict = generate_forecast(model, val_initial_sequence, look_back, num_predictions = len(valX))
        
        
        # Forecast test predictions
        test_initial_sequence = np.array([[valPredict[-1]]])
        test_initial_sequence = np.array([test_initial_sequence])
        
        testPredict = generate_forecast(model, test_initial_sequence, look_back, num_predictions = len(testX))
        
        
        # Calculate LSTM metrics
        lstm_mape = mean_absolute_percentage_error(testY, testPredict)
        lstm_mse = mean_squared_error(testY, testPredict)
        lstm_rmse = np.sqrt(lstm_mse)
        
        combined_array = [0] * look_back + trainPredict.flatten().tolist() + valPredict.flatten().tolist() + testPredict.flatten().tolist()
        
        df['LSTM Predictions'] = combined_array[:len(df)]
        
        # Build and fit SARIMA model
        sarima_model = SARIMAX(train['Deaths'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12),
                               enforce_stationarity=False,
                               enforce_invertibility=False)
        sarima_result = sarima_model.fit(disp=False)
        
        sarima_predictions = sarima_result.predict(start=0, end=df.shape[0]-1, dynamic=False)
        
        df['SARIMA Predictions'] = sarima_predictions
        
        sarimaTestPredict = df[df['Month'] > '2020-01-01']['SARIMA Predictions']
        
        sarima_mape = mean_absolute_percentage_error(testY, sarimaTestPredict)
        sarima_mse = mean_squared_error(testY, sarimaTestPredict)
        sarima_rmse = np.sqrt(sarima_mse)
        
        # Calculate 95% CI overlap percentage
        lower_bound_test, upper_bound_test = calculate_confidence_intervals(testPredict)
        lower_bound_sarima, upper_bound_sarima = calculate_confidence_intervals(sarimaTestPredict)
        
        ci_overlap = calculate_overlap(lower_bound_test, upper_bound_test, lower_bound_sarima, upper_bound_sarima)
        
        # Append results
        results.append({
            'Validation Period': f"{val_start} to {val_end}",
            'Look-back': look_back,
            'LSTM MAPE': lstm_mape,
            'LSTM MSE': lstm_mse,
            'LSTM RMSE': lstm_rmse,
            'SARIMA MAPE': sarima_mape,
            'SARIMA MSE': sarima_mse,
            'SARIMA RMSE': sarima_rmse,
            'CI Overlap %': ci_overlap
        })
        
    # Convert results to DataFrame for easier visualization
    results_df = pd.DataFrame(results)
    results_df

  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

  warn('Too few observations to estimate starting parameters%s.'
  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

  warn('Too few observations to estimate starting parameters%s.'
  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

  warn('Too few observations to estimate starting parameters%s.'


In [157]:
results_df

Unnamed: 0,Validation Period,Look-back,LSTM MAPE,LSTM MSE,LSTM RMSE,SARIMA MAPE,SARIMA MSE,SARIMA RMSE,CI Overlap %
0,2019-07-01 to 2020-01-01,1,0.209829,2104583.0,1450.718234,3.554651,481628800.0,21946.042968,0.0
1,2019-01-01 to 2020-01-01,1,0.21843,2239934.0,1496.640929,0.385955,5699164.0,2387.292121,0.0
2,2018-07-01 to 2020-01-01,1,0.315563,4092845.0,2023.078023,0.236099,2459211.0,1568.187025,0.0
3,2018-01-01 to 2020-01-01,1,0.231469,2447509.0,1564.45173,0.331365,4448019.0,2109.032708,0.0


In [155]:
for val_start, val_end in validation_periods:
    for look_back in [look_back_periods[0]]:
        print(val_start, val_end, look_back)

2019-07-01 2020-01-01 1
2019-01-01 2020-01-01 1
2018-07-01 2020-01-01 1
2018-01-01 2020-01-01 1
