In [46]:
import pandas as pd
import numpy as np 
import os
import plotly.express as px
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
from matplotlib import pyplot
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error
import random
from statsmodels.tsa.statespace.sarimax import SARIMAX

# For investigating timeseries data
from sklearn import preprocessing
from sklearn.model_selection import ParameterGrid
from prophet import Prophet
from statsmodels.tsa.seasonal import seasonal_decompose

# For modeling
from neuralforecast import NeuralForecast
from neuralforecast.models import LSTM, NHITS, RNN
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
import xgboost
from prophet import Prophet
#from tqdm import tqdm
import tqdm
from itertools import product

### Reading Data

In [4]:
# Reading Data
base_path =  os.getcwd()
file_name = 'Traffic_Data.xlsx'
total_path = base_path + '//Data//' 
df = pd.read_excel(total_path + file_name, sheet_name='Sheet1')
df.head(10)


Unnamed: 0,State,Region,STATIONS,CMILES,PMILES,Month,Month_2,Year,Date
0,Connecticut,Northeast,14,2546,2432,November,11,2023,2023-11-01
1,Maine,Northeast,130,1177,1148,November,11,2023,2023-11-01
2,Massachusetts,Northeast,227,5148,5013,November,11,2023,2023-11-01
3,New Hampshire,Northeast,150,1062,1034,November,11,2023,2023-11-01
4,New Jersey,Northeast,73,6569,6339,November,11,2023,2023-11-01
5,New York,Northeast,110,9144,8825,November,11,2023,2023-11-01
6,Pennsylvania,Northeast,57,8610,8408,November,11,2023,2023-11-01
7,Rhode Island,Northeast,26,661,659,November,11,2023,2023-11-01
8,Vermont,Northeast,35,543,531,November,11,2023,2023-11-01
9,Delaware,South Atlantic,0,923,900,November,11,2023,2023-11-01


### Filtering for States

In [5]:
states_of_interest = ['Oregon', 'Washington', 'California']

# Filtering for states of interest
df = df[df['State'].isin(states_of_interest)]
df = df.sort_values(by = ['Date'], ascending=True).reset_index()

for val in states_of_interest:
    # Replacing outlier January 2023 value
    mean_val = df[(df['Month'] == 'January') &
              (df['Year'] != 2023) &
              (df['State'] == val)]['CMILES'].mean()
    df['CMILES'] = np.where((df['Month'] == 'January') & (df['Year'] == 2023) & (df['State'] == val), mean_val, df['CMILES'])

df.head(10)

Unnamed: 0,index,State,Region,STATIONS,CMILES,PMILES,Month,Month_2,Year,Date
0,3058,Washington,West,130,4586.0,4383,January,1,2019,2019-01-01
1,3056,Oregon,West,153,2823.0,2769,January,1,2019,2019-01-01
2,3049,California,West,154,25741.0,25643,January,1,2019,2019-01-01
3,2998,California,West,136,23627.0,24424,February,2,2019,2019-02-01
4,3007,Washington,West,158,4089.0,4474,February,2,2019,2019-02-01
5,3005,Oregon,West,143,2393.0,2566,February,2,2019,2019-02-01
6,2947,California,West,164,28243.0,27745,March,3,2019,2019-03-01
7,2956,Washington,West,166,5245.0,5233,March,3,2019,2019-03-01
8,2954,Oregon,West,153,3088.0,3104,March,3,2019,2019-03-01
9,2905,Washington,West,170,5404.0,5229,April,4,2019,2019-04-01


### Plotting Data

In [6]:
fig = px.scatter(df.reindex(), x="Date", y="CMILES", color = 'State',
                  trendline="lowess",trendline_options=dict(frac=0.1),
                  title = 'Miles Driven by Time - Oregon')
fig.show()

### Aggregatting Data to Quarterly Level

In [7]:
aggregated_df = aggregate_to_quarterly(df)
aggregated_df.head(10)

Unnamed: 0,Quarterly_Date,State,CMILES
0,2019-03-01,California,25870.333333
1,2019-03-01,Oregon,2768.0
2,2019-03-01,Washington,4640.0
3,2019-06-01,California,30096.0
4,2019-06-01,Oregon,3223.666667
5,2019-06-01,Washington,5775.333333
6,2019-09-01,California,29158.0
7,2019-09-01,Oregon,3460.0
8,2019-09-01,Washington,5721.0
9,2019-12-01,California,32030.0


In [11]:
fig = px.scatter(aggregated_df, x="Quarterly_Date", y="CMILES", color = 'State',
                  trendline="lowess",trendline_options=dict(frac=0.1),
                  title = 'Miles Driven by Time - Oregon')
fig.show()

## Fitting Prophet Model

Now we'll fit prophet models.

In [82]:
def aggregate_to_quarterly(input_df):

    '''Takes input dataframe aggregated at monthly level and
    aggregates up to quarterly level, returns transformed dataframe'''

    q1 = [1, 2, 3]
    q2 = [4, 5, 6]
    q3 = [7, 8, 9]
    q4 = [10, 11, 12]
    quarterly_date = []
    for index, row in df.iterrows():
        new_month = '0'
        if row['Month_2'] in q1:
            new_month = '03'
        elif row['Month_2'] in q2:
            new_month = '06'
        elif row['Month_2'] in q3:
            new_month = '09'
        else:
            new_month = '12'
        new_date = str(row['Year']) + '-' + new_month + '-01'
        quarterly_date.append(new_date)
    input_df['Quarterly_Date'] = quarterly_date
    input_df['Quarterly_Date'] = pd.to_datetime(input_df.Quarterly_Date, format='%Y-%m-%d')
    input_df = input_df[['Quarterly_Date', 'State', 'CMILES']]

    # Now aggregating up to the quarterly_date, State level
    output_df = input_df.groupby(by = ['Quarterly_Date', 'State']).mean().reset_index()
    
    return output_df

    



def evaluate_prophet(df, states_of_interest, window = 1, split = 0.5, date_col = 'Quarterly_Date'):

    '''
    Trains prophet model using input dataframe for each of the specified factor levels
    and forecasts out a number of timesteps defined by the window input.
    '''
    total_observations = len(df[df['State'] == states_of_interest[0]])
    train_len = int(total_observations * split)
    models = {}
    for val in states_of_interest:

        # performing stepforward validation on each series
        predictions = []
        actuals = []
        for i in range(train_len, total_observations):
            # Generating series for 1 factor level and creating time
            series_of_interest = df[df['State'] == val].rename(columns= {date_col: 'ds', 'CMILES': 'y'})
            series_of_interest = series_of_interest.sort_values(by = ['ds'], ascending = True).reset_index()[['ds', 'y']]
            split_series = series_of_interest[:i]

            # Fitting model for series
            my_model = Prophet()
            my_model.fit(split_series)

            # Generating future dataset and makign predictions
            future_dates = my_model.make_future_dataframe(periods = 12)
            forecast = my_model.predict(future_dates)

            # Returning prediction
            yhat = forecast['yhat'][0]
            predictions.append(yhat)
            test = series_of_interest['y'].to_list()
            actuals.append(test[i])

            # Plotting results
            #my_model.plot(forecast, uncertainty=True)

        # saving model
        models[val] = [predictions, actuals]
    return models


def evaluate_neural_forecast(df, states_of_interest, window = 1, split = 0.5, date_col = 'Quarterly_Date'):

    '''
    Trains nn model using input dataframe for each of the specified factor levels
    and forecasts out a number of timesteps defined by the window input.
    '''
    total_observations = len(df[df['State'] == states_of_interest[0]])
    train_len = int(total_observations * split)
    models = {}
    for val in states_of_interest:

        # performing stepforward validation on each series
        predictions_lstm = []
        predictions_nhits = []
        actuals = []
        for i in range(train_len, total_observations):
            # Generating series for 1 factor level and creating time
            series_of_interest = df[df['State'] == val].rename(columns= {date_col: 'ds', 'CMILES': 'y'}).reset_index()[['ds', 'y']]
            series_of_interest['unique_id'] = val
            split_series = series_of_interest[:i]
            

            # Fitting model for series
            model = [LSTM(h=window,                    # Forecast horizon
                        max_steps=100,                # Number of steps to train
                        scaler_type='standard',       # Type of scaler to normalize data
                        encoder_hidden_size=32,       # Defines the size of the hidden state of the LSTM
                        decoder_hidden_size=32,)]
            nf = NeuralForecast(models=model, freq='M')
            nf.fit(df=split_series, verbose=False)

            # Making predictions
            Y_hat_df = nf.predict()

            # Returning prediction
            predictions_lstm.append(Y_hat_df['LSTM'][0])
            test = series_of_interest['y'].to_list()
            actuals.append(test[i])

            # Plotting results
            #my_model.plot(forecast, uncertainty=True)

        # saving model
        models[val] = [predictions_lstm, predictions_nhits, actuals]
    return models


def format_and_graph(input_dict, dates,  model_name, date_col = 'Quarterly_Date', y_val = 'CMILES'):

    '''Takes input dictionary containing predicted versus actual values, 
    generates a timeseries graph and calculates MAPE'''


    colors = {'Washington':'royalblue',
             'Oregon': 'firebrick',
              'California': 'green' }
    fig = go.Figure()

    states = list(input_dict.keys())
    mapes = []
    for state in states:
        predicted = input_dict[state][0]
        actual = input_dict[state][1]
        mape_calc = mean_absolute_percentage_error(predicted, actual[-len(predicted):])
        mapes.append(mape_calc)

        fig.add_trace(go.Scatter(x=dates, y=actual,
                    mode='lines',
                    name='{state} actual'.format(state = state),
                    line = dict(color=colors[state])))
        
        fig.add_trace(go.Scatter(x=dates[-len(predicted):], y=predicted,
                    mode='lines',
                    name='{state} predicted, mape: {mape}'.format(state = state, mape = mape_calc),
                    line = dict(color=colors[state], dash = 'dash')))
        # Edit the layout
    fig.update_layout(title='Predicted Versus Actual, {model}, mape: {mape}'.format(model = model_name, mape = sum(mapes)/len(mapes)),
                   xaxis_title=date_col,
                   yaxis_title=y_val)
    fig.show()


def evaluate_prophet_multi_timestep(df, states_of_interest, window = 8, date_col = 'Quarterly_Date'):
    '''
    Trains prophet model using input dataframe for each of the specified factor levels
    and forecasts out multiple timesteps(default 8), used to evaluate accuracy along entire prediction window

    INPUTS:
        df: dataframe, contains historic data
        states_of_interest: list, contains keys for different factor levels
        window: int, how many steps to forecast out, default 8
        date_col: str, contains name of dataframe column with date data
    OUTPUTS:
        models: dict, contains predicted and actual values in list form for each factor level
    '''

    total_observations = len(df[df['State'] == states_of_interest[0]])
    train_len = total_observations - window
    models = {}
    for val in states_of_interest:
        predictions = []
        actuals = []
        series_of_interest = df[df['State'] == val].rename(columns= {date_col: 'ds', 'CMILES': 'y'}).reset_index()[['ds', 'y']]
        series_of_interest['unique_id'] = val
        split_series = series_of_interest[:train_len]

        # Fitting model for series
        my_model = Prophet()
        my_model.fit(split_series)

        # Generating future dataset and makign predictions
        future_dates = my_model.make_future_dataframe(periods = 12)[:window]
        forecast = my_model.predict(future_dates)
        # Returning prediction
        yhat = forecast['yhat'].to_list()
        predictions = yhat
        test = series_of_interest['y'].to_list()
        actuals = test

        # saving model
        models[val] = [predictions, actuals]
    return models

def find_optimal_ARIMA(input_df, states_of_interest,response_var, order_list):
    '''
    finds best ARIMA modal using input dataframe for each of the specified factor levels

    INPUTS:
        input_df: dataframe, contains historic data
        states_of_interest: list, contains keys for different factor levels
        response_var: str, contains response variable in dataframe
        order: tuple, contains p, q, i variables that we want to iterate over for ARIMA
    OUTPUTS:
        models: dict, contains predicted and actual values in list form for each factor level
    '''

    all_scores = []
    for val in states_of_interest:
        results = []
        for order in tqdm.notebook.tqdm(order_list):
            try:
                model = SARIMAX(
                    input_df[input_df['State'] == val][response_var],
                    order = (order[0], order[1], order[2]),
                    simple_differencing=False).fit(disp = False)
                aic = model.aic
                results.append([order, aic])
            except:
                continue
        print(results)
        results_df = pd.DataFrame(results).rename(columns = {0: '(p,d,q)', 1: 'AIC'})
        print(results_df)
        #results_df.columns = ['(p,d,q)', 'AIC']
        # sorting results
        #results_df = results_df.sort_values(by = 'AIC', ascending = True).reset_index(drop = True)
        results_df['Series'] = val
        all_scores.append(results_df)
    all_scores_df = pd.concat(all_scores)
    return all_scores_df


def evaluate_ARIMA_multi_timestep(input_df, parameters, states_of_interest, window = 8, date_col = 'Quarterly_Date'):
    '''
    Trains ARIMA model using input dataframe for each of the specified factor levels
    and forecasts out multiple timesteps(default 8), used to evaluate accuracy along entire prediction window

    INPUTS:
        df: dataframe, contains historic data
        parameters: tuple, contains p, q, i
        states_of_interest: list, contains keys for different factor levels
        window: int, how many steps to forecast out, default 8
        date_col: str, contains name of dataframe column with date data
    OUTPUTS:
        models: dict, contains predicted and actual values in list form for each factor level
    '''
    best_p = 3#parameters[0]
    best_i = 0#parameters[1]
    best_q = 2#parameters[2]
    total_observations = len(df[df['State'] == states_of_interest[0]])
    train_len = total_observations - window
    models = {}
    for val in states_of_interest:
        predictions = []
        actuals = []
        series_of_interest = df[df['State'] == val].rename(columns= {date_col: 'ds', 'CMILES': 'y'}).reset_index()[['ds', 'y']]
        series_of_interest['unique_id'] = val
        split_series = series_of_interest[:train_len]

        # Fitting model for series
        my_model = SARIMAX(
                    split_series['y'],
                    order = (best_p, best_i, best_q),
                    simple_differencing=False).fit(disp = False)

        # Returning prediction
        yhat = my_model.get_prediction(0, window)
        yhat = yhat.predicted_mean.iloc[-window:]
        print(yhat)
        predictions = yhat
        test = series_of_interest['y'].to_list()
        actuals = test

        # saving model
        models[val] = [predictions, actuals]
    return models

In [9]:
input_dict_prophet = evaluate_prophet(aggregated_df, states_of_interest,  1, 0.5)


11:35:32 - cmdstanpy - INFO - Chain [1] start processing
11:35:33 - cmdstanpy - INFO - Chain [1] done processing
11:35:33 - cmdstanpy - INFO - Chain [1] start processing
11:35:33 - cmdstanpy - INFO - Chain [1] done processing
11:35:33 - cmdstanpy - INFO - Chain [1] start processing
11:35:37 - cmdstanpy - INFO - Chain [1] done processing
11:35:37 - cmdstanpy - INFO - Chain [1] start processing
11:35:37 - cmdstanpy - INFO - Chain [1] done processing
11:35:38 - cmdstanpy - INFO - Chain [1] start processing
11:35:38 - cmdstanpy - INFO - Chain [1] done processing
11:35:38 - cmdstanpy - INFO - Chain [1] start processing
11:35:38 - cmdstanpy - INFO - Chain [1] done processing
11:35:38 - cmdstanpy - INFO - Chain [1] start processing
11:35:39 - cmdstanpy - INFO - Chain [1] done processing
11:35:39 - cmdstanpy - INFO - Chain [1] start processing
11:35:39 - cmdstanpy - INFO - Chain [1] done processing
11:35:39 - cmdstanpy - INFO - Chain [1] start processing
11:35:39 - cmdstanpy - INFO - Chain [1]

In [20]:
# Creating input variables
total_observations = len(aggregated_df[aggregated_df['State'] == states_of_interest[0]])
train_len = int(total_observations * 0.5)
dates = aggregated_df[aggregated_df['State'] == states_of_interest[0]]['Quarterly_Date'].to_list()
model_name = 'Prophet'

# Now evaluating prophet performance
format_and_graph(input_dict_prophet, dates,  model_name)

[2768.112860177107, 2768.205959370267, 2767.635888598884, 2767.804166539986, 2762.839309766387, 2770.9477012878806, 2772.3388888479153, 2764.147574613571, 2748.452174243592, 2743.7800359626594]
[3305.3333333333335, 2821.3333333333335, 2701.6666666666665, 3029.6666666666665, 3296.3333333333335, 2813.3333333333335, 2681.0, 3204.0, 3417.3333333333335, 2953.6666666666665]
[4640.247670681609, 4639.843906402548, 4640.040074090161, 4519.805786927155, 4560.193612213659, 4535.048992338033, 4561.481188456509, 4446.329893393924, 4540.555561456806, 4553.002387555789]
[5573.666666666667, 4704.666666666667, 4382.0, 5296.0, 5924.666666666667, 4578.333333333333, 4248.666666666667, 5207.333333333333, 5682.0, 4660.0]
[25872.285508382796, 25869.26443293764, 25642.279306965444, 25833.584250121286, 26004.836643820665, 25966.923928009084, 26243.011740725837, 25584.557051956275, 26059.599339273365, 26085.94521887006]
[27718.333333333332, 28096.666666666668, 26694.333333333332, 28719.333333333332, 29570.66666

### Evaluating Prophet Model over Multiple Steps

In [32]:
prophet_multi_step = evaluate_prophet_multi_timestep(aggregated_df, states_of_interest)

11:49:33 - cmdstanpy - INFO - Chain [1] start processing
11:49:36 - cmdstanpy - INFO - Chain [1] done processing
11:49:36 - cmdstanpy - INFO - Chain [1] start processing
11:49:37 - cmdstanpy - INFO - Chain [1] done processing
11:49:37 - cmdstanpy - INFO - Chain [1] start processing
11:49:37 - cmdstanpy - INFO - Chain [1] done processing


In [34]:
# Creating input variables
total_observations = len(aggregated_df[aggregated_df['State'] == states_of_interest[0]])
train_len = total_observations -8
dates = aggregated_df[aggregated_df['State'] == states_of_interest[0]]['Quarterly_Date'].to_list()
model_name = 'Prophet'

# Now evaluating prophet performance
format_and_graph(prophet_multi_step, dates,  model_name)

### Finding Best ARIMA Model

In [64]:
# normal variables
p = range(0, 4, 1)
d = range(0,2,1)
q = range(0, 4, 1)
# Combining parameters
parameters = product(p,d, q)
states_of_interest = ['Oregon', 'Washington', 'California']

# Training models
results_df = find_optimal_ARIMA(aggregated_df, states_of_interest,'CMILES', parameters)
results_df = results_df.sort_values(by = ['AIC'])
best_model = results_df['(p,d,q)'][0]
# viewing results
print(results_df.sort_values(by = ['AIC']))

0it [00:00, ?it/s]


An unsupported index was provided and will be ignored when e.g. forecasting.


An unsupported index was provided and will be ignored when e.g. forecasting.


An unsupported index was provided and will be ignored when e.g. forecasting.


An unsupported index was provided and will be ignored when e.g. forecasting.


Non-invertible starting MA parameters found. Using zeros as starting parameters.


An unsupported index was provided and will be ignored when e.g. forecasting.


An unsupported index was provided and will be ignored when e.g. forecasting.


Non-invertible starting MA parameters found. Using zeros as starting parameters.


Maximum Likelihood optimization failed to converge. Check mle_retvals


An unsupported index was provided and will be ignored when e.g. forecasting.


An unsupported index was provided and will be ignored when e.g. forecasting.


Non-invertible starting MA parameters found. Using zeros as starting parameters.


An unsupported index was provided and will be 

[[(0, 0, 0), 378.65259219990946], [(0, 0, 1), 357.869610503551], [(0, 0, 2), 340.55660566060163], [(0, 0, 3), 332.23626223626013], [(0, 1, 0), 283.5135159078603], [(0, 1, 1), 279.67534452399366], [(0, 1, 2), 277.76832874078366], [(0, 1, 3), 278.7522657991318], [(1, 0, 0), 304.1852157637528], [(1, 0, 1), 299.0656254703221], [(1, 0, 2), 297.96878045912104], [(1, 0, 3), 344.58767991876545], [(1, 1, 0), 285.49072296314034], [(1, 1, 1), 280.6601072758117], [(1, 1, 2), 279.6334438296898], [(1, 1, 3), 279.5674146250593], [(2, 0, 0), 306.04313407883416], [(2, 0, 1), 308.17666748216027], [(2, 0, 2), 301.92633200617746], [(2, 0, 3), 328.80301629485814], [(2, 1, 0), 276.3993440333977], [(2, 1, 1), 274.43385091105796], [(2, 1, 2), 273.74589739146916], [(2, 1, 3), 274.233903604533], [(3, 0, 0), 295.62961524895], [(3, 0, 1), 295.0059514552027], [(3, 0, 2), 12.0], [(3, 0, 3), 298.9509761184035], [(3, 1, 0), 274.21722174469625], [(3, 1, 1), 275.7247109776909], [(3, 1, 2), 273.1379713723349], [(3, 1, 3

0it [00:00, ?it/s]

[]
Empty DataFrame
Columns: []
Index: []


0it [00:00, ?it/s]

[]
Empty DataFrame
Columns: []
Index: []
      (p,d,q)         AIC  Series
26  (3, 0, 2)   12.000000  Oregon
30  (3, 1, 2)  273.137971  Oregon
22  (2, 1, 2)  273.745897  Oregon
28  (3, 1, 0)  274.217222  Oregon
23  (2, 1, 3)  274.233904  Oregon
21  (2, 1, 1)  274.433851  Oregon
29  (3, 1, 1)  275.724711  Oregon
20  (2, 1, 0)  276.399344  Oregon
6   (0, 1, 2)  277.768329  Oregon
31  (3, 1, 3)  278.025997  Oregon
7   (0, 1, 3)  278.752266  Oregon
15  (1, 1, 3)  279.567415  Oregon
14  (1, 1, 2)  279.633444  Oregon
5   (0, 1, 1)  279.675345  Oregon
13  (1, 1, 1)  280.660107  Oregon
4   (0, 1, 0)  283.513516  Oregon
12  (1, 1, 0)  285.490723  Oregon
25  (3, 0, 1)  295.005951  Oregon
24  (3, 0, 0)  295.629615  Oregon
10  (1, 0, 2)  297.968780  Oregon
27  (3, 0, 3)  298.950976  Oregon
9   (1, 0, 1)  299.065625  Oregon
18  (2, 0, 2)  301.926332  Oregon
8   (1, 0, 0)  304.185216  Oregon
16  (2, 0, 0)  306.043134  Oregon
17  (2, 0, 1)  308.176667  Oregon
19  (2, 0, 3)  328.803016  Oregon
3   (0,

## Evaluating ARIMA over Multiple Timesteps

In [83]:
arima_multi_step = evaluate_ARIMA_multi_timestep(aggregated_df, best_model, states_of_interest, window = 8, date_col = 'Quarterly_Date')


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Non-invertible starting MA parameters found. Using zeros as starting parameters.



1    2809.177362
2    2471.890519
3    2862.136682
4    3235.151979
5    3170.044405
6    3306.177411
7    3627.726926
8    3489.876045
Name: predicted_mean, dtype: float64



Non-invertible starting MA parameters found. Using zeros as starting parameters.



1    4561.062042
2    4102.967256
3    5077.976315
4    5525.789884
5    6035.919712
6    5789.179870
7    5772.146650
8    5656.266534
Name: predicted_mean, dtype: float64
1    25574.113755
2    23923.030070
3    27400.426402
4    29703.177312
5    28011.687708
6    29427.002784
7    29735.744469
8    29596.756296
Name: predicted_mean, dtype: float64


In [84]:
# Creating input variables
total_observations = len(aggregated_df[aggregated_df['State'] == states_of_interest[0]])
train_len = total_observations -8
dates = aggregated_df[aggregated_df['State'] == states_of_interest[0]]['Quarterly_Date'].to_list()
model_name = 'Prophet'

# Now evaluating prophet performance
format_and_graph(arima_multi_step, dates,  model_name)

### Now Evaluating NN Forecast

In [None]:

parameters = results_df['(p,d,q)'][0]
input_dict_lstm = evaluate_neural_forecast(df, states_of_interest,1, 0.5)

In [None]:
# Creating input variables
total_observations = len(df[df['State'] == states_of_interest[0]])
train_len = int(total_observations * 0.5)
dates = df[df['State'] == states_of_interest[0]]['Date'].to_list()
model_name = 'LSTM'

# Now evaluating prophet performance
format_and_graph(input_dict_lstm, dates,  model_name)