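In this notebook I show how to calculate the confidence interval of an ARIMA model's forecast by hand (predicted mean ± 1.96 × standard error) and compare it with the interval that the fitted model reports.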

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

# Read the shootings records from the CSV file and preview the first rows
shootings_csv = '../input/us-police-shootings/shootings.csv'
df = pd.read_csv(shootings_csv)
df.head()

In [None]:
# Convert the raw records into a weekly time series

# Parse the date column so it can serve as a DatetimeIndex
df['date'] = pd.to_datetime(df['date'])

# Count records per week. 'W' is the canonical weekly frequency alias; the
# lowercase 'w' spelling is deprecated in recent pandas releases.
# count() tallies non-null values only, so this equals the number of rows per
# week as long as manner_of_death has no missing values (TODO confirm).
df_ts = df.set_index('date').resample('W')['manner_of_death'].count()

In [None]:
# Line chart of the weekly counts, drawn with the 'fivethirtyeight' style
with plt.style.context('fivethirtyeight'):
    weekly_ax = df_ts.plot(figsize=(12, 8))
    weekly_ax.set_ylabel('# of Shootings')
    weekly_ax.set_title('Number of Shootings per Week')

In [None]:
# Number of weekly observations, checked before picking the train/test split
df_ts.shape[0]

In [None]:
# Hold out every observation after the first 200 weeks for testing
n_train = 200
train = df_ts.iloc[:n_train]
test = df_ts.iloc[n_train:]

# Build an ARIMA model with order (1, 1, 1) and fit it on the training weeks
model = ARIMA(train, order=(1, 1, 1))
model_fit = model.fit()

# Forecast one step per held-out week; the returned object exposes the
# predicted mean, its standard error and the confidence interval
result = model_fit.get_forecast(steps=len(test))

In [None]:
# Show the interval bounds the model reports for every forecast step
# (alpha=0.05 is the default, i.e. a 95% confidence interval)
result.conf_int(alpha=0.05)

In [None]:
# Upper bound of the 95% confidence interval for 2018-11-04, computed by hand:
# predicted mean + 1.96 standard errors
target_date = '2018-11-04'
result.predicted_mean.loc[target_date] + 1.96 * result.se_mean.loc[target_date]

In [None]:
# Lower bound of the 95% confidence interval for 2018-11-04, computed by hand:
# predicted mean - 1.96 standard errors
target_date = '2018-11-04'
result.predicted_mean.loc[target_date] - 1.96 * result.se_mean.loc[target_date]

In [None]:
# Bounds reported by the model for the same date, to compare with the manual values
interval_table = result.conf_int()
interval_table.loc['2018-11-04']

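We see that these two values are very close to the confidence interval that the ARIMA model produces (they agree to within 3 decimal places). The small remaining difference arises because 1.96 is a rounded value of the normal critical value (≈ 1.95996) that the model uses for a 95% interval.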