In [None]:
import numpy as np, pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_predict
import matplotlib.pyplot as plt

In [None]:
# Notebook-wide matplotlib defaults: 9x7 inch figures rendered at 120 dpi.
plt.rcParams['figure.figsize'] = (9, 7)
plt.rcParams['figure.dpi'] = 120

### WHO Data

In [None]:
# Read the WHO maternal mortality CSV that sits next to this notebook.
who_csv_path = './who_maternal_mortality.csv'
df = pd.read_csv(who_csv_path)

In [None]:
# Keep only the row(s) whose Country is the "World" aggregate.
is_world = df["Country"] == "World"
df = df.loc[is_world]

In [None]:
# Reshape wide -> long: every column after the first becomes one (year, value) row.
# Assumes "Country" is the first column and the remaining headers are years — TODO confirm.
year_columns = df.columns[1:]
df_transform = df.melt(id_vars='Country', value_vars=year_columns).rename(columns={"variable": "year"})

In [None]:
from statsmodels.tsa.stattools import adfuller
from numpy import log

# Augmented Dickey-Fuller test on the non-missing values of the WHO series.
who_values = df_transform.value.dropna()
result = adfuller(who_values)
adf_stat, p_value = result[0], result[1]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)

In [None]:
# Re-run the ADF test with AIC-based lag selection and report the full result.
# adfuller returns (statistic, p-value, lags used, n observations, critical values, best IC).
result = adfuller(df_transform.value.dropna(), autolag='AIC')
adf_stat, p_value, used_lag, n_obs, critical_values = result[:5]
print(f'ADF Statistic: {adf_stat}')
# The lag count is element 2 of the result; element 1 is the p-value.
print(f'n_lags: {used_lag}')
print(f'p-value: {p_value}')
# Print the header once, then one line per significance level.
print('Critical Values:')
for key, value in critical_values.items():
    print(f'   {key}, {value}')

The p-value obtained is greater than the significance level of 0.05, and the ADF statistic is lower than any of the critical values.

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
fig, axes = plt.subplots(figsize=(10,7))
plt.plot(df_transform.value.dropna());
plt.title('Random');

Visualizing the series shows that it is not stationary.

In [None]:
# 4x2 grid: the WHO series at differencing orders 0-3 (left column) and the ACF of each (right column).
fig, axes = plt.subplots(4, 2, sharex=True)
panel_titles = [
    'Original Series',
    '1st Order Differencing',
    '2nd Order Differencing',
    '3rd Order Differencing',
]
current = df_transform.value
for order, panel_title in enumerate(panel_titles):
    axes[order, 0].plot(current)
    axes[order, 0].set_title(panel_title)
    # The undifferenced series goes to plot_acf as-is; differenced ones have their NaNs dropped first.
    acf_input = current if order == 0 else current.dropna()
    plot_acf(acf_input, ax=axes[order, 1])
    # Difference once more for the next row.
    current = current.diff()
plt.show()

#### Finding AR Term

In [None]:
# PACF of the third-differenced WHO series, used to choose the AR order p.
who_diff3 = df_transform.value.diff().diff().diff()
fig, axes = plt.subplots(1, 2, sharex=True)
axes[0].plot(who_diff3)
# The title names the series actually drawn (third difference, not first).
axes[0].set_title('3rd Order Differencing')
# NOTE(review): y-limits are requested before plot_pacf draws; plot_pacf may reset them — confirm.
axes[1].set(ylim=(0,5))
plot_pacf(who_diff3.dropna(), ax=axes[1], method='ywm', lags=6)
plt.show()

Lags 0 and 1 are above the significance limits, so p can be 1.

#### Finding MA Term

In [None]:
# ACF of the third-differenced WHO series, used to choose the MA order q.
who_diff3 = df_transform.value.diff().diff().diff()
fig, axes = plt.subplots(1, 2, sharex=True)
axes[0].plot(who_diff3)
# The title names the series actually drawn (third difference, not first).
axes[0].set_title('3rd Order Differencing')
# NOTE(review): y-limits are requested before plot_acf draws; plot_acf may reset them — confirm.
axes[1].set(ylim=(0,1.2))
plot_acf(who_diff3.dropna(), ax=axes[1])

plt.show()

Only lag 0 is well above the significance line, hence q = 1.

In [None]:
# Replace the row index with annual timestamps generated between '2000' and '2018'.
# The generated range must have exactly as many entries as df_transform has rows, or the assignment raises.
who_year_index = pd.date_range(start='2000', end='2018', freq='A')
df_transform.index = who_year_index

In [None]:
# ARIMA model with order (p, d, q) = (0, 2, 0) fitted on the WHO series.
# NOTE(review): the narrative above suggests p = 1 and q = 1 from the third-difference plots,
# while this cell fits (0, 2, 0) — confirm the intended order.
who_order = (0, 2, 0)
model = ARIMA(df_transform.value, order=who_order)
model_fit = model.fit()
print(model_fit.summary())

In [None]:
# Residual diagnostics for the fitted model: residuals over the index (left) and their KDE density (right).
residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(nrows=1, ncols=2)
left_ax, right_ax = ax
residuals.plot(ax=left_ax, title="Residuals")
residuals.plot(ax=right_ax, kind='kde', title='Density')
plt.show()

In [None]:
# Draw the WHO frame, then overlay the model's predictions from '2000' through '2070' on the same axes.
fig, ax = plt.subplots()
ax = df_transform.plot(ax=ax)
ge = plot_predict(model_fit, start='2000', end='2070', ax=ax)
plt.show()

In [None]:
# Predictions from the fitted WHO model for '2018' through '2070', printed as a series.
forecast_start, forecast_end = '2018', '2070'
forecast = model_fit.predict(start=forecast_start, end=forecast_end)
print(forecast)

### IHME Data

In [None]:
# Read the IHME 2022 maternal mortality CSV; note this rebinds `df`, replacing the WHO frame.
ihme_csv_path = './ihme_2022_maternal_mortality.csv'
df = pd.read_csv(ihme_csv_path)

In [None]:
from statsmodels.tsa.stattools import adfuller
from numpy import log

# Augmented Dickey-Fuller test on the non-missing values of the IHME series.
ihme_values = df.value.dropna()
result = adfuller(ihme_values)
adf_stat, p_value = result[0], result[1]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)

#### Finding Difference Order

In [None]:
# 4x2 grid: the IHME series at differencing orders 0-3 (left column) and the ACF of each (right column).
fig, axes = plt.subplots(4, 2, sharex=True)
panel_titles = [
    'Original Series',
    '1st Order Differencing',
    '2nd Order Differencing',
    '3rd Order Differencing',
]
current = df.value
for order, panel_title in enumerate(panel_titles):
    axes[order, 0].plot(current)
    axes[order, 0].set_title(panel_title)
    # The undifferenced series goes to plot_acf as-is; differenced ones have their NaNs dropped first.
    acf_input = current if order == 0 else current.dropna()
    plot_acf(acf_input, ax=axes[order, 1])
    # Difference once more for the next row.
    current = current.diff()
plt.show()

<b>We can see that the differencing of order 2 is helping us make the series stationary, so let's choose d = 2.</b>

#### Finding AR Term

In [None]:
# Second-differenced IHME series (left) next to its PACF over 10 lags (right), used to choose p.
ihme_diff2 = df.value.diff().diff()
fig, axes = plt.subplots(1, 2, sharex=True)
series_ax, pacf_ax = axes
series_ax.plot(ihme_diff2)
series_ax.set_title('2nd Differencing')
pacf_ax.set(ylim=(0,5))
plot_pacf(ihme_diff2.dropna(), ax=pacf_ax, method='ywm', lags=10)
plt.show()

So, p = 0.

#### Finding MA Term

In [None]:
# Second-differenced IHME series (left) next to its ACF (right), used to choose q.
ihme_diff2 = df.value.diff().diff()
fig, axes = plt.subplots(1, 2, sharex=True)
series_ax, acf_ax = axes
series_ax.plot(ihme_diff2)
series_ax.set_title('2nd Differencing')
acf_ax.set(ylim=(0,1.2))
plot_acf(ihme_diff2.dropna(), ax=acf_ax)

plt.show()

I chose the MA term q = 0.

In [None]:
# Replace the row index with annual timestamps generated between '1990' and '2031'.
# The generated range must have exactly as many entries as df has rows, or the assignment raises.
ihme_year_index = pd.date_range(start='1990', end='2031', freq='A')
df.index = ihme_year_index

In [None]:
# ARIMA model with order (p, d, q) = (0, 2, 0) fitted on the IHME series,
# matching the d = 2, p = 0, q = 0 choices stated in the narrative above.
ihme_order = (0, 2, 0)
model = ARIMA(df.value, order=ihme_order)
model_fit = model.fit()
print(model_fit.summary())

In [None]:
# Residual diagnostics for the fitted model: residuals over the index (left) and their KDE density (right).
residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(nrows=1, ncols=2)
line_ax, density_ax = ax
residuals.plot(ax=line_ax, title="Residuals")
residuals.plot(ax=density_ax, kind='kde', title='Density')
plt.show()

In [None]:
# Draw the IHME `value` column, then overlay the model's predictions from '1990' through '2060' on the same axes.
fig, ax = plt.subplots()
value_frame = df[["value"]]
ax = value_frame.plot(ax=ax)
ge = plot_predict(model_fit, start='1990', end='2060', ax=ax)
plt.show()

In [None]:
# Predictions from the fitted IHME model for '2031' through '2070', printed as a series.
forecast_start, forecast_end = '2031', '2070'
forecast = model_fit.predict(start=forecast_start, end=forecast_end)
print(forecast)

In [None]:
%%capture
# Export this notebook to HTML via nbconvert, omitting code inputs (--no-input) and cell prompts (--no-prompt).
# %%capture must remain the first line of the cell; it silences the command's console output.
!jupyter nbconvert --to html --no-input --no-prompt forecast_maternal_mortality_rate.ipynb