In [10]:
# import all the necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [11]:
# load the dataset
data = pd.read_csv('female-births.csv', index_col = 'Date', parse_dates = True)
data.index.freq = 'D'

In [12]:
print(data)

            Births
Date              
1959-01-01      35
1959-01-02      32
1959-01-03      30
1959-01-04      31
1959-01-05      44
...            ...
1959-12-27      37
1959-12-28      52
1959-12-29      48
1959-12-30      55
1959-12-31      50

[365 rows x 1 columns]


In [13]:
# display all the indexes of the dataset
print(data.index)

DatetimeIndex(['1959-01-01', '1959-01-02', '1959-01-03', '1959-01-04',
               '1959-01-05', '1959-01-06', '1959-01-07', '1959-01-08',
               '1959-01-09', '1959-01-10',
               ...
               '1959-12-22', '1959-12-23', '1959-12-24', '1959-12-25',
               '1959-12-26', '1959-12-27', '1959-12-28', '1959-12-29',
               '1959-12-30', '1959-12-31'],
              dtype='datetime64[ns]', name='Date', length=365, freq='D')


In [14]:
# display the first five rows of the dataset
print(data.head())

            Births
Date              
1959-01-01      35
1959-01-02      32
1959-01-03      30
1959-01-04      31
1959-01-05      44


In [15]:
# size of the dataset
print(data.shape)

(365, 1)


In [16]:
# To check if any NULL Values are present in the dataset
print(data.isnull().sum())

Births    0
dtype: int64


# So from the above observation, it is clear that there are no NULL Values in the dataset

In [19]:
# ignore all unnecessary warnings
import warnings
warnings.filterwarnings("ignore")

# AUTOARIMA FOR FEMALE BIRTHS

In [21]:
from pmdarima import auto_arima

In [22]:
stepwise_auto = auto_arima(data['Births'],
                          start_p = 1, start_q = 1, max_p = 3, max_q = 3, m = 12,
                          start_P = 0, seasonal = True,
                          d = None, D = 1, trace = True,
                          error_action = 'ignore',
                          suppress_warnings = True,
                          stepwise = True)

Performing stepwise search to minimize aic
 ARIMA(1,0,1)(0,1,1)[12] intercept   : AIC=inf, Time=1.36 sec
 ARIMA(0,0,0)(0,1,0)[12] intercept   : AIC=2662.069, Time=0.03 sec
 ARIMA(1,0,0)(1,1,0)[12] intercept   : AIC=2530.513, Time=0.29 sec
 ARIMA(0,0,1)(0,1,1)[12] intercept   : AIC=inf, Time=0.83 sec
 ARIMA(0,0,0)(0,1,0)[12]             : AIC=2660.366, Time=0.03 sec
 ARIMA(1,0,0)(0,1,0)[12] intercept   : AIC=2653.014, Time=0.07 sec
 ARIMA(1,0,0)(2,1,0)[12] intercept   : AIC=2483.445, Time=1.81 sec
 ARIMA(1,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=1.85 sec
 ARIMA(1,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=0.63 sec
 ARIMA(0,0,0)(2,1,0)[12] intercept   : AIC=2492.040, Time=2.28 sec
 ARIMA(2,0,0)(2,1,0)[12] intercept   : AIC=2484.558, Time=1.26 sec
 ARIMA(1,0,1)(2,1,0)[12] intercept   : AIC=2484.848, Time=1.47 sec
 ARIMA(0,0,1)(2,1,0)[12] intercept   : AIC=2484.730, Time=1.72 sec
 ARIMA(2,0,1)(2,1,0)[12] intercept   : AIC=2486.114, Time=2.97 sec
 ARIMA(1,0,0)(2,1,0)[12]             : 

In [23]:
# give the summary for the arima model
stepwise_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,365.0
Model:,"SARIMAX(2, 0, 0)x(0, 1, [1, 2], 12)",Log Likelihood,-1210.703
Date:,"Fri, 30 Jun 2023",AIC,2431.405
Time:,17:16:58,BIC,2450.738
Sample:,01-01-1959,HQIC,2439.098
,- 12-31-1959,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,0.1754,0.057,3.094,0.002,0.064,0.287
ar.L2,0.1035,0.056,1.850,0.064,-0.006,0.213
ma.S.L12,-0.9951,0.055,-18.062,0.000,-1.103,-0.887
ma.S.L24,0.1237,0.051,2.423,0.015,0.024,0.224
sigma2,52.8926,3.387,15.616,0.000,46.254,59.531

0,1,2,3
Ljung-Box (L1) (Q):,0.1,Jarque-Bera (JB):,18.53
Prob(Q):,0.76,Prob(JB):,0.0
Heteroskedasticity (H):,1.03,Skew:,0.28
Prob(H) (two-sided):,0.87,Kurtosis:,3.97


# From the above observation, we can see that the d value is 0 which means no differencing is needed and this model is a stationary model