In [13]:
import matplotlib.pyplot as plt
import autograd.numpy as np
import autograd.scipy.stats as sps_autograd
from autograd import grad, hessian
from statsmodels.tsa.arima_process import ArmaProcess
from scipy.optimize import minimize
from scipy.linalg import toeplitz
import pandas as pd

In [None]:
"""
Simulate ARMA(1, 1) model
"""
# True parameters used for the simulation.
a = 0.5
b = 0.2
# Lag-polynomial coefficients handed to ArmaProcess: leading 1, then -a / -b.
ar = np.array([1, -a])
ma = np.array([1, -b])

# Create ARMA process object
arma_process = ArmaProcess(ar, ma)

# Seed the global NumPy generator so the simulated series is the same after
# every kernel restart (generate_sample draws its noise from it by default).
np.random.seed(0)

# Simulate N samples
N = 10000
y = arma_process.generate_sample(nsample=N, scale=2)  # scale is the standard deviation of the white noise


# Initialize the parameters
theta = np.array([a, b])
init_sigma2 = 100
# NOTE: auto_grad is defined in a later cell; that cell has to be executed
# before this call can succeed on a fresh kernel.
obj, sigma2_hat = auto_grad(theta, init_sigma2)

np.sqrt(sigma2_hat)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 1)

In [None]:
def auto_grad(theta, init_sigma2, data=None):
    """Kalman-filter an ARMA(1, 1) series and differentiate each log-likelihood term.

    Alongside the usual predict/update recursion, the derivatives of the
    state, its covariance, the forecast error and the forecast variance with
    respect to ``theta`` are propagated, so the gradient of every
    per-observation log-likelihood term is available in closed form.

    Parameters
    ----------
    theta : array-like of length 2
        ``(a, b)``: the AR and MA parameters. They enter the state-space
        model through ``F = [[a, 1], [0, 0]]`` and ``G = [1, -b]'``.
    init_sigma2 : float
        Variance value used only to build the initial state covariance ``V``
        and its derivative ``dV``.
    data : sequence of float, optional
        Observations to filter. When omitted, the notebook-level series ``y``
        is used and its first ``N`` entries are processed, which is what the
        function did before this parameter existed.

    Returns
    -------
    results : dict
        ``'log_likelihood'`` holds one scalar per observation and
        ``'analytical_grad'`` one length-2 array per observation: the
        derivative of that scalar with respect to ``(a, b)``.
    sigma2_hat : float
        Mean of ``e_t**2 / r_t`` over all processed observations.

    Raises
    ------
    ValueError
        If there is no observation to process.
    """
    if data is None:
        # Backward-compatible default: read the series and its length from
        # the notebook globals.
        data, n_obs = y, N
    else:
        n_obs = len(data)
    if n_obs < 1:
        raise ValueError("auto_grad needs at least one observation")

    a, b = theta
    n_state = 2
    sigma2 = init_sigma2  # CHANGE PARAMETER HERE

    # State-space matrices (kept as simple array literals so autograd can
    # trace through a and b).
    F = np.array([[a, 1.0], [0.0, 0.0]])
    G = np.array([[1.0], [-b]])
    H = np.array([[1.0, 0.0]])

    # Derivatives of F and G; the leading axis indexes the parameter
    # (0 -> a, 1 -> b).
    dF = np.array([
        [[1.0, 0.0], [0.0, 0.0]],
        [[0.0, 0.0], [0.0, 0.0]],
    ])
    dG = np.array([
        [[0.0], [0.0]],
        [[0.0], [-1.0]],
    ])
    # Transpose each per-parameter matrix. A bare `.T` on a 3-D array would
    # reverse all three axes and mix the parameter axis into the matrices.
    dF_T = np.transpose(dF, (0, 2, 1))
    dG_T = np.transpose(dG, (0, 2, 1))

    # Initial state covariance and its derivative with respect to (a, b).
    var0 = sigma2 * (1 - 2 * a * b + b**2) / (1 - a**2)
    V = np.array([
        [var0, -b],
        [-b, b**2 * sigma2],
    ])
    dV = np.array([
        [[(2 * sigma2 * (a - b) * (1 - a * b)) / (1 - a**2)**2, 0],
         [0, 0]],
        [[2 * sigma2 * (b - a) / (1 - a**2), -1],
         [-1, 2 * b * sigma2]],
    ])

    # Initial state and its derivative.
    x = np.zeros((n_state, 1))
    dx = np.zeros((2, n_state, 1))

    results = {
        'analytical_grad': [],
        'log_likelihood': []
    }

    # Running sums of e_t^2 / r_t and of its derivative. They are updated
    # with plain addition rather than `+=`, because autograd's documentation
    # lists in-place array operations as unsupported.
    sigma2_hat_sum = 0.0
    dsigma2_hat_sum = np.zeros((2, 1, 1))

    for t in range(n_obs):
        n = t + 1  # number of observations processed so far

        # 1. Predict: one-step-ahead state mean and covariance.
        x_predict = F @ x
        V_predict = F @ V @ F.T + G @ G.T

        # Forecast error and its variance (both scalars).
        e_t = data[t] - (H @ x_predict)[0, 0]
        r_t = (H @ V_predict @ H.T)[0, 0]

        # Derivatives of the prediction step (product rule on F x and on
        # F V F' + G G').
        dx_predict = F @ dx + dF @ x
        dV_predict = (F @ dV @ F.T + dF @ V @ F.T + F @ V @ dF_T
                      + dG @ G.T + G @ dG_T)

        # Derivatives of e_t and r_t, each of shape (2, 1, 1).
        de_t = -H @ dx_predict
        dr_t = H @ dV_predict @ H.T

        # Running variance estimate and its derivative.
        sigma2_hat_sum = sigma2_hat_sum + e_t**2 / r_t
        sigma2_hat = sigma2_hat_sum / n
        increment = (2 * e_t * de_t) / r_t - (e_t**2 * dr_t) / (r_t**2)
        dsigma2_hat_sum = dsigma2_hat_sum + increment
        dsigma2_hat = dsigma2_hat_sum / n

        # 2. Update: Kalman gain, then filtered state and covariance.
        gain_numer = V_predict @ H.T
        K = gain_numer / r_t
        x = x_predict + K * e_t
        V = (np.eye(n_state) - K @ H) @ V_predict

        # Derivatives of the update step (quotient rule for the gain).
        dK = (dV_predict @ H.T / r_t) - (gain_numer / r_t**2) @ dr_t
        dx = dx_predict + K @ de_t + dK * e_t
        dV = dV_predict - dK @ H @ V_predict - K @ H @ dV_predict

        # Log-likelihood term for this observation, evaluated with the
        # running variance estimate.
        log_likelihood = -0.5 * (np.log(2 * np.pi)
                                 + np.log(sigma2_hat)
                                 + np.log(r_t)
                                 + e_t**2 / (r_t * sigma2_hat))

        # Its derivative: the last summand is the quotient rule applied to
        # e_t^2 / (r_t * sigma2_hat).
        analytical_grad = -0.5 * (
            dsigma2_hat / sigma2_hat
            + dr_t / r_t
            + (2 * e_t * sigma2_hat * r_t * de_t
               - e_t**2 * r_t * dsigma2_hat
               - e_t**2 * sigma2_hat * dr_t) / (sigma2_hat**2 * r_t**2)
        )

        results['analytical_grad'].append(analytical_grad.flatten())
        results['log_likelihood'].append(log_likelihood)

    return results, sigma2_hat



In [71]:
# Compare the analytical gradient with autograd at a test parameter point.
theta = np.array([-0.2, 0.1])
init_sigma2 = 0.5
obj, sigma2_hat = auto_grad(theta, init_sigma2)


def loglik_at(param, t):
    """Log-likelihood term of observation t, as a function of the parameters."""
    return auto_grad(param, init_sigma2)[0]['log_likelihood'][t]


# grad differentiates with respect to the first argument (param); build the
# gradient function once instead of once per iteration.
grad_loglik_at = grad(loglik_at)

# NOTE: every gradient evaluation re-runs the whole filter, so this loop costs
# on the order of N**2 filter steps; keep N small when running this check.
records = []
for t in range(N):
    # Analytical gradient
    analytical_grad = obj['analytical_grad'][t]

    # Autograd
    auto_grad_val = grad_loglik_at(theta, t)

    # Collect one row per time step; the frame is built once after the loop.
    records.append({
        't': t,
        'analytical_grad': analytical_grad,
        'autograd': auto_grad_val,
        'equal': bool(np.allclose(analytical_grad, auto_grad_val)),
    })

# Dataframe to store the results
df = pd.DataFrame(records, columns=['t', 'analytical_grad', 'autograd', 'equal'])

# Check the results: True only when every time step agrees.
print(df['equal'].sum() == N)
df.tail()


True


Unnamed: 0,t,analytical_grad,autograd,equal
95,95,"[0.49507189378486227, -0.5522801086596363]","[0.49507189378486244, -0.5522801086596362]",True
96,96,"[0.5969777415824398, -0.6797712832811074]","[0.5969777415824399, -0.6797712832811076]",True
97,97,"[0.0995772364982111, -0.16071296360609078]","[0.09957723649821103, -0.16071296360609078]",True
98,98,"[0.00413524151394562, -0.06638362821102806]","[0.004135241513945483, -0.06638362821102811]",True
99,99,"[-0.9431008807668196, 0.6206769294591853]","[-0.9431008807668194, 0.6206769294591852]",True


In [57]:
# Re-run the filter at the parameters used for the simulation, this time with
# a unit initial variance.
init_sigma2 = 1
theta = np.array([a, b])
obj, sigma2_hat = auto_grad(theta, init_sigma2)

# Square root of the final running variance estimate.
np.sqrt(sigma2_hat)

np.float64(1.996055280378177)

In [118]:
# Scratch check: display the 2x2 identity matrix.
np.eye(2)

array([[1., 0.],
       [0., 1.]])