In [73]:
import numpy as np
from scipy import optimize as opt, linalg as la
from scipy.stats import norm
from matplotlib import pyplot as plt
plt.style.use('ggplot')

In [2]:
time_series_a = np.loadtxt('time_series_a.txt')
time_series_b = np.loadtxt('time_series_b.txt')
time_series_c = np.loadtxt('time_series_c.txt')

# Problem 1

In [78]:
def arma_likelihood(time_series, phis=np.array([]), 
                    thetas=np.array([]), mu=0., sigma=1.):
    """
    Return the log-likelihood of the ARMA model parameters, given the time
    series.
    
    Parameters
    ----------
    time_series : ndarray of shape (n,1)
        The time series in question, z_t
    phis : ndarray of shape (p,)
        The phi parameters
    thetas : ndarray of shape (q,)
        The theta parameters
    mu : float
        The parameter mu
    sigma : float
        The standard deviation of the a_t random variables
    
    Returns
    -------
    log_likelihood : float
        The log-likelihood of the model
    """
    z = time_series - mu
    
    # get parameters for the sizes of everything
    p = phis.shape[0]
    q = thetas.shape[0]
    r = np.max([p,q+1])
    d = z.ndim
    
    if phis.size>0:
        phis = np.concatenate((phis,np.zeros(r-p)))
    if thetas.size>0:
        thetas = np.concatenate((thetas,np.zeros(r-1-q)))
    
    # set up necessary matrices
    bottom = np.concatenate((np.eye(r-1),np.zeros(r-1).reshape((r-1,1))),
                            axis=1) # this is the bottom of the F matrix
    F = np.concatenate((phis.reshape((1,r)),bottom),axis=0)
    H = np.concatenate(([1],thetas)).reshape((1,r))
    Q = np.zeros((r,r))
    Q[0,0] = sigma**2
    #print(F)
    #print(H)
    #print(Q)
    
    # 
    xkp = np.zeros(r)
    flatPkp = la.solve(np.eye(r**2) - np.kron(F,F), Q.flatten())
    Pkp = flatPkp.reshape((r,r))
    
    loglikelihood = 0
    
    for k in range(0,len(z)):
        # Update
        yk = z[k] - H.dot(xkp)
        Sk = H.dot(Pkp.dot(H.T))
        Kk = la.solve(Sk.T, H.dot(Pkp.T)).T
        
        loglikelihood += norm.logpdf(z[k], H.dot(xkp), np.sqrt(H.dot(Pkp.dot(H.T))))
        
        xku = xkp + Kk.dot(yk)
        Pku = (np.eye(r) - Kk.dot(H)).dot(Pkp)

        # Predict
        xkp = F.dot(xku)
        Pkp = F.dot(Pku.dot(F.T)) + Q

    return loglikelihood

In [79]:
arma_likelihood(time_series_a, np.array([0.9]), mu=17., sigma=.4)

array([[-77.60354497]])

# Problem 2

In [84]:
def arma_fit(time_series):
    """
    Return the ARMA model that minimizes AICc for the given time series,
    subject to p,q <= 3.
    
    Parameters
    ----------
    time_series : ndarray of shape (n,1)
        The time series in question, z_t
        
    Returns
    -------
    phis : ndarray of shape (p,)
        The phi parameters
    thetas : ndarray of shape (q,)
        The theta parameters
    mu : float
        The parameter mu
    sigma : float
        The standard deviation of the a_t random variables
    """
    n = time_series.shape[0]
    pstar, qstar = -1, -1
    min_aicc = 9999
    for p in range(1,4):
        for q in range(1,4):            
            def f(x):
                return -1*arma_likelihood(time_series, phis=x[:p], 
                                          thetas=x[p:p+q], 
                                          mu=x[-2], sigma=x[-1])
            k = p + q + 2
            x0 = np.zeros(p+q+2)
            x0[-2] = time_series.mean()
            x0[-1] = time_series.std()
            sol = opt.fmin(f,x0,maxiter=10000,maxfun=10000, 
                           disp=False, retall=False)
            aicc = 2*k*(1 + (k+1)/(n-k)) + 2*f(sol)
            #print(p,q,aicc)
            if aicc < min_aicc:
                parameters = sol
                pstar = p
                qstar = q
                min_aicc = aicc
    return (parameters[:pstar], 
            parameters[pstar:pstar+qstar], 
            parameters[-2], 
            parameters[-1])

In [None]:
paramsA = arma_fit(time_series_a)
print(('Time Series A:\np - {}\n'+
       'q - {}\nmu - {}\nsigma - {}').format(paramsA[0], paramsA[1],
                                             paramsA[2], paramsA[3]))

In [None]:
paramsB = arma_fit(time_series_b)
print(('Time Series B:\np - {}\n'+
       'q - {}\nmu - {}\nsigma - {}').format(paramsB[0], paramsB[1],
                                             paramsB[2], paramsB[3]))



In [None]:
paramsC = arma_fit(time_series_c)
print(('Time Series C:\np - {}\n'+
       'q - {}\nmu - {}\nsigma - {}').format(paramsC[0], paramsC[1],
                                             paramsC[2], paramsC[3]))

# Problem 4

In [None]:
def arma_forecast(time_series, phis=array([]), thetas=array([]), mu=0., 
                  sigma=1., future_periods=20):
    """
    Return forecasts for a time series modeled with the given ARMA model.
    
    Parameters
    ----------
    time_series : ndarray of shape (n,1)
        The time series in question, z_t
    phis : ndarray of shape (p,)
        The phi parameters
    thetas : ndarray of shape (q,)
        The theta parameters
    mu : float
        The parameter mu
    sigma : float
        The standard deviation of the a_t random variables
    future_periods : int
        The number of future periods to return
        
    Returns
    -------
    e_vals : ndarray of shape (future_periods,)
        The expected values of z for times n+1, ..., n+future_periods
    sigs : ndarray of shape (future_periods,)
        The standard deviations of z for times n+1, ..., n+future_periods
    """
    