# Time series demo 2: mystery AR(p) model on synthetic data

**Guest lecture**

Columbia IEOR 4729 : _Model Based Trading: Theory and Practice_

Q McCallum (http://qethanm.cc)

In [None]:
import numpy as np
import statsmodels.api as sm
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
## %matplotlib inline

In [None]:
## Seed NumPy's global random number generator so that the "mystery"
## parameter pick below is the same on every Restart & Run All.
## (Comment this line out if you want a genuinely fresh mystery each run.)
np.random.seed( 4729 )

In [None]:
## Candidate AR coefficient vectors.  The series we generate later is driven
## by exactly one of these; which one is decided at random, below.

possible_ar_parameters = [
    np.array( coefficients )
    for coefficients in (
        [ 0.75 , -0.25 ] ,
        [ 0.50 , 0.35 ] ,
        [ 0.50 , -0.35 ] ,
        [ 0.75 , -0.25 , 0.15 ] ,
        [ 0.75 , -0.55 , -0.25 , 0.15 ] ,
        [ 0.75 , 0.55 , -0.60 ] ,
        [ 0.60 , -0.50 , -0.4 ] ,
        [ 0.50 , 0.35 ] ,
    )
]


def get_mystery_parameters():
    """Return one entry of `possible_ar_parameters`, picked at random.

    The list index is drawn with `np.random.randint`, so the pick follows
    NumPy's global random state.  The array is returned as stored in the
    list (no copy is made).
    """
    ## draw an index in [ 0 , number of candidates )
    candidate_count = len( possible_ar_parameters )
    chosen_index = np.random.randint( low=0 , high=candidate_count )

    ## hand back the AR coefficients stored at that index
    return possible_ar_parameters[ chosen_index ]

In [None]:
## Demo draw: pick one candidate parameter set at random ...
ar_parameters_mystery = get_mystery_parameters()

## ... and print it, just so we can see what a parameter set looks like.
print( ar_parameters_mystery )

In [None]:
## Doing it for real this time: this second draw replaces the demo value
## assigned to `ar_parameters_mystery` earlier.  We won't print the
## parameters just yet, to keep ourselves honest.

ar_parameters_mystery = get_mystery_parameters()

In [None]:
## Uncomment these for debugging:
## print( len( ar_parameters_mystery ) )
## print( ar_parameters_mystery )

In [None]:
## ArmaProcess does not take the AR coefficients as-is: it wants a leading 1
## followed by the _negated_ AR coefficients.  Build that list in one step.

armaprocess_params_ar = [ 1 ] + list( 0 - ar_parameters_mystery )

In [None]:
## The MA side is still just `[1]`: we know this is _some_ kind of purely
## autoregressive model.  In a real-world situation we may not have that
## luxury.

armaprocess_params_ma = [ 1 ]

process_ar_mystery = sm.tsa.ArmaProcess( armaprocess_params_ar , armaprocess_params_ma )

In [None]:
## Generate 500 observations from the mystery process, asking for a burn-in
## of 100 on top of that.
y_ar_mystery = process_ar_mystery.generate_sample( nsample = 500 , burnin = 100 )

In [None]:
## take a quick look at the first ten generated values
y_ar_mystery[:10]

In [None]:
## First sanity check: plot the raw generated series.
series_ar_mystery = pd.Series( y_ar_mystery )

_ = series_ar_mystery.plot( figsize = ( 20 , 6 ) , title = "AR(?) data" )

In [None]:
## Diagnostic plots for the raw series: autocorrelation (ACF) first, then
## partial autocorrelation (PACF).  What do they suggest about the order?
for plot_diagnostic in ( sm.graphics.tsa.plot_acf , sm.graphics.tsa.plot_pacf ) :
    _ = plot_diagnostic( y_ar_mystery )


In [None]:
## Candidate ( p , q ) orders to try, from the most AR lags to the fewest.
## q stays at 0 throughout: we are only considering pure AR models.
ar_p_to_try = [
    (5,0) ,
    (4,0) ,
    (3,0) ,
    (2,0) ,
    (1,0) ,
]

## One ( order , AIC , BIC ) entry per candidate that was fit successfully.
param_search_results = []

print( "(Remember: lowest AIC wins)" )

for ar_p in ar_p_to_try :
    print( "trying parameters: {}".format( ar_p ) )
    try:
        ## NOTE(review): `sm.tsa.ARMA` was deprecated in statsmodels 0.12 and
        ## removed in 0.13.  On a newer install every candidate would land in
        ## the `except` branch below.  The documented replacement is
        ## `statsmodels.tsa.arima.model.ARIMA` with order ( p , 0 , q ) --
        ## confirm the statsmodels version this course pins before porting.
        model_testing = sm.tsa.ARMA( y_ar_mystery , ar_p ).fit( trend="nc" , disp=0 )

        print( "model params: {}".format( model_testing.params ) )
        print( "AIC:     {}".format( model_testing.aic ) )
        print( "BIC:     {}".format( model_testing.bic ) )

        ## remember this candidate's scores so we can name a winner afterwards
        param_search_results.append( ( ar_p , model_testing.aic , model_testing.bic ) )
    except Exception as e:
        ## deliberate best-effort: one failed fit should not stop the search
        print( "problem testing model: {}".format( e ) )
    print()

## Report the winner instead of leaving the reader to eyeball the log above.
if param_search_results :
    best_order , best_aic = min( param_search_results , key = lambda entry : entry[ 1 ] )[ :2 ]
    print( "lowest AIC: order {} (AIC {})".format( best_order , best_aic ) )
else:
    print( "no candidate model could be fit" )

In [None]:
## Order picked by reading the search output above, given as ( p , q ).
testing_params = ( 2 , 0 )

fit_ar_mystery = (
    sm.tsa.ARMA( y_ar_mystery , order = testing_params )
    .fit( trend = "nc" , disp = 0 )
)

In [None]:
## Display the summary of the fitted model (last expression in the cell, so
## it is rendered as the cell's output).
fit_ar_mystery.summary()

So ... what kind of model was this, really?

In [None]:
## The reveal: these are the AR parameters the series was generated from.
print( ar_parameters_mystery )

In [None]:
## Put the generated series next to the fitted model's values and plot both
## on the same axes.
reality_vs_model = pd.DataFrame(
    { "y_ar" : y_ar_mystery , "model" : fit_ar_mystery.fittedvalues }
)

_ = reality_vs_model.plot(
    figsize = ( 20 , 6 ) ,
    title = "AR(?) series: reality (y_ar) vs prediction (model)"
)

In [None]:
## Always check the fitted model's residuals as well: if the model is
## adequate they should look like white noise in both plots.
residuals_ar_mystery = fit_ar_mystery.resid

_ = sm.graphics.tsa.plot_acf( residuals_ar_mystery )
_ = sm.graphics.tsa.plot_pacf( residuals_ar_mystery )