In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import sys
import scipy 
from scipy.stats import ortho_group
import pdb
import matplotlib.pyplot as plt
from glob import glob
import importlib

In [3]:
from statsmodels import tsa

In [4]:
sys.path.append('../..')

In [5]:
from neurosim.models.ssr import StateSpaceRealization as SSR
from subspaces import estimate_autocorrelation, SubspaceIdentification, IteratedStableEstimator

### Model Order Identification

In [6]:
from tqdm import tqdm
from sklearn.model_selection import KFold
from pyuoi.linear_model.var import VAR, _form_var_problem

In [38]:
state_dim = 20
obs_dim = 10

model_reps = 10
trajectory_reps = 10

N = [int(5e2), int(1e3), int(5e3), int(1e4), int(5e4)]
# Test (1) How the different model selection criteria perform in MOE identification 
# (2) Whether forward vs. reverse time estimates give systematically better predictions
# (3) OLS vs. Ridge vs. IteratedStability in terms of fit to cross-correlation matrices

n_folds = 5

results = []

for i in tqdm(range(model_reps)):
    A = np.random.normal(scale=1/(1.7 * np.sqrt(state_dim)), size=(state_dim, state_dim))
    while max(np.abs(np.linalg.eigvals(A))) > 0.99:
        A = np.random.normal(scale=1/(1.7 * np.sqrt(state_dim)), size=(state_dim, state_dim))

    C = scipy.stats.ortho_group.rvs(state_dim)[:, 0:obs_dim].T
    ssr = SSR(A=A, B=np.eye(A.shape[0]), C=C)
    ccm0 = ssr.autocorrelation(5)
    for j, n in tqdm(enumerate(N)):
        for k in range(trajectory_reps):
            y = ssr.trajectory(n)
            fold_idx = 0
            for train_idxs, test_idxs in KFold(5).split(np.arange(y.shape[0])):
                ccm1 = estimate_autocorrelation(y[train_idxs], 5)
                ccm2 = estimate_autocorrelation(y[test_idxs], 5)

                result = {}
                result['fold_idx'] = fold_idx
                result['N'] = n
                result['trajectory_rep'] = k
                result['model_rep'] = i
                result['true_params'] = (A, C)
                result['autocorr_true'] = ccm0
                result['autocorr_train'] = ccm1
                result['autocorr_test'] = ccm2


                for var_order in [1, 2, 3]:
                    varmodel = VAR(order=var_order, estimator='ols')
                    varmodel.fit(y)
                    result['coef'] = varmodel.coef_

                for model_order in np.arange(15, 27, 2):
                    ssid = SubspaceIdentification(T=3, estimator=IteratedStableEstimator)

                    A, C, Cbar, L0, Q, R, S = ssid.identify(y[train_idxs], order=model_order)

                    result['coef'] = (A, C, Cbar, L0, Q, R, S)
                    results.append(result)

  0%|          | 0/10 [00:00<?, ?it/s]
Rows of VAR matrix processed: 10it [00:00, 3271.43it/s]

Rows of VAR matrix processed: 10it [00:00, 2415.38it/s]

Rows of VAR matrix processed: 10it [00:00, 1923.29it/s]

Rows of VAR matrix processed: 10it [00:00, 2799.75it/s]

Rows of VAR matrix processed: 10it [00:00, 2685.04it/s]

Rows of VAR matrix processed: 10it [00:00, 2246.43it/s]

Rows of VAR matrix processed: 10it [00:00, 3977.91it/s]

Rows of VAR matrix processed: 10it [00:00, 2447.80it/s]

Rows of VAR matrix processed: 10it [00:00, 2033.01it/s]

Rows of VAR matrix processed: 10it [00:00, 3005.59it/s]

Rows of VAR matrix processed: 10it [00:00, 2744.07it/s]

Rows of VAR matrix processed: 10it [00:00, 2275.93it/s]

Rows of VAR matrix processed: 10it [00:00, 3324.59it/s]

Rows of VAR matrix processed: 10it [00:00, 2639.75it/s]

Rows of VAR matrix processed: 10it [00:00, 2190.01it/s]

Rows of VAR matrix processed: 10it [00:00, 3731.26it/s]

Rows of VAR matrix processed: 10it [00:00, 2762.3

KeyboardInterrupt: 

In [7]:
from em import StableStateSpaceML
from statsmodels.tsa.statespace.varmax import VARMAX
import time

In [8]:
state_dim = 20
obs_dim = 10

model_reps = 1
trajectory_reps = 10

N = [int int(5e3)]
# Test (1) How the different model selection criteria perform in MOE identification 
# (2) Whether forward vs. reverse time estimates give systematically better predictions
# (3) OLS vs. Ridge vs. IteratedStability in terms of fit to cross-correlation matrices

n_folds = 5

results = []

for i in tqdm(range(model_reps)):
    A = np.random.normal(scale=1/(1.7 * np.sqrt(state_dim)), size=(state_dim, state_dim))
    while max(np.abs(np.linalg.eigvals(A))) > 0.99:
        A = np.random.normal(scale=1/(1.7 * np.sqrt(state_dim)), size=(state_dim, state_dim))

    C = scipy.stats.ortho_group.rvs(state_dim)[:, 0:obs_dim].T
    ssr = SSR(A=A, B=np.eye(A.shape[0]), C=C)
    ccm0 = ssr.autocorrelation(5)
    for j, n in tqdm(enumerate(N)):
        for k in range(trajectory_reps):
            y = ssr.trajectory(n)
            fold_idx = 0
            for train_idxs, test_idxs in KFold(5).split(np.arange(y.shape[0])):
                ccm1 = estimate_autocorrelation(y[train_idxs], 5)
                ccm2 = estimate_autocorrelation(y[test_idxs], 5)

                result = {}
                result['fold_idx'] = fold_idx
                result['N'] = n
                result['trajectory_rep'] = k
                result['model_rep'] = i
                result['true_params'] = (A, C)
                result['autocorr_true'] = ccm0
                result['autocorr_train'] = ccm1
                result['autocorr_test'] = ccm2

                for model_order in range(5, 10):
                    result['order'] = model_order
                    try:   
                        t0 = time.time()                 
                        ssm = StableStateSpaceML(max_iter=50)
                        ssm.fit(y[train_idxs], state_dim=model_order)
                        result['coef'] = (ssm.A, ssm.C, ssm.R, ssm.x0, ssm.Sigma0)
                        print(time.time() - t0)
                    except:
                        result['coef'] = 'Fitting failed'
                    results.append(result)

  0%|          | 0/1 [00:00<?, ?it/s]

E step: 1.354347
M step: 0.147017
E step: 1.374358
M step: 0.141715
E step: 1.329966
M step: 0.105737
E step: 1.337321
M step: 0.107113
E step: 1.340536
M step: 0.099195
E step: 1.358083
M step: 0.098696
E step: 1.358243
M step: 0.145554
E step: 1.392288
M step: 0.103665
E step: 1.369696
M step: 0.098907
E step: 1.372622
M step: 0.107252
E step: 1.378311
M step: 0.132406
E step: 1.300604
M step: 0.148490
E step: 1.374848
M step: 0.105130
E step: 1.351195
M step: 0.126753
E step: 1.347877
M step: 0.105191
E step: 1.423412
M step: 0.100155


In [None]:
state_dim = 20
obs_dim = 10

model_reps = 10
trajectory_reps = 10

N = [int(5e2), int(1e3), int(5e3), int(1e4), int(5e4)]
# Test (1) How the different model selection criteria perform in MOE identification 
# (2) Whether forward vs. reverse time estimates give systematically better predictions
# (3) OLS vs. Ridge vs. IteratedStability in terms of fit to cross-correlation matrices

n_folds = 5

results_varma = []

for i in tqdm(range(model_reps)):
    A = np.random.normal(scale=1/(1.7 * np.sqrt(state_dim)), size=(state_dim, state_dim))
    while max(np.abs(np.linalg.eigvals(A))) > 0.99:
        A = np.random.normal(scale=1/(1.7 * np.sqrt(state_dim)), size=(state_dim, state_dim))

    C = scipy.stats.ortho_group.rvs(state_dim)[:, 0:obs_dim].T
    ssr = SSR(A=A, B=np.eye(A.shape[0]), C=C)
    ccm0 = ssr.autocorrelation(5)
    for j, n in tqdm(enumerate(N)):
        for k in range(trajectory_reps):
            y = ssr.trajectory(n)
            fold_idx = 0
            for train_idxs, test_idxs in KFold(5).split(np.arange(y.shape[0])):
                ccm1 = estimate_autocorrelation(y[train_idxs], 5)
                ccm2 = estimate_autocorrelation(y[test_idxs], 5)

                result = {}
                result['fold_idx'] = fold_idx
                result['N'] = n
                result['trajectory_rep'] = k
                result['model_rep'] = i
                result['true_params'] = (A, C)
                result['autocorr_true'] = ccm0
                result['autocorr_train'] = ccm1
                result['autocorr_test'] = ccm2

                for order_tup in [(1, 1), (1, 2), (2, 1), (2, 2)]:
                    result['order'] = order_tup

                    try:
                        varmamodel = VARMAX(endog=y[train_idxs], order=order_tup).fit(maxiter=50)
                        result['coef'] = (varmamodel.coefficient_matrices_var, varmamodel.coefficient_matrices_vma)
                    except:
                        result['coef'] = 'Fit failed'                
                    results_varma.append(result)



RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =          265     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.50258D+01    |proj g|=  1.01948D-01


 This problem is unconstrained.



           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
  265      2      4      1     0     0   5.162D-02   1.497D+01
  F =   14.967203114235780     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT                 


 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =          365     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.47839D+01    |proj g|=  1.45992D-01

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
  365      2      4      1     0     0   4.043D-02   1.472D+01
  F =   14.715014917451599     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT                 


 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =          365     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.47251D+01    |proj g|=  5.96925D-02


0it [00:14, ?it/s]
  0%|          | 0/10 [00:14<?, ?it/s]


KeyboardInterrupt: 