In [None]:
%matplotlib inline
import os
os.chdir("/home/mackelab/Desktop/Projects/Stitching/code/pyRRHDLDS/core")
import ssm_scripts
import ssm_fit

from subtracking import Grouse, calc_subspace_proj_error
from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid.icml_scripts import run_default
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import gen_data, gen_pars, draw_data 

import time
import scipy as sp
from scipy import stats
from scipy import linalg as la
import numpy as np
import matplotlib.pyplot as plt
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import draw_data
from ssm_scripts import setup_fit_lds

def principal_angle(A, B):
    """Return the principal angles between the column spans of A and B.

    Both inputs are re-orthonormalised with ``la.orth`` before they are
    compared, and a 1-D input is treated as a single column vector.
    The angles are divided by pi/2, so 0 means aligned and 1 means
    orthogonal.
    """
    def _as_basis(M):
        # promote a 1-D vector to one column, then orthonormalise the columns
        cols = M if M.ndim >= 2 else np.atleast_2d(M).T
        return la.orth(cols)

    Qa, Qb = _as_basis(A), _as_basis(B)
    # singular values of Qa^T Qb are the cosines of the principal angles
    cosines = la.svd(Qa.T.dot(Qb))[1]
    # clip at 1 so round-off cannot push arccos outside its domain
    return np.arccos(np.minimum(cosines, 1.0)) / (np.pi/2)


# Simulate an LDS for each random seed, fit the observation subspace with
# GROUSE under a two-subpopulation observation scheme, then estimate the
# latent dynamics from the projected data.

# Candidate observed dimensionalities p; one is picked by `i` in the box below.
ps = np.array([1e2, 1e3, 1e4, 1e5],dtype=int)
# NOTE(review): absolute, machine-specific results path. It is extended with a
# per-p subfolder below but nothing in this cell writes to it.
data_path = '/home/mackelab/Desktop/Projects/Stitching/results/icml_e2/'

dtype=np.float32
mmap, verbose = False, True
whiten = False

###################################
# select simulation setup here !  #
i = 1                             #
p,n,T = ps[i],10,30000            #
snr = (1.0, 1.0)                  #
###################################

max_iter_EM = 100

nr = 0 # number of real eigenvalues
# lower/upper bounds for eigenvalue magnitudes (real / complex)
eig_m_r, eig_M_r, eig_m_c, eig_M_c = 0.95, 0.99, 0.95, 0.99
ev_r = np.linspace(eig_m_r, eig_M_r, nr)


data_path = data_path + 'p' + str(p) + '/'
for rnd_seed in range(20,30):

    # --- ground-truth parameters and data -------------------------------
    np.random.seed(rnd_seed)
    nc, nc_u = n - nr, (n - nr)//2
    # complex eigenvalues: phases from von Mises draws, moduli evenly spaced
    # between eig_m_c and eig_M_c
    ev_c = np.exp(2 * 1j * np.pi * np.random.vonmises(mu=0, kappa=1000, size=nc_u))
    ev_c = np.linspace(eig_m_c, eig_M_c, (n - nr)//2) * ev_c
    pars_true = gen_pars(p,n, nr, ev_r, ev_c, snr, whiten, dtype=dtype)
    pars_true['d'], pars_true['mu0'], pars_true['V0'] = np.zeros(p), np.zeros(n), pars_true['Pi'].copy()
    # replace C by an orthonormal basis of its column span, rescaled by sqrt(p/n)
    pars_true['C'] = la.orth(pars_true['C']) * np.sqrt(p) / np.sqrt(n)
    # uniform on [snr[0], snr[1]]; with snr = (1.0, 1.0) every entry is 1
    pars_true['R'] = np.asarray(np.random.uniform(size=p, low=snr[0], high=snr[1]), dtype=dtype)
    x,y = draw_data(pars_true,T, dtype=dtype)
    y -= y.mean(axis=0)  # centre each observed dimension in place

    # random subset of at most 1000 observed dimensions (all of them if p <= 1000)
    idx_a = np.sort(np.random.choice(p, 1000, replace=False)) if p > 1000 else np.arange(p)
    idx_b = idx_a.copy()

    sso = True
    # two overlapping index sets: [0, 0.55p) and [0.45p, p)
    # NOTE(review): obs_time=(T//2, T) presumably gives the time at which each
    # subpopulation stops being observed -- confirm against ObservationScheme.
    obs_scheme = ObservationScheme(p=p, T=T, 
                                    sub_pops= (np.arange(0,int(0.55*p)),np.arange(int(0.45*p),p)), 
                                    obs_pops=(0,1), 
                                    obs_time=(T//2,T))
    obs_scheme.comp_subpop_stats()    

    # left: true dynamics matrix A; right: correlation matrix of the latents
    plt.subplot(1,2,1)
    plt.imshow(pars_true['A'], interpolation='None')
    plt.colorbar()
    plt.subplot(1,2,2)
    plt.imshow(np.corrcoef(x.T), interpolation='None')
    del x
    plt.colorbar()
    plt.show()
    
    # start fitting    
    rnd_seed_fit = np.random.get_state()    # RNG state right before the initial C is drawn
    pars_init={
        'C' : np.asarray(la.orth(np.random.normal(size=(p,n))),dtype=dtype) * np.sqrt(p) / np.sqrt(n),
    }

    # settings for GROUSE
    a_grouse = 0.1
    tracker = Grouse(p, n, a_grouse )
    tracker.U = pars_init['C'].copy()
    max_epoch_size = 100
    max_iter_grouse = 300
    get_obs = obs_scheme.gen_get_observed()

    # fit GROUSE
    print('\n - GROUSE')
    tracker.step = a_grouse
    ct = 1.
    # per outer iteration: column 0 = projection error, columns 1..n = principal angles
    error = np.zeros((max_iter_grouse, n+1))
    t = time.time()

    # NOTE: this loop variable reuses the name `i` of the setup selector above
    for i in range(max_iter_grouse):
        if np.mod(i,max_iter_grouse//10) == 0:
            print('finished % ' + str((100*i)//max_iter_grouse))
        # draw a fresh random subset of at most max_epoch_size time points
        idx = np.random.permutation(T)
        idx = idx[:max_epoch_size] if len(idx) > max_epoch_size else idx
        for j in range(len(idx)):
            # boolean column marking the dimensions returned by get_obs for this time point
            obs_idx =  np.zeros((p,1), dtype=bool)
            obs_idx[get_obs(idx[j])] = True
            tracker.consume(y[idx[j],:].reshape(-1,1), obs_idx)
            ct += 1     
            tracker.step = a_grouse / ct  # step size decays as 1/ct

        error[i] = np.hstack((calc_subspace_proj_error(pars_true['C'], tracker.U), principal_angle(pars_true['C'], tracker.U)))
    t = time.time() - t
    pars_est_g = {'C' : tracker.U.copy()}

    print('final proj. error (est.): ', str(error[-1][0]))
    print(principal_angle(pars_est_g['C'], pars_true['C']))

    # NOTE(review): error[:,1:] holds the principal angles, yet the titles say
    # 'subspace proj. error' -- confirm which quantity was meant to be shown.
    plt.subplot(1,2,1)
    plt.plot(error[:,1:])
    plt.title('subspace proj. error (GROUSE)')
    plt.subplot(1,2,2)
    plt.loglog(error[:,1:])
    plt.title('subspace proj. error (GROUSE)')
    plt.show()

    traces_g = [error.copy()]
    ts_g = [t]            

    # --- project every time point onto the fitted subspace --------------
    print('filtering data') 
    obs_scheme.gen_mask_from_scheme()
    tracker = Grouse(p, n, 0. )
    tracker.U = pars_est_g['C'].copy()
    x_g = np.zeros((T,n))
    # NOTE: `t` (the elapsed fitting time, already stored in ts_g) is reused as a time index
    for t in range (T):
        x_g[t,:] = tracker._project(y[t,:].reshape(p,1), obs_scheme.mask[t,:].reshape(p,1)).reshape(-1)
    # drop the mask again and switch mask usage off
    obs_scheme.mask = None    
    obs_scheme.use_mask = False

    # --- dynamics from time-lagged covariances of the projected data -----
    lag_range_g = np.arange(20)
    kl_ = np.max(lag_range_g) + 1
    print('extracting dynamics parameters') 
    # stack, for each lag m, the n x n cross-covariance block between x_g shifted by m and x_g
    pars_est_g['X'] = np.vstack([np.cov(x_g[m:-(kl_+1)+m, :].T, x_g[:-(kl_+1), :].T)[:n,n:] for m in lag_range_g])
    # least-squares solution mapping the blocks for lags 0..18 onto those for lags 1..19
    pars_est_g['A'] = np.linalg.lstsq(pars_est_g['X'][:(len(lag_range_g)-1)*n,:], pars_est_g['X'][n:len(lag_range_g)*n,:])[0]
    # symmetrised lag-0 block
    pars_est_g['Pi'] = (pars_est_g['X'][:n,:] + pars_est_g['X'][:n,:].T)/2 
    ev_est = np.linalg.eigvals(pars_est_g['A'])
    del x_g                


In [None]:
%matplotlib inline
import os
os.chdir("/home/mackelab/Desktop/Projects/Stitching/code/pyRRHDLDS/core")
import ssm_scripts
import ssm_fit

from subtracking import Grouse, calc_subspace_proj_error
from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid.icml_scripts import run_default
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import gen_data, gen_pars, draw_data 

import time
import scipy as sp
from scipy import stats
from scipy import linalg as la
import numpy as np
import matplotlib.pyplot as plt
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import draw_data
from ssm_scripts import setup_fit_lds

def principal_angle(A, B):
    """Principal angles between span(A) and span(B), in units of pi/2.

    Each argument is orthonormalised with ``la.orth`` first; a 1-D argument
    is interpreted as a single column. A value of 0 means the directions
    coincide and 1 means they are orthogonal.
    """
    bases = []
    for M in (A, B):
        if M.ndim < 2:
            M = np.atleast_2d(M).T  # one column from a 1-D input
        bases.append(la.orth(M))
    overlap = bases[0].T.dot(bases[1])
    # the singular values of the overlap are the cosines of the angles
    _, cosines, _ = la.svd(overlap)
    # cap at 1 to keep arccos defined under round-off
    return np.arccos(np.minimum(cosines, 1.0)) / (np.pi/2)


# Same experiment as the preceding GROUSE cell, but with i = 2 (p = 1e4)
# and different GROUSE settings (a_grouse, max_epoch_size, max_iter_grouse).

# Candidate observed dimensionalities p; one is picked by `i` in the box below.
ps = np.array([1e2, 1e3, 1e4, 1e5],dtype=int)
# NOTE(review): absolute, machine-specific results path. It is extended with a
# per-p subfolder below but nothing in this cell writes to it.
data_path = '/home/mackelab/Desktop/Projects/Stitching/results/icml_e2/'

dtype=np.float32
mmap, verbose = False, True
whiten = False

###################################
# select simulation setup here !  #
i = 2                             #
p,n,T = ps[i],10,30000            #
snr = (1.0, 1.0)                  #
###################################

max_iter_EM = 100

nr = 0 # number of real eigenvalues
# lower/upper bounds for eigenvalue magnitudes (real / complex)
eig_m_r, eig_M_r, eig_m_c, eig_M_c = 0.95, 0.99, 0.95, 0.99
ev_r = np.linspace(eig_m_r, eig_M_r, nr)


data_path = data_path + 'p' + str(p) + '/'
for rnd_seed in range(20,30):

    # --- ground-truth parameters and data -------------------------------
    np.random.seed(rnd_seed)
    nc, nc_u = n - nr, (n - nr)//2
    # complex eigenvalues: phases from von Mises draws, moduli evenly spaced
    # between eig_m_c and eig_M_c
    ev_c = np.exp(2 * 1j * np.pi * np.random.vonmises(mu=0, kappa=1000, size=nc_u))
    ev_c = np.linspace(eig_m_c, eig_M_c, (n - nr)//2) * ev_c
    pars_true = gen_pars(p,n, nr, ev_r, ev_c, snr, whiten, dtype=dtype)
    pars_true['d'], pars_true['mu0'], pars_true['V0'] = np.zeros(p), np.zeros(n), pars_true['Pi'].copy()
    # replace C by an orthonormal basis of its column span, rescaled by sqrt(p/n)
    pars_true['C'] = la.orth(pars_true['C']) * np.sqrt(p) / np.sqrt(n)
    # uniform on [snr[0], snr[1]]; with snr = (1.0, 1.0) every entry is 1
    pars_true['R'] = np.asarray(np.random.uniform(size=p, low=snr[0], high=snr[1]), dtype=dtype)
    x,y = draw_data(pars_true,T, dtype=dtype)
    y -= y.mean(axis=0)  # centre each observed dimension in place

    # random subset of at most 1000 observed dimensions (all of them if p <= 1000)
    idx_a = np.sort(np.random.choice(p, 1000, replace=False)) if p > 1000 else np.arange(p)
    idx_b = idx_a.copy()

    sso = True
    # two overlapping index sets: [0, 0.55p) and [0.45p, p)
    # NOTE(review): obs_time=(T//2, T) presumably gives the time at which each
    # subpopulation stops being observed -- confirm against ObservationScheme.
    obs_scheme = ObservationScheme(p=p, T=T, 
                                    sub_pops= (np.arange(0,int(0.55*p)),np.arange(int(0.45*p),p)), 
                                    obs_pops=(0,1), 
                                    obs_time=(T//2,T))
    obs_scheme.comp_subpop_stats()    

    # left: true dynamics matrix A; right: correlation matrix of the latents
    plt.subplot(1,2,1)
    plt.imshow(pars_true['A'], interpolation='None')
    plt.colorbar()
    plt.subplot(1,2,2)
    plt.imshow(np.corrcoef(x.T), interpolation='None')
    del x
    plt.colorbar()
    plt.show()
    
    # start fitting    
    rnd_seed_fit = np.random.get_state()    # RNG state right before the initial C is drawn
    pars_init={
        'C' : np.asarray(la.orth(np.random.normal(size=(p,n))),dtype=dtype) * np.sqrt(p) / np.sqrt(n),
    }

    # settings for GROUSE
    a_grouse = 0.05
    tracker = Grouse(p, n, a_grouse )
    tracker.U = pars_init['C'].copy()
    max_epoch_size = 1000
    max_iter_grouse = 100
    get_obs = obs_scheme.gen_get_observed()

    # fit GROUSE
    print('\n - GROUSE')
    tracker.step = a_grouse
    ct = 1.
    # per outer iteration: column 0 = projection error, columns 1..n = principal angles
    error = np.zeros((max_iter_grouse, n+1))
    t = time.time()

    # NOTE: this loop variable reuses the name `i` of the setup selector above
    for i in range(max_iter_grouse):
        if np.mod(i,max_iter_grouse//10) == 0:
            print('finished % ' + str((100*i)//max_iter_grouse))
        # draw a fresh random subset of at most max_epoch_size time points
        idx = np.random.permutation(T)
        idx = idx[:max_epoch_size] if len(idx) > max_epoch_size else idx
        for j in range(len(idx)):
            # boolean column marking the dimensions returned by get_obs for this time point
            obs_idx =  np.zeros((p,1), dtype=bool)
            obs_idx[get_obs(idx[j])] = True
            tracker.consume(y[idx[j],:].reshape(-1,1), obs_idx)
            ct += 1     
            tracker.step = a_grouse / ct  # step size decays as 1/ct

        error[i] = np.hstack((calc_subspace_proj_error(pars_true['C'], tracker.U), principal_angle(pars_true['C'], tracker.U)))
    t = time.time() - t
    pars_est_g = {'C' : tracker.U.copy()}

    print('final proj. error (est.): ', str(error[-1][0]))
    print(principal_angle(pars_est_g['C'], pars_true['C']))

    # NOTE(review): error[:,1:] holds the principal angles, yet the titles say
    # 'subspace proj. error' -- confirm which quantity was meant to be shown.
    plt.subplot(1,2,1)
    plt.plot(error[:,1:])
    plt.title('subspace proj. error (GROUSE)')
    plt.subplot(1,2,2)
    plt.loglog(error[:,1:])
    plt.title('subspace proj. error (GROUSE)')
    plt.show()

    traces_g = [error.copy()]
    ts_g = [t]            

    # --- project every time point onto the fitted subspace --------------
    print('filtering data') 
    obs_scheme.gen_mask_from_scheme()
    tracker = Grouse(p, n, 0. )
    tracker.U = pars_est_g['C'].copy()
    x_g = np.zeros((T,n))
    # NOTE: `t` (the elapsed fitting time, already stored in ts_g) is reused as a time index
    for t in range (T):
        x_g[t,:] = tracker._project(y[t,:].reshape(p,1), obs_scheme.mask[t,:].reshape(p,1)).reshape(-1)
    # drop the mask again and switch mask usage off
    obs_scheme.mask = None    
    obs_scheme.use_mask = False

    # --- dynamics from time-lagged covariances of the projected data -----
    lag_range_g = np.arange(20)
    kl_ = np.max(lag_range_g) + 1
    print('extracting dynamics parameters') 
    # stack, for each lag m, the n x n cross-covariance block between x_g shifted by m and x_g
    pars_est_g['X'] = np.vstack([np.cov(x_g[m:-(kl_+1)+m, :].T, x_g[:-(kl_+1), :].T)[:n,n:] for m in lag_range_g])
    # least-squares solution mapping the blocks for lags 0..18 onto those for lags 1..19
    pars_est_g['A'] = np.linalg.lstsq(pars_est_g['X'][:(len(lag_range_g)-1)*n,:], pars_est_g['X'][n:len(lag_range_g)*n,:])[0]
    # symmetrised lag-0 block
    pars_est_g['Pi'] = (pars_est_g['X'][:n,:] + pars_est_g['X'][:n,:].T)/2 
    ev_est = np.linalg.eigvals(pars_est_g['A'])
    del x_g                


In [None]:
# Rebuild the observation mask from the scheme, then display it.
obs_scheme.gen_mask_from_scheme()

# every 100th mask row, transposed for plotting
mask_preview = obs_scheme.mask[::100, :].T
plt.imshow(mask_preview, aspect='auto')
plt.show()

In [None]:
% matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy import linalg as la
import glob, os, psutil, time

from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import gen_data, gen_pars, draw_data 
from ssidid.icml_scripts import run_default


# Simulate a fully observed LDS for each seed, compute time-lagged covariances,
# fit the model with run_default, and save everything to an .npz file per seed.

def principal_angle(A, B):
    """Principal angles between the column spans of A and B, scaled by pi/2.

    Inputs are re-orthonormalised with ``la.orth`` (1-D inputs are treated as
    single columns), so they need not be orthonormal on entry.
    """
    A = np.atleast_2d(A).T if (A.ndim<2) else A
    B = np.atleast_2d(B).T if (B.ndim<2) else B
    A = la.orth(A)
    B = la.orth(B)
    svd = la.svd(A.T.dot(B))
    # clip at 1 so round-off cannot push arccos outside its domain
    return np.arccos(np.minimum(svd[1], 1.0)) / (np.pi/2)


i = 1  # index into ps / ns / Ts selecting the problem size
run = '_e2_init'  # NOTE: not used anywhere in this cell

rnd_seeds = range(20,23)
ps = np.array([1e2, 1e3, 1e4, 1e5],dtype=int)
ns = [ 10,  10,  10,  10]
Ts = int(1.5e4) * np.ones(len(ps),dtype=int)
lag_range = np.arange(20)

dtype=np.float32
# `nr` has to exist before ev_r is built; previously it was first assigned
# inside the loop below, so this cell raised NameError on a fresh kernel.
nr = 0 # number of real eigenvalues
eig_m_r, eig_M_r, eig_m_c, eig_M_c = 0.95, 0.99, 0.95, 0.99
ev_r = np.linspace(eig_m_r, eig_M_r, nr)
p,n,T,snr = ps[i], ns[i], Ts[i], (1., 1.)

# NOTE(review): absolute, machine-specific output directory.
data_path = '/home/mackelab/Desktop/Projects/Stitching/results/'


for rnd_seed in rnd_seeds:

    # --- ground-truth parameters and data -------------------------------
    np.random.seed(rnd_seed)
    nr = 0 # number of real eigenvalues
    whiten = True
    # these bounds override the ones set before the loop
    eig_m_r, eig_M_r, eig_m_c, eig_M_c = 0.95, 0.99, 0.99, 0.999
    nc, nc_u = n - nr, (n - nr)//2
    ev_r = np.linspace(eig_m_r, eig_M_r, nr)
    # complex eigenvalues: phases from von Mises draws, moduli evenly spaced
    ev_c = np.exp(2 * 1j * np.pi * np.random.vonmises(mu=0, kappa=1000, size=nc_u))
    ev_c = np.linspace(eig_m_c, eig_M_c, (n - nr)//2) * ev_c
    mmap, verbose = False, True
    pars_true = gen_pars(p,n, nr, ev_r, ev_c, snr, whiten, dtype=dtype)
    pars_true['d'], pars_true['mu0'], pars_true['V0'] = np.zeros(p), np.zeros(n), pars_true['Pi'].copy()
    # orthonormal basis of span(C), rescaled by sqrt(p/n)
    pars_true['C'] = la.orth(pars_true['C']) * np.sqrt(p) / np.sqrt(n)
    pars_true['R'] = np.asarray(np.random.uniform(size=p, low=snr[0], high=snr[1]), dtype=dtype)
    x,y = draw_data(pars_true,T, dtype=dtype)
    y -= y.mean(axis=0)  # centre each observed dimension in place


    # random subset of at most 1000 observed dimensions (all of them if p <= 1000)
    idx_a = np.sort(np.random.choice(p, 1000, replace=False)) if p > 1000 else np.arange(p)
    idx_b = idx_a.copy()

    sso = True
    # a single subpopulation containing all p indices
    obs_scheme = ObservationScheme(p=p, T=T, 
                                    sub_pops=(np.arange(p),), 
                                    obs_pops=(0,), 
                                    obs_time=(T,))
    obs_scheme.comp_subpop_stats()    

    W = obs_scheme.comp_coocurrence_weights(lag_range, sso=sso, idx_a=idx_a, idx_b=idx_b)
    print('computing time-lagged covariances')
    Qs, Om = f_l2_Hankel_comp_Q_Om(n=n,y=y,lag_range=lag_range,obs_scheme=obs_scheme,
                          idx_a=idx_a,idx_b=idx_b,W=W,sso=sso,
                          mmap=mmap,data_path=None,ts=None,ms=None)

    # stacked A^m Pi blocks for every lag m in lag_range
    pars_true['X'] = np.vstack([ np.linalg.matrix_power(pars_true['A'],m).dot(pars_true['Pi']) for m in lag_range])
    print('true param. loss: ', f_l2_Hankel_nl(C=pars_true['C'],
                                   X=pars_true['X'],
                                   R=pars_true['R'],
                                   Qs=Qs,
                                   Om=Om,
                                   lag_range=lag_range,
                                   ms=range(len(lag_range)),
                                   idx_a=idx_a,
                                   idx_b=idx_b))
    print_slim(Qs,Om,lag_range,pars_true,idx_a,idx_b,None,False,None)

    # --- initialisation ---------------------------------------------------
    rnd_seed_fit = np.random.get_state()
    #np.random.seed(rnd_seed)
    pars_est={
        'C' : np.random.normal(size=(p,n)) / np.sqrt(n),
        'A' : np.diag(np.linspace(0.89, 0.91, n)),
        'B' : np.eye(n),
        'Pi': np.eye(n),
        'R' : np.zeros(p)
    }
    pars_est['X'] = np.vstack([ np.linalg.matrix_power(pars_est['A'],m).dot(pars_est['Pi']) for m in lag_range])
    C, Pi = pars_est['C'], pars_est['Pi']
    # in-place row normalisation; C aliases pars_est['C'], so the dict entry changes too
    C /= np.atleast_2d(np.sqrt(np.sum(C*C.dot(Pi), axis=1))).T
    pars_est_g = 'default'
    
    # NOTE(review): this overwrites the state captured before the initial C was
    # drawn, so rnd_seed_fit ends up holding the state *after* that draw.
    rnd_seed_fit = np.random.get_state()
    #np.random.seed(rnd_seed)
    # --- fit: two stages, one entry per stage in each tuple -----------------
    pars_est, traces, ts= run_default(
                alphas    = (0.1, 0.001), 
                b1s       = (0.9, 0.95), 
                a_decays  = (0.95, 0.99), 
                batch_sizes = (1, 1), 
                max_zip_sizes =  (150,150), 
                max_iters = (100, 80 ),
                parametrizations = ('nl', 'nl'),
                pars_est=pars_est, pars_true=pars_true, n=n, 
                y=y, sso=sso, obs_scheme=obs_scheme, lag_range=lag_range, 
                idx_a=idx_a, idx_b=idx_b,Qs=Qs,Om=Om, W=W,
                traces=[[], [], []], ts = [], dtype=np.float32)     

    
    # --- save ---------------------------------------------------------------
    save_dict = {'p' : p,
                 'n' : n,
                 'T' : T,
                 'snr' : snr,
                 'obs_scheme' : obs_scheme,
                 'lag_range' : lag_range,
                 'mmap' : mmap,
                 'pars_true' : pars_true,
                 'pars_est' : pars_est,
                 'y' : y,
                 'x' : x,
                 'idx_a' : idx_a,
                 'idx_b' : idx_b,
                 'W' : W,
                 'Qs' : Qs,
                 'Om' : Om,
                 'rnd_seed' : rnd_seed
                }
    file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T) + '_seed' + str(rnd_seed) + 'e2_init'
    # the dict is passed positionally (NumPy names positional arrays arr_0, arr_1, ...)
    np.savez(data_path + file_name, save_dict)
        
  

In [None]:
from ssidid.utility import gen_data, gen_pars, draw_data 

# The string below is a disabled data-generation snippet, kept for reference;
# it is a bare string expression and has no effect.
"""
p, n, T = 10000, 10, 1000
snr = (10, 10)

nr = np.mod(n, 2) # number of real eigenvalues
whiten = False

eig_m_r, eig_M_r, eig_m_c, eig_M_c = 0.9, 0.95, 0.9, 0.95

nc, nc_u = n - nr, (n - nr)//2
ev_r = np.linspace(eig_m_r, eig_M_r, nr)
ev_c = np.exp(2 * 1j * np.pi * np.random.vonmises(mu=0, kappa=1000, size=nc_u))
ev_c = np.linspace(eig_m_c, eig_M_c, (n - nr)//2) * ev_c

pars_true = gen_pars(p=p,n=n, nr=nr, ev_r=ev_r, ev_c=ev_c, snr=snr, whiten=whiten)


pars_true['d'], pars_true['mu0'], pars_true['V0'] = np.zeros(p), np.zeros(n), pars_true['Pi'].copy()
#pars_true['C'] = la.orth(pars_true['C']) * np.sqrt(p) / np.sqrt(n)
pars_true['C'] = pars_true['C'] / np.linalg.norm(pars_true['C'],axis=0) * np.sqrt(p) / np.sqrt(n)
pars_true['R'] = np.asarray(np.random.uniform(size=p, low=snr[0], high=snr[1]), dtype=dtype)

x,y = draw_data(pars_true,T, dtype=dtype)
y -= y.mean(axis=0)

obs_scheme = ObservationScheme(p=p, T=T)
"""

# EM fit with a hand-rolled E-step. This cell relies on names defined by
# earlier cells (pars_true, y, obs_scheme, p, n, T, dtype, rnd_seed, data_path,
# principal_angle, time, progprint_xrange, sp, plt).
# NOTE(review): init_LDS_model and collect_LDS_stats are not imported in any
# visible cell -- confirm where they come from.

# sorted magnitudes and phases of the true dynamics eigenvalues
print(np.sort(np.abs(np.linalg.eigvals(pars_true['A']))), np.sort(np.angle(np.linalg.eigvals(pars_true['A']))))

pars_init = {'A'  : np.diag(np.linspace(0.89, 0.91, n)),
     'Q' : np.eye(n,dtype=dtype),
     'd' : np.zeros(p, dtype=dtype),
     'mu0' : np.zeros(n),
     'V0' : np.zeros((n,n)),
     'C'  : np.random.normal(size=(p,n))/np.sqrt(n),
     'R'  : np.ones(p,dtype=dtype)}

rnd_seed_fit = np.random.get_state()
model = init_LDS_model(pars_init, y, obs_scheme) # reset to initialisation                    

max_iter = 10
likes = np.zeros(max_iter)      # value of state._normalizer per iteration
res = np.zeros((max_iter, n))   # principal angles to the true C per iteration

t = time.time()
for i_ in progprint_xrange(max_iter):

    #model.EM_step()
    # manual replacement for model.EM_step(): E-step on the first state
    # sequence, push the expected statistics, then run the M-step
    state = model.states_list[0]
    E_xtp1_xtT = state._E_step_diag()
    state._set_expected_stats(
    state.smoothed_mus,state.smoothed_sigmas,E_xtp1_xtT)
    model.M_step()    
    #model.states_list[0]._normalizer=None
    likes[i_] = state._normalizer
    _,pars_hat = collect_LDS_stats(model)
    # latent covariance implied by the fitted A and Q (discrete Lyapunov equation)
    pars_hat['Pi'] = np.atleast_2d(sp.linalg.solve_discrete_lyapunov(pars_hat['A'], pars_hat['Q']))
    #Qest = pars_hat['C'][idx_a,:].dot(pars_hat['Pi']).dot(pars_hat['C'][idx_a,:].T) + pars_hat['R'][np.ix_(idx_a,idx_a)]

    res[i_, :] = principal_angle(pars_hat['C'], pars_true['C'])
    #print(np.corrcoef(Qs[0].reshape(-1), Qest.reshape(-1))[0,1])

t = time.time() - t
print('fitting time: ', t)

save_dict = {'p' : p,'n' : n,'T' : T,
             'obs_scheme' : obs_scheme, 'y' : None,
             'pars_true' : pars_true, 'pars_est_EM' : pars_hat, 
             'traces' : [likes], 'ts_EM': [t], 
             'rnd_seed' : rnd_seed_fit
            }    

file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T) + '_seed' + str(rnd_seed) + 'e2_EM'
# save_dict is a plain dict and can only be stored as a pickled object, so
# allow_pickle=False must not be passed: NumPy versions that accept the
# keyword would refuse to save, older ones store a stray array named
# 'allow_pickle' instead.
np.savez(data_path + file_name, save_dict)

plt.semilogy(res[:,:])
plt.xlabel('iteration')
plt.ylabel('principal angle')
plt.show()

In [None]:
# EM fit using model.EM_step(). This cell relies on names defined by earlier
# cells (pars_true, y, obs_scheme, p, n, T, dtype, rnd_seed, data_path,
# principal_angle, time, progprint_xrange, sp, plt).
# NOTE(review): init_LDS_model and collect_LDS_stats are not imported in any
# visible cell -- confirm where they come from.
pars_init = {'A'  : np.diag(np.linspace(0.89, 0.91, n)),
     'Q' : np.eye(n,dtype=dtype),
     'd' : np.zeros(p, dtype=dtype),
     'mu0' : np.zeros(n),
     'V0' : np.zeros((n,n)),
     'C'  : np.random.normal(size=(p,n))/np.sqrt(n),
     'R'  : np.ones(p,dtype=dtype)}

rnd_seed_fit = np.random.get_state()
model = init_LDS_model(pars_init, y, obs_scheme) # reset to initialisation                    

max_iter = 10
likes = np.zeros(max_iter)      # log-likelihood per iteration
res = np.zeros((max_iter, n))   # principal angles to the true C per iteration

t = time.time()
for i_ in progprint_xrange(max_iter):

    model.EM_step()
    likes[i_] = model.log_likelihood()
    _,pars_hat = collect_LDS_stats(model)
    # latent covariance implied by the fitted A and Q (discrete Lyapunov equation)
    pars_hat['Pi'] = np.atleast_2d(sp.linalg.solve_discrete_lyapunov(pars_hat['A'], pars_hat['Q']))
    #Qest = pars_hat['C'][idx_a,:].dot(pars_hat['Pi']).dot(pars_hat['C'][idx_a,:].T) + pars_hat['R'][np.ix_(idx_a,idx_a)]

    res[i_, :] = principal_angle(pars_hat['C'], pars_true['C'])
    # Disabled: Qest is only defined by the commented-out line above, so this
    # print raised NameError. Re-enable both lines together.
    #print(np.corrcoef(Qs[0].reshape(-1), Qest.reshape(-1))[0,1])

t = time.time() - t
print('fitting time: ', t)

save_dict = {'p' : p,'n' : n,'T' : T,
             'obs_scheme' : obs_scheme, 'y' : None,
             'pars_true' : pars_true, 'pars_est_EM' : pars_hat, 
             'traces' : [likes], 'ts_EM': [t], 
             'rnd_seed' : rnd_seed_fit
            }    

file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T) + '_seed' + str(rnd_seed) + 'e2_EM'
# save_dict is a plain dict and can only be stored as a pickled object, so
# allow_pickle=False must not be passed: NumPy versions that accept the
# keyword would refuse to save, older ones store a stray array named
# 'allow_pickle' instead.
np.savez(data_path + file_name, save_dict)

plt.semilogy(res[:,:])
plt.xlabel('iteration')
plt.ylabel('principal angle')
plt.show()

In [None]:
% matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy import linalg as la
import glob, os, psutil, time

from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid import ObservationScheme, progprint_xrange
from subtracking import Grouse, calc_subspace_proj_error
from ssidid.utility import gen_data, gen_pars, draw_data 
from ssidid.icml_scripts import run_default


# Simulate a fully observed LDS for each seed, compute time-lagged covariances,
# fit the model with run_default, and save everything to an .npz file per seed.

i = 1  # index into ps / ns / Ts selecting the problem size
run = '_e2_init'  # NOTE: not used anywhere in this cell

rnd_seeds = range(20,21)
ps = np.array([1e2, 1e3, 1e4, 1e5],dtype=int)
ns = [ 10,  10,  10,  10]
Ts = int(1e4) * np.ones(len(ps),dtype=int)
lag_range = np.arange(20)

dtype=np.float32

p,n,T,snr = ps[i], ns[i], Ts[i], (1., 1.)

# NOTE(review): absolute, machine-specific output directory.
data_path = '/home/mackelab/Desktop/Projects/Stitching/results/'


for rnd_seed in rnd_seeds:

    # --- ground-truth parameters and data -------------------------------
    np.random.seed(rnd_seed)
    nr = 0 # number of real eigenvalues
    whiten = True
    # lower/upper bounds for eigenvalue magnitudes (real / complex)
    eig_m_r, eig_M_r, eig_m_c, eig_M_c = 0.95, 0.99, 0.95, 0.99
    nc, nc_u = n - nr, (n - nr)//2
    ev_r = np.linspace(eig_m_r, eig_M_r, nr)
    # complex eigenvalues: phases from von Mises draws, moduli evenly spaced
    ev_c = np.exp(2 * 1j * np.pi * np.random.vonmises(mu=0, kappa=1000, size=nc_u))
    ev_c = np.linspace(eig_m_c, eig_M_c, (n - nr)//2) * ev_c
    mmap, verbose = False, True
    pars_true = gen_pars(p,n, nr, ev_r, ev_c, snr, whiten, dtype=dtype)
    pars_true['d'], pars_true['mu0'], pars_true['V0'] = np.zeros(p), np.zeros(n), pars_true['Pi'].copy()
    # orthonormal basis of span(C), rescaled by sqrt(p/n)
    pars_true['C'] = la.orth(pars_true['C']) * np.sqrt(p) / np.sqrt(n)
    pars_true['R'] = np.asarray(np.random.uniform(size=p, low=snr[0], high=snr[1]), dtype=dtype)
    x,y = draw_data(pars_true,T, dtype=dtype)
    y -= y.mean(axis=0)  # centre each observed dimension in place


    # random subset of at most 1000 observed dimensions (all of them if p <= 1000)
    idx_a = np.sort(np.random.choice(p, 1000, replace=False)) if p > 1000 else np.arange(p)
    idx_b = idx_a.copy()

    sso = True
    # a single subpopulation containing all p indices
    obs_scheme = ObservationScheme(p=p, T=T, 
                                    sub_pops=(np.arange(p),), 
                                    obs_pops=(0,), 
                                    obs_time=(T,))
    obs_scheme.comp_subpop_stats()    

    W = obs_scheme.comp_coocurrence_weights(lag_range, sso=sso, idx_a=idx_a, idx_b=idx_b)
    print('computing time-lagged covariances')
    Qs, Om = f_l2_Hankel_comp_Q_Om(n=n,y=y,lag_range=lag_range,obs_scheme=obs_scheme,
                          idx_a=idx_a,idx_b=idx_b,W=W,sso=sso,
                          mmap=mmap,data_path=None,ts=None,ms=None)

    # NOTE: redefined on every loop iteration although it does not depend on loop state.
    # The body re-orthonormalises A and B itself, so inputs need not be orthonormal on entry.
    def principal_angle(A, B):
        "A and B must be column-orthogonal."    
        A = np.atleast_2d(A).T if (A.ndim<2) else A
        B = np.atleast_2d(B).T if (B.ndim<2) else B
        A = la.orth(A)
        B = la.orth(B)
        svd = la.svd(A.T.dot(B))
        # clip at 1 before arccos, then express the angles in units of pi/2
        return np.arccos(np.minimum(svd[1], 1.0)) / (np.pi/2)


    # stacked A^m Pi blocks for every lag m in lag_range
    pars_true['X'] = np.vstack([ np.linalg.matrix_power(pars_true['A'],m).dot(pars_true['Pi']) for m in lag_range])
    print('true param. loss: ', f_l2_Hankel_nl(C=pars_true['C'],
                                   X=pars_true['X'],
                                   R=pars_true['R'],
                                   Qs=Qs,
                                   Om=Om,
                                   lag_range=lag_range,
                                   ms=range(len(lag_range)),
                                   idx_a=idx_a,
                                   idx_b=idx_b))
    print_slim(Qs,Om,lag_range,pars_true,idx_a,idx_b,None,False,None)

    pars_est_g = 'default'
    
    # --- initialisation ---------------------------------------------------
    rnd_seed_fit = np.random.get_state()
    #np.random.seed(rnd_seed)
    pars_est={
        'C' : np.random.normal(size=(p,n)) / np.sqrt(n),
        'A' : np.diag(np.linspace(0.89, 0.91, n)),
        'B' : np.eye(n),
        'Pi': np.eye(n),
        'R' : np.zeros(p)
    }
    pars_est['X'] = np.vstack([ np.linalg.matrix_power(pars_est['A'],m).dot(pars_est['Pi']) for m in lag_range])
    C, Pi = pars_est['C'], pars_est['Pi']
    # in-place row normalisation; C aliases pars_est['C'], so the dict entry changes too
    C /= np.atleast_2d(np.sqrt(np.sum(C*C.dot(Pi), axis=1))).T
    pars_est_g = 'default'
    
    # NOTE(review): this overwrites the state captured before the initial C was
    # drawn, so rnd_seed_fit ends up holding the state *after* that draw.
    rnd_seed_fit = np.random.get_state()
    #np.random.seed(rnd_seed)
    # --- fit: two stages, one entry per stage in each tuple -----------------
    pars_est, traces, ts= run_default(
                alphas    = (0.1, 0.005), 
                b1s       = (0.9, 0.9), 
                a_decays  = (0.95, 0.95), 
                batch_sizes = (1, 1), 
                max_zip_sizes =  (100,100), 
                max_iters = (100, 100 ),
                parametrizations = ('nl', 'ln'),
                pars_est=pars_est, pars_true=pars_true, n=n, 
                y=y, sso=sso, obs_scheme=obs_scheme, lag_range=lag_range, 
                idx_a=idx_a, idx_b=idx_b,Qs=Qs,Om=Om, W=W,
                traces=[[], [], []], ts = [], dtype=np.float32)     

    
    # --- save ---------------------------------------------------------------
    save_dict = {'p' : p,
                 'n' : n,
                 'T' : T,
                 'snr' : snr,
                 'obs_scheme' : obs_scheme,
                 'lag_range' : lag_range,
                 'mmap' : mmap,
                 'pars_true' : pars_true,
                 'pars_est' : pars_est,
                 'y' : y,
                 'x' : x,
                 'idx_a' : idx_a,
                 'idx_b' : idx_b,
                 'W' : W,
                 'Qs' : Qs,
                 'Om' : Om,
                 'rnd_seed' : rnd_seed
                }
    file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T) + '_seed' + str(rnd_seed) + 'e2_init'
    # the dict is passed positionally (NumPy names positional arrays arr_0, arr_1, ...)
    np.savez(data_path + file_name, save_dict)
        
  

In [None]:
# Show the empirical variance of each column of y next to the true R.
(y.var(axis=0), pars_true['R'])

In [None]:
# Restrict the current data set to its first 100 observed dimensions and
# rerun the covariance computation and the run_default fit on it.
# NOTE: this overwrites p, y and the C / R entries of pars_true in place, so
# the full-size versions from the previous cell are no longer available.
p = 100
y = y[:, :p]
pars_true['C'] = pars_true['C'][:p,:]
pars_true['R'] = pars_true['R'][:p]

# with p = 100 the else-branch applies: all indices are used
idx_a = np.sort(np.random.choice(p, 1000, replace=False)) if p > 1000 else np.arange(p)
idx_b = idx_a.copy()

sso = True
# a single subpopulation containing all p indices
obs_scheme = ObservationScheme(p=p, T=T, 
                                sub_pops=(np.arange(p),), 
                                obs_pops=(0,), 
                                obs_time=(T,))
obs_scheme.comp_subpop_stats()    

W = obs_scheme.comp_coocurrence_weights(lag_range, sso=sso, idx_a=idx_a, idx_b=idx_b)
print('computing time-lagged covariances')
Qs, Om = f_l2_Hankel_comp_Q_Om(n=n,y=y,lag_range=lag_range,obs_scheme=obs_scheme,
                      idx_a=idx_a,idx_b=idx_b,W=W,sso=sso,
                      mmap=mmap,data_path=None,ts=None,ms=None)


rnd_seed_fit = np.random.get_state()
#np.random.seed(rnd_seed)
pars_est={
    'C' : np.random.normal(size=(p,n)) / np.sqrt(n),
    'A' : np.diag(np.linspace(0.89, 0.91, n)),
    'B' : np.eye(n),
    'Pi': np.eye(n),
    'R' : np.zeros(p)
}
# stacked A^m Pi blocks for every lag m in lag_range
pars_est['X'] = np.vstack([ np.linalg.matrix_power(pars_est['A'],m).dot(pars_est['Pi']) for m in lag_range])
C, Pi = pars_est['C'], pars_est['Pi']
# in-place row normalisation; C aliases pars_est['C'], so the dict entry changes too
C /= np.atleast_2d(np.sqrt(np.sum(C*C.dot(Pi), axis=1))).T
pars_est_g = 'default'

# NOTE(review): this overwrites the state captured before the initial C was
# drawn, so rnd_seed_fit ends up holding the state *after* that draw.
rnd_seed_fit = np.random.get_state()
#np.random.seed(rnd_seed)
# two-stage fit, one entry per stage in each tuple
pars_est, traces, ts= run_default(
            alphas    = (0.1, 0.005), 
            b1s       = (0.9, 0.9), 
            a_decays  = (0.95, 0.95), 
            batch_sizes = (1, 1), 
            max_zip_sizes =  (100,100), 
            max_iters = (100, 100 ),
            parametrizations = ('nl', 'ln'),
            pars_est=pars_est, pars_true=pars_true, n=n, 
            y=y, sso=sso, obs_scheme=obs_scheme, lag_range=lag_range, 
            idx_a=idx_a, idx_b=idx_b,Qs=Qs,Om=Om, W=W,
            traces=[[], [], []], ts = [], dtype=np.float32)     

In [None]:
from subtracking import Grouse, calc_subspace_proj_error

# GROUSE subspace fit on the data currently in the kernel (y, obs_scheme,
# pars_true, p, n, T, lag_range), followed by dynamics extraction from the
# projected data.
rnd_seed_fit = np.random.get_state()    
# settings for GROUSE
a_grouse = 0.25
# NOTE(review): unlike the earlier GROUSE cells, tracker.U is not set from an
# explicit initial C here -- presumably Grouse initialises U itself; confirm.
tracker = Grouse(p, n, a_grouse )
max_epoch_size = 100
max_iter_grouse = 500
get_obs = obs_scheme.gen_get_observed()

# fit GROUSE
print('\n - GROUSE')
tracker.step = a_grouse
ct = 1.
# per outer iteration: column 0 = projection error, columns 1..n = principal angles
error = np.zeros((max_iter_grouse, n+1))
t = time.time()
get_obs = obs_scheme.gen_get_observed()  # same call as above, repeated

for i in range(max_iter_grouse):
    if np.mod(i,max_iter_grouse//10) == 0:
        print('finished % ' + str((100*i)//max_iter_grouse))
    # random subset of at most max_epoch_size time points, excluding the
    # last max(lag_range)+1 samples
    idx = np.random.permutation(T-np.max(lag_range)-1)
    idx = idx[:max_epoch_size] if len(idx) > max_epoch_size else idx
    for j in range(len(idx)):
        # boolean column marking the dimensions returned by get_obs for this time point
        obs_idx =  np.zeros((p,1), dtype=bool)
        obs_idx[get_obs(idx[j])] = True
        tracker.consume(y[idx[j],:].reshape(-1,1), obs_idx)
        ct += 1     
        tracker.step = a_grouse / ct  # step size decays as 1/ct

    error[i] = np.hstack((calc_subspace_proj_error(pars_true['C'], tracker.U), principal_angle(pars_true['C'], tracker.U)))
t = time.time() - t
pars_est_g = {'C' : tracker.U.copy()}

print('final proj. error (est.): ', str(error[-1][0]))
print(principal_angle(pars_est_g['C'], pars_true['C']))

# NOTE(review): error[:,1:] holds the principal angles, yet the titles say
# 'subspace proj. error' -- confirm which quantity was meant to be shown.
plt.subplot(1,2,1)
plt.plot(error[:,1:])
plt.title('subspace proj. error (GROUSE)')
plt.subplot(1,2,2)
plt.loglog(error[:,1:])
plt.title('subspace proj. error (GROUSE)')
plt.show()

traces_g = [error.copy()]
ts_g = [t]            

# project every time point onto the fitted subspace
print('filtering data') 
obs_scheme.gen_mask_from_scheme()
tracker = Grouse(p, n, 0. )
tracker.U = pars_est_g['C'].copy()
x_g = np.zeros((T,n))
# NOTE: `t` (the elapsed fitting time, already stored in ts_g) is reused as a time index
for t in range (T):
    x_g[t,:] = tracker._project(y[t,:].reshape(p,1), obs_scheme.mask[t,:].reshape(p,1)).reshape(-1)
# drop the mask again and switch mask usage off
obs_scheme.mask = None    
obs_scheme.use_mask = False

lag_range_g = np.arange(20)
kl_ = np.max(lag_range_g) + 1
print('extracting dynamics parameters') 
# stack, for each lag m, the n x n cross-covariance block between x_g shifted by m and x_g
pars_est_g['X'] = np.vstack([np.cov(x_g[m:-(kl_+1)+m, :].T, x_g[:-(kl_+1), :].T)[:n,n:] for m in lag_range_g])
# least-squares solution mapping the blocks for lags 0..18 onto those for lags 1..19
pars_est_g['A'] = np.linalg.lstsq(pars_est_g['X'][:(len(lag_range_g)-1)*n,:], pars_est_g['X'][n:len(lag_range_g)*n,:])[0]
# symmetrised lag-0 block
pars_est_g['Pi'] = (pars_est_g['X'][:n,:] + pars_est_g['X'][:n,:].T)/2 
ev_est = np.linalg.eigvals(pars_est_g['A'])
del x_g                


In [None]:
import py4sid
# Moment-based estimate of A and C via py4sid, compared with the true C.
kl_ = 3
A_4sid, C_4sid = py4sid.estimate_parameters_moments(y,kl_,n)
pars_est_4sid = {'A' : A_4sid, 'C' : C_4sid}

angles_4sid = principal_angle(pars_est_4sid['C'], pars_true['C'])
print(angles_4sid)


In [None]:

from sklearn import decomposition

# PCA baseline: fit n components to y and score the result like the other fits.
pca = decomposition.PCA(n_components=n)

t_fit_start = time.time()
pca.fit(y)
t = time.time() - t_fit_start
print('fitting time ', t)

C_pca = pca.components_.T
# the same diagonal block (explained variances) is repeated once per lag
pca_var_block = np.diag(pca.explained_variance_)
pars_pca = {
    'C' : C_pca.copy(),
    'X' : np.vstack([pca_var_block] * len(lag_range)),
    'R' : np.zeros(p),
}

# covariance implied by the PCA fit, built from the first n rows of X
pca_lag0 = pars_pca['X'][:n,:]
Qpca = pars_pca['C'].dot(pca_lag0).dot(pars_pca['C'].T)
plt.imshow(Qpca, interpolation='None')
plt.show()

pca_idx_a = np.arange(len(idx_a),dtype=int)
pca_idx_b = np.arange(len(idx_a),dtype=int)
print_slim(Qs,Om,lag_range,pars_pca,pca_idx_a,pca_idx_b,
           None,False,data_path)

inst_corr = np.corrcoef(Qpca.reshape(-1), Qs[0].reshape(-1))[0,1]
print('inst. corrcoeff', inst_corr)
print('prin. angles',  principal_angle(pars_true['C'], pars_pca['C']) )


In [None]:
# Re-seed and regenerate ground-truth parameters and data from the settings
# currently in the kernel (p, n, nr, ev_r, ev_c, snr, whiten, dtype, T).
np.random.seed(rnd_seed)
pars_true = gen_pars(p,n, nr, ev_r, ev_c, snr, whiten, dtype=dtype)

pars_true['d'] = np.zeros(p)
pars_true['mu0'] = np.zeros(n)
pars_true['V0'] = pars_true['Pi'].copy()

# orthonormal basis of span(C), rescaled by sqrt(p/n)
pars_true['C'] = la.orth(pars_true['C']) * np.sqrt(p) / np.sqrt(n)

noise_draw = np.random.uniform(low=snr[0], high=snr[1], size=p)
pars_true['R'] = np.asarray(noise_draw, dtype=dtype)

x,y = draw_data(pars_true,T, dtype=dtype)
# centre each observed dimension in place
np.subtract(y, y.mean(axis=0), out=y)

In [None]:
from pykalman import KalmanFilter
from mpl_toolkits.mplot3d import Axes3D

# Build a pykalman KalmanFilter from the fitted parameters (pars_est) and run
# it on the first 1000 observed dimensions of y.
obs_scheme.gen_mask_from_scheme()



# placeholder whose last entry seeds the filter's initial state mean
smoothed_state_means = [np.zeros(n)]
x_est = np.zeros((0,n))

# NOTE(review): assumes p >= 1000; an earlier cell can set p = 100 -- confirm.
y_idx = np.arange(1000)
# NOTE(review): pars_est['R'][y_idx] looks 1-D (R is initialised as np.zeros(p)
# elsewhere), while a covariance argument is presumably expected to be a square
# matrix -- confirm against the pykalman documentation.
kf = KalmanFilter(transition_matrices = pars_est['A'], 
                  observation_matrices = pars_est['C'][y_idx,:],
                  observation_covariance = pars_est['R'][y_idx], 
                  initial_state_mean=smoothed_state_means[-1]
                 )

x_est = np.zeros((0,n))
t = time.time()
(filtered_state_means, filtered_state_covariances) = kf.filter(y[:,y_idx])
#(smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
# NOTE(review): with the smoothing call commented out, smoothed_state_means is
# still the zero placeholder from above, so x_est receives that placeholder
# rather than any filter output -- confirm what was intended.
x_est = np.vstack((x_est, smoothed_state_means))
print('duration for filter+smooth', time.time() - t)

#measurements = ma.asarray(y)
#measurements[np.invert(obs_scheme.mask)] = ma.masked


In [None]:

# Re-run the Kalman filter and plot true vs. estimated latent trajectories,
# one subplot per latent dimension. Relies on kf, x, n, T from earlier cells.
x_est = np.zeros((0,n))
t = time.time()
# NOTE(review): `measurements` is only ever assigned in commented-out code in
# the visible notebook, and kf is built elsewhere for a different number of
# observed dimensions than the 10 columns used here -- confirm before running.
(filtered_state_means, filtered_state_covariances) = kf.filter(measurements[:,:10])
#(smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
x_est = np.vstack((x_est, smoothed_state_means))
print('duration for filter+smooth', time.time() - t)

plt.figure(figsize=(20,16))

# n is an integer count, so it is passed directly (len(n) raised TypeError);
# the colormap is reached through plt.cm because `cm` is never imported.
clrs = plt.cm.hsv(np.linspace(0, 1, n))

# true latent trajectories; subplot indices are 1-based, hence j+1.
# plt.hold(True) was dropped: it no longer exists in Matplotlib and repeated
# plot calls on the same axes overlay by default.
for j in range(n):
    plt.subplot(n,1,j+1)
    plt.plot(x[:,j], 'k', linewidth=3)
    #plt.axis([0, T+1, -2.5, 2.5])

# estimated trajectories on top, plus a vertical marker at time T
# NOTE(review): smoothed_state_means must be a 2-D array here; with the
# smoothing call above commented out it may still be a placeholder list.
for j in range(n):
    plt.subplot(n,1,j+1)
    plt.plot(smoothed_state_means[:,j], color=clrs[0], linewidth=1.5)
    plt.plot([T, T], [-10, 10], 'k-')

plt.show()