# Experiment 'e1': recovering LDS parameters from varying amounts of data

- system size fixed, i.e. p = 1k, n = 20, and signal-to-noise ratio such that R gives 90% of total variance. 
- fitting first dynamics-agnostic, then switching to the linearized parameterization, with in particular $A$ extracted from the agnostic fit
- direct comparison with GROUSE on subspace identification task (principal angles). 

## notes:
- this is a master file. Individual runs for different data-set lengths $T \in [10^3, 10^4, 10^5]$ were run on instances of this file that might have been slightly altered (e.g. reducing max batch size and instead running more epochs for small T's). They should be backed up at the lab dropbox folder. 

# Load stored full data, extract observed data for this experiment

In [None]:
% matplotlib inline
from ssidid.icml_scripts import run_default
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy import linalg as la
import glob, os, psutil, time

from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import get_subpop_stats, gen_data
from subtracking import Grouse, calc_subspace_proj_error

# ---- experiment configuration (initial e1 run) ----

# lags 0..29; kl_ is the number of lags
lag_range = np.arange(30)
kl_ = lag_range.max() + 1
sso = True

# tag appended to the name of the per-seed initialisation file
init = 'e1_init'

# problem size: p observed variables, n latent dimensions,
# T_full samples in the full data set (100000 plus one per lag)
p = 1000
T_full = 100000 + kl_
n = 10
nr = 0 # number of real eigenvalues
snr = (9., 9.)
whiten = True

# eigenvalue range settings handed to gen_data
# (presumably min/max for real and complex eigenvalues - confirm in gen_data)
eig_m_r, eig_M_r = 0.9, 0.99
eig_m_c, eig_M_c = 0.9, 0.99

# I/O settings
mmap = True
verbose = True
chunksize = np.min([p, 100])

# data-set lengths to fit (each extended by the number of lags)
Ts = kl_ + np.array([1000, 3000, 10000, 30000, 100000])

rnd_seeds = range(10, 20)

def principal_angle(A, B):
    """Return the normalized principal angles between span(A) and span(B).

    Parameters
    ----------
    A, B : array_like
        Matrices whose columns span the two subspaces. A 1-D input is
        treated as a single column vector. The columns do NOT have to be
        orthonormal: an orthonormal basis of each column space is computed
        internally with ``scipy.linalg.orth``.

    Returns
    -------
    numpy.ndarray
        Principal angles divided by pi/2, so 0 means aligned and 1 means
        orthogonal. One value per singular value of the product of the two
        orthonormal bases, in the order returned by the SVD (largest
        singular value, i.e. smallest angle, first).
    """
    # Accept lists/tuples as well as arrays (the .ndim checks need arrays).
    A = np.asarray(A)
    B = np.asarray(B)
    A = np.atleast_2d(A).T if (A.ndim < 2) else A
    B = np.atleast_2d(B).T if (B.ndim < 2) else B
    basis_a = la.orth(A)
    basis_b = la.orth(B)
    # Singular values of Qa^T Qb are the cosines of the principal angles.
    cosines = la.svd(basis_a.T.dot(basis_b), compute_uv=False)
    # Round-off can push a cosine marginally above 1, where arccos is NaN.
    return np.arccos(np.minimum(cosines, 1.0)) / (np.pi / 2)


# Main experiment loop: for every seed, generate one full-length data set,
# then for every data-set length T fit the model with run_default and with
# GROUSE on the first T samples and store everything in one .npz per (seed, T).
#
# Compatibility notes:
#  * `float` / `int` replace the `np.float` / `np.int` aliases (removed from
#    NumPy); they were plain aliases of the builtins, so results are unchanged.
#  * the result files hold a pickled dict, which np.load only returns when
#    allow_pickle=True is passed. Only load files you produced yourself.
for rnd_seed in rnd_seeds:
    data_path =  '/home/mackelab/Desktop/Projects/Stitching/results/icml_e1/seed_' + str(int(rnd_seed)) + '/'

    # ---- generate the full data set for this seed ----
    np.random.seed(rnd_seed)
    pars_true, x, y, _, _ = gen_data(p,n,lag_range,T_full, nr,
                                     eig_m_r, eig_M_r,
                                     eig_m_c, eig_M_c,
                                     mmap, chunksize,
                                     data_path,
                                     snr=snr, whiten=whiten)

    # Mean-centred copy of the full data, kept on disk as 'y_full'.
    # Deleting the memmap object releases it again.
    y_full = np.memmap(data_path+'y_full', dtype=float, mode='w+', shape=(T_full,p))
    y_full[:] = y[:].copy()
    y_full -= np.mean(y_full, axis=0)
    del y_full

    # Initialisation file: ground truth and settings, no estimates yet.
    save_dict = {'p' : p,
                 'n' : n,
                 'T' : T_full,
                 'snr' : snr,
                 'obs_scheme' : None,
                 'lag_range' : lag_range,
                 'x' : x,
                 'mmap' : mmap,
                 'y' : data_path if mmap else y,
                 'pars_true' : pars_true,
                 'pars_est' : None,
                 'idx_a' : np.arange(p),
                 'idx_b' : np.arange(p),
                 'W' : None,
                 'Qs' : None,
                 'Om' : None,
                 'rnd_seed' : rnd_seed
                }
    file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T_full) + 'snr' + str(int(np.mean(snr)//1)) + 'e1_init'
    np.savez(data_path + file_name, save_dict)


    for T in Ts:

        print('T = ' , str(T))

        # Reload ground truth from the initialisation file written above
        # (a fresh dict per T, since pars_true['X'] is overwritten below).
        file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T_full) + 'snr' + str(int(np.mean(snr)//1)) +  init
        load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()
        pars_true = load_file['pars_true']
        idx_a, idx_b = load_file['idx_a'].copy(), load_file['idx_b'].copy()

        # ---- write the first T samples to the working file 'y' ----
        y_full = np.memmap(data_path+'y_full', dtype=float, mode='r', shape=(T_full,p))
        y = np.memmap(data_path+'y', dtype=float, mode='w+', shape=(T,p))
        y[:] = y_full[:T, :].copy()
        del y_full
        chunksize = np.minimum(p, 100)
        if mmap:
            # Release the write handle, then remove the column means one
            # block of `chunksize` columns at a time via fresh r+ maps.
            del y
            print('ensuring zero-mean data for given observation scheme')
            for i in progprint_xrange(p//chunksize, perline=10):
                y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
                y[:, i*chunksize:(i+1)*chunksize] = y[:, i*chunksize:(i+1)*chunksize] - y[:, i*chunksize:(i+1)*chunksize].mean(axis=0)
                del y
            # remaining columns when p is not a multiple of chunksize
            if (p//chunksize)*chunksize < p:
                y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
                y[:, (p//chunksize)*chunksize:] = y[:, (p//chunksize)*chunksize:] - y[:, (p//chunksize)*chunksize:].mean(axis=0)
                del y
            y = np.memmap(data_path+'y', dtype=float, mode='r', shape=(T,p))
        else:
            # `y` is still the open, writable map here. (It used to be
            # deleted before this branch, which raised a NameError.)
            y -= y.mean(axis=0)

        run = '_e1'

        # ---- observation scheme: one sub-population holding all p variables ----
        print('re-computing observation scheme')
        obs_scheme = ObservationScheme(p=p, T=T,
                                        sub_pops=(np.arange(p),),
                                        obs_pops=(0,),
                                        obs_time=(T,))
        obs_scheme.comp_subpop_stats()
        sub_pops = obs_scheme.sub_pops

        W = obs_scheme.comp_coocurrence_weights(lag_range, sso=True, idx_a=idx_a, idx_b=idx_b)

        print('re-computing observed covariance matrices')
        Qs, Om = f_l2_Hankel_comp_Q_Om(n=n,y=y,lag_range=lag_range,obs_scheme=obs_scheme,
                              idx_a=idx_a,idx_b=idx_b,W=W,sso=True,
                              mmap=mmap,data_path=data_path,ts=None,ms=None)

        # Ground-truth X: the blocks A^m Pi for every lag m, stacked vertically.
        pars_true['X'] = np.vstack([ np.linalg.matrix_power(pars_true['A'],m).dot(pars_true['Pi']) for m in lag_range])
        print('true param. loss: ', f_l2_Hankel_nl(C=pars_true['C'],
                                       X=pars_true['X'],
                                       R=pars_true['R'],
                                       Qs=Qs,
                                       Om=Om,
                                       lag_range=lag_range,
                                       ms=range(len(lag_range)),
                                       idx_a=idx_a,
                                       idx_b=idx_b))
        print_slim(Qs,Om,lag_range,pars_true,idx_a,idx_b,None,False,data_path)

        # NOTE: from here on `rnd_seed` holds the full RNG state (not the
        # integer seed); that state is what gets stored in the result file.
        rnd_seed = np.random.get_state()
        #np.random.seed(rnd_seed)
        # Two-stage fit: parametrization 'nl' first, then 'ln'.
        pars_est, traces, ts= run_default(
                    alphas    = (0.01, 0.001),
                    b1s       = (0.98, 0.95),
                    a_decays  = (0.98, 0.98),
                    batch_sizes = (1, 10),
                    max_zip_sizes =  (1000,250),
                    max_iters = (100, 200),
                    parametrizations = ('nl', 'ln'),
                    pars_est='default', pars_true=pars_true, n=n,
                    y=y, sso=sso, obs_scheme=obs_scheme, lag_range=lag_range,
                    idx_a=idx_a, idx_b=idx_b,Qs=Qs,Om=Om, W=W,
                    traces=[[], [], []], ts = [])


        pars_est_g = 'default'
        # settings for GROUSE
        a_grouse = 1
        tracker = Grouse(p, n, a_grouse )
        max_epoch_size = 1000
        max_iter_grouse = 1000
        get_obs = obs_scheme.gen_get_observed()

        # fit GROUSE
        print('\n - GROUSE')
        tracker.step = a_grouse
        ct = 1.
        # column 0: subspace projection error, columns 1..n: principal angles
        error = np.zeros((max_iter_grouse, n+1))
        t = time.time()
        get_obs = obs_scheme.gen_get_observed()

        for i in range(max_iter_grouse):
            if np.mod(i,max_iter_grouse//10) == 0:
                print('finished % ' + str((100*i)//max_iter_grouse))
            # each epoch visits at most max_epoch_size randomly chosen time points
            idx = np.random.permutation(T-np.max(lag_range)-1)
            idx = idx[:max_epoch_size] if len(idx) > max_epoch_size else idx
            for j in range(len(idx)):
                obs_idx =  np.zeros((p,1), dtype=bool)
                obs_idx[get_obs(idx[j])] = True
                tracker.consume(y[idx[j],:].reshape(-1,1), obs_idx)
                ct += 1
                # step size decays as 1/(number of updates)
                tracker.step = a_grouse / ct

            error[i] = np.hstack((calc_subspace_proj_error(pars_true['C'], tracker.U), principal_angle(pars_true['C'], tracker.U)))
        t = time.time() - t
        pars_est_g = {'C' : tracker.U.copy()}

        print('final proj. error (est.): ', str(error[-1][0]))

        # NOTE(review): both panels plot columns 1.. of `error` (the principal
        # angles) although the titles say projection error - confirm intent.
        plt.subplot(1,2,1)
        plt.plot(error[:,1:])
        plt.title('subspace proj. error (GROUSE)')
        plt.subplot(1,2,2)
        plt.loglog(error[:,1:])
        plt.title('subspace proj. error (GROUSE)')
        plt.show()


        print('final principal angles')
        C = pars_est_g['C'].copy()
        print(principal_angle(pars_true['C'], C))

        del C
        traces_g = [error.copy()]
        ts_g = [t]

        # extracting dynamics for GROUSE
        print('filtering data')
        obs_scheme.gen_mask_from_scheme()
        tracker = Grouse(p, n, 0. )
        tracker.U = pars_est_g['C'].copy()
        # project every sample onto the GROUSE subspace
        x_g = np.zeros((T,n))
        for t in range (T):
            x_g[t,:] = tracker._project(y[t,:].reshape(p,1), obs_scheme.mask[t,:].reshape(p,1)).reshape(-1)
        obs_scheme.mask = None
        print('extracting dynamics parameters')
        # np.cov of the lag-m shifted and unshifted projections is 2n x 2n;
        # [:n, n:] keeps the cross-covariance block for lag m.
        pars_est_g['X'] = np.vstack([np.cov(x_g[m:-(kl_+1)+m, :].T, x_g[:-(kl_+1), :].T)[:n,n:] for m in lag_range])
        # least-squares map from lag blocks 0..L-2 onto lag blocks 1..L-1
        pars_est_g['A'] = np.linalg.lstsq(pars_est_g['X'][:(len(lag_range)-1)*n,:], pars_est_g['X'][n:len(lag_range)*n,:])[0]
        # symmetrised lag-0 block
        pars_est_g['Pi'] = (pars_est_g['X'][:n,:] + pars_est_g['X'][:n,:].T)/2
        ev_est = np.linalg.eigvals(pars_est_g['A'])
        del x_g

        # eigenvalues of A: green = run_default fit, blue = GROUSE, black = truth
        plt.plot(np.real(np.linalg.eigvals( pars_est['A'])), 'go-')
        plt.plot(np.real(np.linalg.eigvals(pars_est_g['A'])), 'bo-')
        plt.plot(np.real(np.linalg.eigvals(pars_true['A'])), 'k')
        plt.show()
        plt.plot(np.imag(np.linalg.eigvals( pars_est['A'])), 'go-')
        plt.plot(np.imag(np.linalg.eigvals(pars_est_g['A'])), 'bo-')
        plt.plot(np.imag(np.linalg.eigvals(pars_true['A'])), 'k')
        plt.show()


        save_dict = {'p' : p,'n' : n,'T' : T,'snr' : snr,'lag_range' : lag_range,
                     'obs_scheme' : obs_scheme, 'mmap' : mmap,'y' : data_path if mmap else y,
                     'pars_true' : pars_true, 'pars_est' : pars_est, 'pars_est_g' : pars_est_g,
                     'idx_a' : idx_a,'idx_b' : idx_b, 'W' : W,'Qs' : None,'Om' : None,
                     'traces' : traces, 'traces_g' : traces_g, 'ts':ts, 'ts_g':ts_g,
                     'rnd_seed' : rnd_seed
                    }
        file_name = 'p'+str(p)+'n'+str(n)+'T'+str(T)+'snr'+str(int(np.mean(snr)//1))+'_run'+str(run)
        #file_name = 'p'+str(p)+'n'+str(n)+'T'+str(T)+str(run)+'_'+str(overlap)
        np.savez(data_path + file_name, save_dict)



# Adding ssidid fits

In [None]:
% matplotlib inline
from ssidid.icml_scripts import run_default
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy import linalg as la
import glob, os, psutil, time

from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import get_subpop_stats, gen_data
from subtracking import Grouse, calc_subspace_proj_error

# ---- configuration for re-running the ssidid fits ----

# lags 0..9; kl_ is the number of lags
lag_range = np.arange(10)
kl_ = np.max(lag_range) + 1
sso = True

# File-name tags. `init` is read by the loop below when it builds the name of
# the initialisation file, so it has to be defined in this cell as well.
# `run` uses the same '_e1' tag as the other cells; the loop below sets it to
# '_e1' before its first use.
init = 'e1_init'
run = '_e1'

# problem size; T_full is the length of the stored full data set
p, T_full, n, snr = 1000, 100030, 10, (9., 9.)
nr = 0 # number of real eigenvalues
whiten = True
eig_m_r, eig_M_r, eig_m_c, eig_M_c = 0.9, 0.99, 0.9, 0.99
mmap, verbose = True, True
chunksize = np.min((p, 100))

rnd_seeds = range(10, 20)
# data-set lengths for this pass (each extended by the number of lags)
Ts = np.array([5000, 50000]) + kl_


def principal_angle(A, B):
    """Return the normalized principal angles between span(A) and span(B).

    Parameters
    ----------
    A, B : array_like
        Matrices whose columns span the two subspaces. A 1-D input is
        treated as a single column vector. The columns do NOT have to be
        orthonormal: an orthonormal basis of each column space is computed
        internally with ``scipy.linalg.orth``.

    Returns
    -------
    numpy.ndarray
        Principal angles divided by pi/2, so 0 means aligned and 1 means
        orthogonal. One value per singular value of the product of the two
        orthonormal bases, in the order returned by the SVD (largest
        singular value, i.e. smallest angle, first).
    """
    # Accept lists/tuples as well as arrays (the .ndim checks need arrays).
    A = np.asarray(A)
    B = np.asarray(B)
    A = np.atleast_2d(A).T if (A.ndim < 2) else A
    B = np.atleast_2d(B).T if (B.ndim < 2) else B
    basis_a = la.orth(A)
    basis_b = la.orth(B)
    # Singular values of Qa^T Qb are the cosines of the principal angles.
    cosines = la.svd(basis_a.T.dot(basis_b), compute_uv=False)
    # Round-off can push a cosine marginally above 1, where arccos is NaN.
    return np.arccos(np.minimum(cosines, 1.0)) / (np.pi / 2)


# Re-run the run_default fit for every (seed, T) and write it back into the
# existing result file, keeping the GROUSE results already stored there.
#
# Compatibility notes:
#  * `float` / `int` replace the `np.float` / `np.int` aliases (removed from
#    NumPy); they were plain aliases of the builtins, so results are unchanged.
#  * the result files hold a pickled dict, which np.load only returns when
#    allow_pickle=True is passed. Only load files you produced yourself.
for rnd_seed in rnd_seeds:

    print('seed : ' + str(rnd_seed))

    data_path =  '/home/mackelab/Desktop/Projects/Stitching/results/icml_e1/seed_' + str(int(rnd_seed)) + '/'

    y_full = np.memmap(data_path+'y_full', dtype=float, mode='r', shape=(T_full,p))

    file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T_full) + 'snr' + str(int(np.mean(snr)//1)) + 'e1_init'
    load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()
    pars_true = load_file['pars_true'].copy()

    for T in Ts:

        print('T = ' , str(T))

        # Reload ground truth from the initialisation file (a fresh dict per
        # T, since pars_true['X'] is overwritten below). The 'e1_init' tag is
        # spelled out because `init` is not defined in this cell.
        file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T_full) + 'snr' + str(int(np.mean(snr)//1)) + 'e1_init'
        load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()
        pars_true = load_file['pars_true']
        idx_a, idx_b = load_file['idx_a'].copy(), load_file['idx_b'].copy()

        # ---- write the first T samples to the working file 'y' ----
        y_full = np.memmap(data_path+'y_full', dtype=float, mode='r', shape=(T_full,p))
        y = np.memmap(data_path+'y', dtype=float, mode='w+', shape=(T,p))
        y[:] = y_full[:T, :].copy()
        del y_full
        chunksize = np.minimum(p, 100)
        if mmap:
            # Release the write handle, then remove the column means one
            # block of `chunksize` columns at a time via fresh r+ maps.
            del y
            print('ensuring zero-mean data for given observation scheme')
            for i in progprint_xrange(p//chunksize, perline=10):
                y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
                y[:, i*chunksize:(i+1)*chunksize] = y[:, i*chunksize:(i+1)*chunksize] - y[:, i*chunksize:(i+1)*chunksize].mean(axis=0)
                del y
            # remaining columns when p is not a multiple of chunksize
            if (p//chunksize)*chunksize < p:
                y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
                y[:, (p//chunksize)*chunksize:] = y[:, (p//chunksize)*chunksize:] - y[:, (p//chunksize)*chunksize:].mean(axis=0)
                del y
            y = np.memmap(data_path+'y', dtype=float, mode='r', shape=(T,p))
        else:
            # `y` is still the open, writable map here. (It used to be
            # deleted before this branch, which raised a NameError.)
            y -= y.mean(axis=0)

        # Existing result file for this (seed, T); its GROUSE entries and
        # stored settings are carried over into the new save below.
        run = '_e1'
        file_name = 'p'+str(p)+'n'+str(n)+'T'+str(T)+'snr'+str(int(np.mean(snr)//1))+'_run'+str(run)
        load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()

        # ---- observation scheme: one sub-population holding all p variables ----
        print('re-computing observation scheme')
        obs_scheme = ObservationScheme(p=p, T=T,
                                        sub_pops=(np.arange(p),),
                                        obs_pops=(0,),
                                        obs_time=(T,))
        obs_scheme.comp_subpop_stats()
        sub_pops = obs_scheme.sub_pops

        W = obs_scheme.comp_coocurrence_weights(lag_range, sso=True, idx_a=idx_a, idx_b=idx_b)

        print('re-computing observed covariance matrices')
        Qs, Om = f_l2_Hankel_comp_Q_Om(n=n,y=y,lag_range=lag_range,obs_scheme=obs_scheme,
                              idx_a=idx_a,idx_b=idx_b,W=W,sso=True,
                              mmap=mmap,data_path=data_path,ts=None,ms=None)

        # Ground-truth X: the blocks A^m Pi for every lag m, stacked vertically.
        pars_true['X'] = np.vstack([ np.linalg.matrix_power(pars_true['A'],m).dot(pars_true['Pi']) for m in lag_range])
        print('true param. loss: ', f_l2_Hankel_nl(C=pars_true['C'],
                                       X=pars_true['X'],
                                       R=pars_true['R'],
                                       Qs=Qs,
                                       Om=Om,
                                       lag_range=lag_range,
                                       ms=range(len(lag_range)),
                                       idx_a=idx_a,
                                       idx_b=idx_b))
        print_slim(Qs,Om,lag_range,pars_true,idx_a,idx_b,None,False,data_path)

        # NOTE: from here on `rnd_seed` holds the full RNG state (not the
        # integer seed); that state is what gets stored in the result file.
        rnd_seed = np.random.get_state()
        #np.random.seed(rnd_seed)
        # Two-stage fit: parametrization 'nl' first, then 'ln'.
        pars_est, traces, ts= run_default(
                    alphas    = (0.01, 0.001),
                    b1s       = (0.98, 0.95),
                    a_decays  = (0.98, 0.98),
                    batch_sizes = (1, 10),
                    max_zip_sizes =  (1000,250),
                    max_iters = (100, 200),
                    parametrizations = ('nl', 'ln'),
                    pars_est='default', pars_true=pars_true, n=n,
                    y=y, sso=sso, obs_scheme=obs_scheme, lag_range=lag_range,
                    idx_a=idx_a, idx_b=idx_b,Qs=Qs,Om=Om, W=W,
                    traces=[[], [], []], ts = [])


        # eigenvalues of A: green = new fit, black = ground truth
        plt.plot(np.real(np.linalg.eigvals( pars_est['A'])), 'go-')
        plt.plot(np.real(np.linalg.eigvals(pars_true['A'])), 'k')
        plt.show()
        plt.plot(np.imag(np.linalg.eigvals( pars_est['A'])), 'go-')
        plt.plot(np.imag(np.linalg.eigvals(pars_true['A'])), 'k')
        plt.show()

        # New fit results plus the settings and GROUSE results from the old file.
        save_dict = {'p' : p,'n' : n,'T' : T,'snr' : snr,'lag_range' : load_file['lag_range'],
                     'obs_scheme' : load_file['obs_scheme'], 'mmap' : mmap,'y' : data_path if mmap else y,
                     'pars_true' : load_file['pars_true'], 'pars_est' : pars_est,
                     'idx_a' : idx_a,'idx_b' : idx_b, 'W' : None,'Qs' : None,'Om' : None,
                     'traces' : traces, 'ts': ts,
                     'rnd_seed' : rnd_seed,
                     'pars_est_g' : load_file['pars_est_g'], 'traces_g' :  load_file['traces_g'], 'ts_g': load_file['ts_g']
                    }
        file_name = 'p'+str(p)+'n'+str(n)+'T'+str(T)+'snr'+str(int(np.mean(snr)//1))+'_run'+str(run)
        #file_name = 'p'+str(p)+'n'+str(n)+'T'+str(T)+str(run)+'_'+str(overlap)
        np.savez(data_path + file_name, save_dict)



# Adding GROUSE fits

In [None]:
% matplotlib inline
from ssidid.icml_scripts import run_default
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy import linalg as la
import glob, os, psutil, time

from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import get_subpop_stats, gen_data
from subtracking import Grouse, calc_subspace_proj_error

# ---- configuration for re-extracting the GROUSE dynamics ----

# lags 0..9; kl_ is the number of lags
lag_range = np.arange(10)
kl_ = np.max(lag_range) + 1
sso = True

# File-name tags. `run` is read by the loop below when it builds the name of
# the result file, so it has to be defined in this cell as well; '_e1' is the
# tag used by the other cells.
init = 'e1_init'
run = '_e1'

# problem size; T_full is the length of the stored full data set
p, T_full, n, snr = 1000, 100030, 10, (9., 9.)
nr = 0 # number of real eigenvalues
whiten = True
eig_m_r, eig_M_r, eig_m_c, eig_M_c = 0.9, 0.99, 0.9, 0.99
mmap, verbose = True, True
chunksize = np.min((p, 100))

rnd_seeds = range(10, 20)
# data-set lengths for this pass (each extended by the number of lags)
Ts = np.array([5000, 50000]) + kl_


def principal_angle(A, B):
    """Return the normalized principal angles between span(A) and span(B).

    Parameters
    ----------
    A, B : array_like
        Matrices whose columns span the two subspaces. A 1-D input is
        treated as a single column vector. The columns do NOT have to be
        orthonormal: an orthonormal basis of each column space is computed
        internally with ``scipy.linalg.orth``.

    Returns
    -------
    numpy.ndarray
        Principal angles divided by pi/2, so 0 means aligned and 1 means
        orthogonal. One value per singular value of the product of the two
        orthonormal bases, in the order returned by the SVD (largest
        singular value, i.e. smallest angle, first).
    """
    # Accept lists/tuples as well as arrays (the .ndim checks need arrays).
    A = np.asarray(A)
    B = np.asarray(B)
    A = np.atleast_2d(A).T if (A.ndim < 2) else A
    B = np.atleast_2d(B).T if (B.ndim < 2) else B
    basis_a = la.orth(A)
    basis_b = la.orth(B)
    # Singular values of Qa^T Qb are the cosines of the principal angles.
    cosines = la.svd(basis_a.T.dot(basis_b), compute_uv=False)
    # Round-off can push a cosine marginally above 1, where arccos is NaN.
    return np.arccos(np.minimum(cosines, 1.0)) / (np.pi / 2)


# Recompute the GROUSE dynamics estimates (X, A, Pi) with 20 lags for every
# (seed, T) and write them back into the existing result file.
#
# Compatibility notes:
#  * `float` / `int` replace the `np.float` / `np.int` aliases (removed from
#    NumPy); they were plain aliases of the builtins, so results are unchanged.
#  * the result files hold a pickled dict, which np.load only returns when
#    allow_pickle=True is passed. Only load files you produced yourself.
for rnd_seed in rnd_seeds:

    print('seed : ' + str(rnd_seed))

    data_path =  '/home/mackelab/Desktop/Projects/Stitching/results/icml_e1/seed_' + str(int(rnd_seed)) + '/'

    y_full = np.memmap(data_path+'y_full', dtype=float, mode='r', shape=(T_full,p))

    file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T_full) + 'snr' + str(int(np.mean(snr)//1)) + 'e1_init'
    load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()
    pars_true = load_file['pars_true'].copy()

    for T in Ts:

        print('T = ' , str(T))

        file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T_full) + 'snr' + str(int(np.mean(snr)//1)) +  init
        load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()
        pars_true = load_file['pars_true']
        idx_a, idx_b = load_file['idx_a'].copy(), load_file['idx_b'].copy()

        # ---- write the first T samples to the working file 'y' ----
        y_full = np.memmap(data_path+'y_full', dtype=float, mode='r', shape=(T_full,p))
        y = np.memmap(data_path+'y', dtype=float, mode='w+', shape=(T,p))
        y[:] = y_full[:T, :].copy()
        del y_full
        chunksize = np.minimum(p, 100)
        if mmap:
            # Release the write handle, then remove the column means one
            # block of `chunksize` columns at a time via fresh r+ maps.
            del y
            print('ensuring zero-mean data for given observation scheme')
            for i in progprint_xrange(p//chunksize, perline=10):
                y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
                y[:, i*chunksize:(i+1)*chunksize] = y[:, i*chunksize:(i+1)*chunksize] - y[:, i*chunksize:(i+1)*chunksize].mean(axis=0)
                del y
            # remaining columns when p is not a multiple of chunksize
            if (p//chunksize)*chunksize < p:
                y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
                y[:, (p//chunksize)*chunksize:] = y[:, (p//chunksize)*chunksize:] - y[:, (p//chunksize)*chunksize:].mean(axis=0)
                del y
            y = np.memmap(data_path+'y', dtype=float, mode='r', shape=(T,p))
        else:
            # `y` is still the open, writable map here. (It used to be
            # deleted before this branch, which raised a NameError.)
            y -= y.mean(axis=0)

        # Existing result file for this (seed, T). `run` is set here, as in
        # the ssidid re-fit loop, because this cell does not define it.
        run = '_e1'
        file_name = 'p'+str(p)+'n'+str(n)+'T'+str(T)+'snr'+str(int(np.mean(snr)//1))+'_run'+str(run)
        load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()
        # Same dict object as load_file['pars_est_g']: the in-place updates
        # below therefore end up in the saved file.
        pars_est_g = load_file['pars_est_g']

        print('re-computing observation scheme')
        obs_scheme = ObservationScheme(p=p, T=T,
                                        sub_pops=(np.arange(p),),
                                        obs_pops=(0,),
                                        obs_time=(T,))
        obs_scheme.comp_subpop_stats()
        sub_pops = obs_scheme.sub_pops


        # project every sample onto the stored GROUSE subspace
        print('filtering data')
        obs_scheme.gen_mask_from_scheme()
        tracker = Grouse(p, n, 0. )
        tracker.U = pars_est_g['C'].copy()
        x_g = np.zeros((T,n))
        for t in range (T):
            x_g[t,:] = tracker._project(y[t,:].reshape(p,1), obs_scheme.mask[t,:].reshape(p,1)).reshape(-1)
        obs_scheme.mask = None


        # NOTE: this overwrites the module-level kl_ (now 20 instead of 10).
        lag_range_g = np.arange(20)
        kl_ = np.max(lag_range_g) + 1
        print('extracting dynamics parameters')
        # np.cov of the lag-m shifted and unshifted projections is 2n x 2n;
        # [:n, n:] keeps the cross-covariance block for lag m.
        pars_est_g['X'] = np.vstack([np.cov(x_g[m:-(kl_+1)+m, :].T, x_g[:-(kl_+1), :].T)[:n,n:] for m in lag_range_g])
        # least-squares map from lag blocks 0..L-2 onto lag blocks 1..L-1
        pars_est_g['A'] = np.linalg.lstsq(pars_est_g['X'][:(len(lag_range_g)-1)*n,:], pars_est_g['X'][n:len(lag_range_g)*n,:])[0]
        # symmetrised lag-0 block
        pars_est_g['Pi'] = (pars_est_g['X'][:n,:] + pars_est_g['X'][:n,:].T)/2
        ev_est = np.linalg.eigvals(pars_est_g['A'])
        del x_g

        print('storing')
        save_dict = {'p' : p,'n' : n,'T' : T,'snr' : snr,'lag_range' : load_file['lag_range'],
                     'obs_scheme' : load_file['obs_scheme'], 'mmap' : mmap,'y' : data_path if mmap else y,
                     'pars_true' : load_file['pars_true'], 'pars_est' : load_file['pars_est'],
                     'idx_a' : load_file['idx_a'],'idx_b' : load_file['idx_b'], 'W' : None,'Qs' : None,'Om' : None,
                     'traces' : load_file['traces'], 'ts': load_file['ts'],
                     'rnd_seed' : rnd_seed,
                     'pars_est_g' : load_file['pars_est_g'], 'traces_g' :  load_file['traces_g'], 'ts_g': load_file['ts_g']
                    }
        np.savez(data_path + file_name, save_dict)


# Adding dynamics estimate for GROUSE

In [None]:
% matplotlib inline
from ssidid.icml_scripts import run_default
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy import linalg as la
import glob, os, psutil, time

from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import get_subpop_stats, gen_data
from subtracking import Grouse, calc_subspace_proj_error

# ---- configuration for the GROUSE dynamics pass ----

# lags 0..9; kl_ is the number of lags
lag_range = np.arange(10)
kl_ = lag_range.max() + 1
sso = True

# tag used in the names of the per-(seed, T) result files
run = '_e1'

# problem size; T_full is the length of the stored full data set
p = 1000
T_full = 100030
n = 10
nr = 0 # number of real eigenvalues
snr = (9., 9.)
whiten = True

# eigenvalue range settings (same values as in the other cells)
eig_m_r, eig_M_r = 0.9, 0.99
eig_m_c, eig_M_c = 0.9, 0.99

# I/O settings
mmap = True
verbose = True
chunksize = np.min([p, 100])

rnd_seeds = range(10, 20)

# Ts: lengths used in this pass (base length + 10 lags).
# Ts_g: the same base lengths + 30; used by the loop below to build the
# names of the result files it loads.
Ts = kl_ + np.array([1000, 3000, 10000, 30000, 100000])
Ts_g = Ts + 30 - kl_


def principal_angle(A, B):
    """Return the normalized principal angles between span(A) and span(B).

    Parameters
    ----------
    A, B : array_like
        Matrices whose columns span the two subspaces. A 1-D input is
        treated as a single column vector. The columns do NOT have to be
        orthonormal: an orthonormal basis of each column space is computed
        internally with ``scipy.linalg.orth``.

    Returns
    -------
    numpy.ndarray
        Principal angles divided by pi/2, so 0 means aligned and 1 means
        orthogonal. One value per singular value of the product of the two
        orthonormal bases, in the order returned by the SVD (largest
        singular value, i.e. smallest angle, first).
    """
    # Accept lists/tuples as well as arrays (the .ndim checks need arrays).
    A = np.asarray(A)
    B = np.asarray(B)
    A = np.atleast_2d(A).T if (A.ndim < 2) else A
    B = np.atleast_2d(B).T if (B.ndim < 2) else B
    basis_a = la.orth(A)
    basis_b = la.orth(B)
    # Singular values of Qa^T Qb are the cosines of the principal angles.
    cosines = la.svd(basis_a.T.dot(basis_b), compute_uv=False)
    # Round-off can push a cosine marginally above 1, where arccos is NaN.
    return np.arccos(np.minimum(cosines, 1.0)) / (np.pi / 2)


# For every (seed, T): load the stored GROUSE subspace, project the first T
# samples onto it, estimate the dynamics (X, A, Pi) from 20 lags and write the
# updated GROUSE parameters back into the file they were loaded from.
#
# Compatibility notes:
#  * `float` / `int` replace the `np.float` / `np.int` aliases (removed from
#    NumPy); they were plain aliases of the builtins, so results are unchanged.
#  * the result files hold a pickled dict, which np.load only returns when
#    allow_pickle=True is passed. Only load files you produced yourself.
for rnd_seed in rnd_seeds:

    # Per-seed wall-clock timer. Kept in its own name: the projection loop
    # below uses an integer time index, which used to overwrite the timer.
    t_start = time.time()

    print('seed : ' + str(rnd_seed))

    data_path =  '/home/mackelab/Desktop/Projects/Stitching/results/icml_e1/seed_' + str(int(rnd_seed)) + '/'

    y_full = np.memmap(data_path+'y_full', dtype=float, mode='r', shape=(T_full,p))

    file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T_full) + 'snr' + str(int(np.mean(snr)//1)) + 'e1_init'
    load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()
    pars_true = load_file['pars_true'].copy()
    idx_a, idx_b = load_file['idx_a'].copy(), load_file['idx_b'].copy()

    for idxT in range(len(Ts)):

        T = Ts[idxT]
        print('T = ' , str(T))

        # ---- write the first T samples to the working file 'y' ----
        y_full = np.memmap(data_path+'y_full', dtype=float, mode='r', shape=(T_full,p))
        y = np.memmap(data_path+'y', dtype=float, mode='w+', shape=(T,p))
        y[:] = y_full[:T, :].copy()
        del y_full
        chunksize = np.minimum(p, 100)
        if mmap:
            # Release the write handle, then remove the column means one
            # block of `chunksize` columns at a time via fresh r+ maps.
            del y
            print('ensuring zero-mean data for given observation scheme')
            for i in progprint_xrange(p//chunksize, perline=10):
                y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
                y[:, i*chunksize:(i+1)*chunksize] = y[:, i*chunksize:(i+1)*chunksize] - y[:, i*chunksize:(i+1)*chunksize].mean(axis=0)
                del y
            # remaining columns when p is not a multiple of chunksize
            if (p//chunksize)*chunksize < p:
                y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
                y[:, (p//chunksize)*chunksize:] = y[:, (p//chunksize)*chunksize:] - y[:, (p//chunksize)*chunksize:].mean(axis=0)
                del y
            y = np.memmap(data_path+'y', dtype=float, mode='r', shape=(T,p))
        else:
            # `y` is still the open, writable map here. (It used to be
            # deleted before this branch, which raised a NameError.)
            y -= y.mean(axis=0)


        # Result file to read and later overwrite: named after T itself for
        # T in [5010, 50010], and after the matching Ts_g entry otherwise.
        T_file = T if T in [5010, 50010] else Ts_g[idxT]
        file_name = 'p'+str(p)+'n'+str(n)+'T'+str(T_file)+'snr'+str(int(np.mean(snr)//1))+'_run'+str(run)
        load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()
        pars_est_g = load_file['pars_est_g']


        print('re-computing observation scheme')
        obs_scheme = ObservationScheme(p=p, T=T,
                                        sub_pops=(np.arange(p),),
                                        obs_pops=(0,),
                                        obs_time=(T,))
        obs_scheme.comp_subpop_stats()
        sub_pops = obs_scheme.sub_pops


        # project every sample onto the stored GROUSE subspace
        print('filtering data')
        obs_scheme.gen_mask_from_scheme()
        tracker = Grouse(p, n, 0. )
        tracker.U = pars_est_g['C'].copy()
        x_g = np.zeros((T,n))
        for t_idx in range(T):
            x_g[t_idx,:] = tracker._project(y[t_idx,:].reshape(p,1), obs_scheme.mask[t_idx,:].reshape(p,1)).reshape(-1)
        obs_scheme.mask = None


        # NOTE: this overwrites the module-level kl_ (now 20 instead of 10).
        lag_range_g = np.arange(20)
        kl_ = np.max(lag_range_g) + 1
        print('extracting dynamics parameters')
        # np.cov of the lag-m shifted and unshifted projections is 2n x 2n;
        # [:n, n:] keeps the cross-covariance block for lag m.
        pars_est_g['X'] = np.vstack([np.cov(x_g[m:-(kl_+1)+m, :].T, x_g[:-(kl_+1), :].T)[:n,n:] for m in lag_range_g])
        # least-squares map from lag blocks 0..L-2 onto lag blocks 1..L-1
        pars_est_g['A'] = np.linalg.lstsq(pars_est_g['X'][:(len(lag_range_g)-1)*n,:], pars_est_g['X'][n:len(lag_range_g)*n,:])[0]
        # symmetrised lag-0 block
        pars_est_g['Pi'] = (pars_est_g['X'][:n,:] + pars_est_g['X'][:n,:].T)/2
        ev_est = np.linalg.eigvals(pars_est_g['A'])
        del x_g

        print('storing')
        save_dict = {'p' : p,'n' : n,'T' : T,'snr' : load_file['snr'],'lag_range' : load_file['lag_range'],
                     'obs_scheme' : load_file['obs_scheme'], 'mmap' : mmap,'y' : data_path if mmap else y,
                     'pars_true' : load_file['pars_true'], 'pars_est' : load_file['pars_est'],
                     'idx_a' : load_file['idx_a'],'idx_b' : load_file['idx_b'], 'W' : None,'Qs' : None,'Om' : None,
                     'traces' : load_file['traces'], 'ts': load_file['ts'],
                     'rnd_seed' : load_file['rnd_seed'],
                     'pars_est_g' : pars_est_g, 'traces_g' :  load_file['traces_g'], 'ts_g': load_file['ts_g']
                    }
        np.savez(data_path + file_name, save_dict)


    # elapsed wall-clock seconds for this seed
    t = time.time() - t_start
        

# savefile consolidation

In [None]:
% matplotlib inline
from ssidid.icml_scripts import run_default
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy import linalg as la
import glob, os, psutil, time

from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid import ObservationScheme, progprint_xrange
from ssidid.utility import get_subpop_stats, gen_data
from subtracking import Grouse, calc_subspace_proj_error

# ---- configuration for the save-file consolidation pass ----

# lags 0..9; kl_ is the number of lags
lag_range = np.arange(10)
kl_ = lag_range.max() + 1
sso = True

# tag used in the names of the per-(seed, T) result files
run = '_e1'

# problem size; T_full is the length of the stored full data set
p = 1000
T_full = 100030
n = 10
nr = 0 # number of real eigenvalues
snr = (9., 9.)

# I/O settings
mmap = True
verbose = True

# Ts: base lengths + 10 lags. Ts_g: the same base lengths + 30.
# The loop below reads GROUSE results from the Ts_g-named files and merges
# them into the Ts-named files.
Ts = kl_ + np.array([1000, 3000, 10000, 30000, 100000])
Ts_g = Ts + 30 - kl_
rnd_seeds = range(10, 20)

def principal_angle(A, B):
    """Return the normalized principal angles between span(A) and span(B).

    Parameters
    ----------
    A, B : array_like
        Matrices whose columns span the two subspaces. A 1-D input is
        treated as a single column vector. The columns do NOT have to be
        orthonormal: an orthonormal basis of each column space is computed
        internally with ``scipy.linalg.orth``.

    Returns
    -------
    numpy.ndarray
        Principal angles divided by pi/2, so 0 means aligned and 1 means
        orthogonal. One value per singular value of the product of the two
        orthonormal bases, in the order returned by the SVD (largest
        singular value, i.e. smallest angle, first).
    """
    # Accept lists/tuples as well as arrays (the .ndim checks need arrays).
    A = np.asarray(A)
    B = np.asarray(B)
    A = np.atleast_2d(A).T if (A.ndim < 2) else A
    B = np.atleast_2d(B).T if (B.ndim < 2) else B
    basis_a = la.orth(A)
    basis_b = la.orth(B)
    # Singular values of Qa^T Qb are the cosines of the principal angles.
    cosines = la.svd(basis_a.T.dot(basis_b), compute_uv=False)
    # Round-off can push a cosine marginally above 1, where arccos is NaN.
    return np.arccos(np.minimum(cosines, 1.0)) / (np.pi / 2)

# Consolidation: for every (seed, T) take the GROUSE results stored in the
# Ts_g-named file and merge them into the Ts-named file, which keeps all of
# its other entries.
#
# Compatibility notes:
#  * `float` / `int` replace the `np.float` / `np.int` aliases (removed from
#    NumPy); they were plain aliases of the builtins, so results are unchanged.
#  * the result files hold a pickled dict, which np.load only returns when
#    allow_pickle=True is passed. Only load files you produced yourself.
for rnd_seed in rnd_seeds:

    print('seed : ' + str(rnd_seed))

    data_path =  '/home/mackelab/Desktop/Projects/Stitching/results/icml_e1/seed_' + str(int(rnd_seed)) + '/'

    y_full = np.memmap(data_path+'y_full', dtype=float, mode='r', shape=(T_full,p))

    file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T_full) + 'snr' + str(int(np.mean(snr)//1)) + 'e1_init'
    load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()
    pars_true = load_file['pars_true'].copy()
    idx_a, idx_b = load_file['idx_a'].copy(), load_file['idx_b'].copy()

    for idxT in range(len(Ts)):

        T = Ts[idxT]
        print('T = ' , str(T))

        # ---- source: GROUSE results from the Ts_g-named file ----
        file_name_g = 'p'+str(p)+'n'+str(n)+'T'+str(Ts_g[idxT])+'snr'+str(int(np.mean(snr)//1))+'_run'+str(run)
        load_file_g = np.load(data_path + file_name_g + '.npz', allow_pickle=True)['arr_0'].tolist()
        # copy the arrays so the merged dict does not reference the source dict
        pars_est_g = {'C' :  load_file_g['pars_est_g']['C'].copy(),
                      'A' :  load_file_g['pars_est_g']['A'].copy(),
                      'X' :  load_file_g['pars_est_g']['X'].copy(),
                      'Pi' :  load_file_g['pars_est_g']['Pi'].copy()}
        traces_g = load_file_g['traces_g']
        ts_g = load_file_g['ts_g']

        # drop the previously loaded dict before loading the next one
        del load_file

        # ---- target: the Ts-named file, which is rewritten below ----
        file_name = 'p'+str(p)+'n'+str(n)+'T'+str(T)+'snr'+str(int(np.mean(snr)//1))+'_run'+str(run)
        load_file = np.load(data_path + file_name + '.npz', allow_pickle=True)['arr_0'].tolist()

        print('storing')
        # NOTE(review): `y` is never defined in this cell; the 'y' entry only
        # works because mmap is True - confirm before changing mmap.
        save_dict = {'p' : p,'n' : n,'T' : T,'snr' : snr,'lag_range' : load_file['lag_range'],
                     'obs_scheme' : load_file['obs_scheme'], 'mmap' : mmap,'y' : data_path if mmap else y,
                     'pars_true' : load_file['pars_true'], 'pars_est' : load_file['pars_est'],
                     'idx_a' : load_file['idx_a'],'idx_b' : load_file['idx_b'], 'W' : None,'Qs' : None,'Om' : None,
                     'traces' : load_file['traces'], 'ts': load_file['ts'],
                     'rnd_seed' : load_file['rnd_seed'],
                     'pars_est_g' : pars_est_g, 'traces_g' : traces_g, 'ts_g': ts_g
                    }

        #print('file_name_g', file_name_g)
        #print(pars_est_g.keys())
        #print(traces_g)
        #print(ts_g)
        #print('file_name', file_name)
        #print(load_file['pars_est_g'])
        np.savez(data_path + file_name, save_dict)
        

# Data generation (to be run once!)

In [None]:
% matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import glob, os, psutil, time

from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid.utility import get_subpop_stats, gen_data
from ssidid import ObservationScheme
from subtracking import Grouse, calc_subspace_proj_error
from ssidid import progprint_xrange
from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om

# Set up the problem dimensions, build the observation scheme and draw the
# full data set with gen_data.
# NOTE(review): `reps`, `rnd_seed` and `data_path` are used below but are not
# assigned in this cell -- they must already exist in the session. TODO confirm
# where they are meant to be set.

# define problem size
lag_range = np.arange(0,30)
# one more than the largest time lag
kl_ = np.max(lag_range)+1
T_full = 100000 + kl_
p, n, T = 1000, 10, T_full

nr = 0 # number of real eigenvalues
snr = (9., 9.)
whiten = True
# presumably lower/upper eigenvalue bounds (real and complex) -- verify against gen_data
eig_m_r, eig_M_r, eig_m_c, eig_M_c = 0.9, 0.99, 0.9, 0.99

# I/O matter
mmap, chunksize = True, np.min((p,100))
verbose=True

# create subpopulations
sso = True
# a single subpopulation holding all p indices
sub_pops = (np.arange(p),)

# subpopulation indices 0..len(sub_pops)-1, repeated `reps` times
obs_pops = np.concatenate([ np.arange(len(sub_pops)) for r in range(reps) ])
# len(obs_pops) evenly spaced integer time points, the last one equal to T
obs_time = np.linspace(0,T, len(obs_pops)+1)[1:].astype(int)
obs_scheme = ObservationScheme(p=p, T=T, 
                                sub_pops=sub_pops, 
                                obs_pops=obs_pops, 
                                obs_time=obs_time)
obs_scheme.comp_subpop_stats()
    
# seed NumPy's global RNG before generating data
np.random.seed(rnd_seed)
# gen_data returns five values; the last two are discarded here
pars_true, x, y, _, _ = gen_data(p,n,lag_range,T, nr,
                                 eig_m_r, eig_M_r, 
                                 eig_m_c, eig_M_c,
                                 mmap, chunksize,
                                 data_path,
                                 snr=snr, whiten=whiten)    

# Collect the experiment setup and generated data into one dictionary and
# store it as the 'e1_init' file under data_path.
# NOTE(review): `pars_est`, `idx_a`, `idx_b` and `W` are not assigned in this
# cell -- they must already exist in the session, otherwise this raises a
# NameError. TODO confirm the intended execution order.
save_dict = {'p' : p,
             'n' : n,
             'T' : T,
             'snr' : snr,
             'obs_scheme' : obs_scheme,
             'lag_range' : lag_range,
             'x' : x,
             'mmap' : mmap,
             # with memory-mapping, only the location of the data is stored
             'y' : data_path if mmap else y,
             'pars_true' : pars_true,
             'pars_est' : pars_est,
             'idx_a' : idx_a,
             'idx_b' : idx_b,
             'W' : W,
             'Qs' : None,
             'Om' : None,
             'rnd_seed' : rnd_seed
            }
# Builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
# removed in 1.24; the resulting file name is unchanged.
file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T) + 'snr' + str(int(np.mean(snr)//1)) + 'e1_init'
np.savez(data_path + file_name, save_dict)


In [None]:
from ssidid.SSID_Hankel_loss import run_bad, plot_slim, print_slim, f_l2_Hankel_nl, f_l2_Hankel_comp_Q_Om
from ssidid import progprint_xrange

data_path =  '/home/mackelab/Desktop/Projects/Stitching/results/icml_e1/'


# Subtract the per-column mean from the data. With mmap the file 'y' under
# data_path is modified in place, one block of `chunksize` columns at a time.
# dtype=float replaces np.float (deprecated in NumPy 1.20, removed in 1.24);
# both denote the builtin float, so the on-disk layout is unchanged.
if mmap:
    print('ensuring zero-mean data for given observation scheme')
    for i in progprint_xrange(p//chunksize, perline=10):
        cols = slice(i*chunksize, (i+1)*chunksize)
        y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
        y[:, cols] = y[:, cols] - y[:, cols].mean(axis=0)
        # release the writable map after every chunk
        del y
    # remaining columns when p is not a multiple of chunksize
    if (p//chunksize)*chunksize < p:
        rest = slice((p//chunksize)*chunksize, None)
        y = np.memmap(data_path+'y', dtype=float, mode='r+', shape=(T,p))
        y[:, rest] = y[:, rest] - y[:, rest].mean(axis=0)
        del y
    # re-open read-only for all subsequent use
    y = np.memmap(data_path+'y', dtype=float, mode='r', shape=(T,p))
else:
    y -= y.mean(axis=0)

# Index sets: a sorted random draw of 1000 indices when p exceeds 1000,
# otherwise all p indices. idx_b is an independent copy of idx_a.
if p > 1000:
    idx_a = np.sort(np.random.choice(p, 1000, replace=False))
else:
    idx_a = np.arange(p)
idx_b = idx_a.copy()

W = obs_scheme.comp_coocurrence_weights(
    lag_range,
    sso=True,
    idx_a=idx_a,
    idx_b=idx_b,
)
print('computing time-lagged covariances')
Qs, Om = f_l2_Hankel_comp_Q_Om(
    n=n, y=y, lag_range=lag_range, obs_scheme=obs_scheme,
    idx_a=idx_a, idx_b=idx_b, W=W, sso=sso,
    mmap=mmap, data_path=data_path, ts=None, ms=None,
)

# Alternative via the model parameters (kept for reference):
#pars_true['X'] = np.vstack([ np.linalg.matrix_power(pars_true['A'],m).dot(pars_true['Pi']) for m in lag_range])
# For each lag m, take the upper-right n-by-n block of the joint covariance of
# x[m:T-kl_+m] and x[:T-kl_], and stack the blocks vertically.
pars_true['X'] = np.vstack(
    [np.cov(x[m:T-kl_+m].T, x[:T-kl_].T)[:n, n:] for m in lag_range]
)
print(
    'true param. loss: ',
    f_l2_Hankel_nl(
        C=pars_true['C'], X=pars_true['X'], R=pars_true['R'],
        Qs=Qs, Om=Om,
        lag_range=lag_range, ms=range(len(lag_range)),
        idx_a=idx_a, idx_b=idx_b,
    ),
)
print_slim(Qs, Om, lag_range, pars_true, idx_a, idx_b, _, False, data_path)


In [None]:
# Collect the experiment setup, generated data and index sets into one
# dictionary and store it as the 'e1_init' file under data_path.
# NOTE(review): `pars_est` is not assigned in the visible cells -- it must
# already exist in the session. TODO confirm.
save_dict = {'p' : p,
             'n' : n,
             'T' : T,
             'snr' : snr,
             'obs_scheme' : obs_scheme,
             'lag_range' : lag_range,
             'x' : x,
             'mmap' : mmap,
             # with memory-mapping, only the location of the data is stored
             'y' : data_path if mmap else y,
             'pars_true' : pars_true,
             'pars_est' : pars_est,
             'idx_a' : idx_a,
             'idx_b' : idx_b,
             'W' : W,
             # Qs and Om are deliberately not persisted
             'Qs' : None,
             'Om' : None,
             'rnd_seed' : rnd_seed
            }
# Builtin int() instead of np.int: the alias was deprecated in NumPy 1.20 and
# removed in 1.24; the resulting file name is unchanged.
file_name = 'p' + str(p) + 'n' + str(n) + 'T' + str(T) + 'snr' + str(int(np.mean(snr)//1)) + 'e1_init'
np.savez(data_path + file_name, save_dict)

In [None]:
# Bare expression: evaluates `y` so the notebook shows it as the cell output.
y

In [None]:
# Open the stored 'y_full' file read-only and compute its per-column mean.
# dtype=float replaces np.float (deprecated in NumPy 1.20, removed in 1.24);
# both denote the builtin float.
y_full2 = np.memmap(data_path+'y_full', dtype=float, mode='r', shape=(T,p))
y_full2.mean(axis=0)