# Stochastic gradient descent
- three variants
    - size-1 mini-batches (batch_size = 1): zips through all observed entries of $\mbox{cov}(y_{t+m},y_t)$ and computes gradients from one entry at a time
    - column mini-batches (batch_size = p): zips through all columns of $\mbox{cov}(y_{t+m},y_t)$ and computes gradients from the observed entries of one column at a time
    - batch-gradients (batch_size = None): computes a full gradient using all observed entries of $\mbox{cov}(y_{t+m},y_t)$ at the same time
    
- mini-batch gradients use Adam for following the gradients with momentum and with re-normalising of gradients along each dimension. Full gradients use plain gradient descent.
- max_iter is defined as the number of 'zips' through the data set. Thus for different batch sizes, max_iter fixes the amount of information visited within the data covariance matrices, *not* the number of gradient steps 

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy.optimize import fmin_bfgs, check_grad
import glob, os

os.chdir('../core')
from utility import get_subpop_stats, draw_sys
from SSID_Hankel_loss import f_l2_Hankel, l2_sis_setup, g_l2_Hankel_sis, plot_outputs_l2_gradient_test
from SSID_Hankel_loss import yy_Hankel_cov_mat, l2_sis_draw, adam_zip, adam_zip_stable
os.chdir('../dev')

# Problem dimensions handed to draw_sys / l2_sis_setup below
# (presumably p = observed and n = latent dimensionality -- confirm in utility.draw_sys).
p,n = 30,10
# k, l are forwarded to draw_sys and l2_sis_setup (presumably Hankel block sizes -- confirm).
k,l = 3,3

# nr sets the length of ev_r; ev_c gets the remaining (n - nr)//2 entries.
nr = 4

batch_size = p # batch_size = 1 (size-1 mini-batches), p (column mini-batches), None (full gradients)

# Optimiser settings, forwarded unchanged to adam_zip_stable in the descent loop
# (presumably step sizes a / a_A, Adam decay rates b1 / b2 and epsilon e -- confirm there).
a, a_A, b1, b2, e = 0.0001, 0.000001, 0.9, 0.99, 1e-8
max_iter = 1000

# Log-barrier weights to sweep over: the descent loop runs one fit per entry.
gammas = np.array([0.000000])
# tau is forwarded to adam_zip_stable; its meaning is defined there.
tau = 0.5

# create subpopulations
# Two index ranges, 0..p//2 and p//2-1..p-1; they share the indices p//2-1 and p//2.
sub_pops = (np.arange(0,p//2+1), np.arange(p//2-1,p))

# draw system matrices    
# ev_r: nr values evenly spaced in [0.7, 0.99].
ev_r = np.linspace(0.7, 0.99, nr)
# ev_c: (n - nr)//2 complex numbers with uniformly random phase ...
ev_c = np.exp(2 * 1j * np.pi * np.random.uniform(size= (n - nr)//2))
# ... rescaled to moduli evenly spaced in [0.8, 0.99].
ev_c = np.linspace(0.8, 0.99, (n - nr)//2) * ev_c

# Only print the index sets when they are small enough to read.
if p < 200:
    print('sub_pops', sub_pops)
# Index bookkeeping derived from the subpopulations; Om, Ovw and Ovc are used
# further down as masks when evaluating f_l2_Hankel on different parts.
obs_idx, idx_grp, co_obs, overlaps, overlap_grp, idx_overlap, Om, Ovw, Ovc = \
    get_subpop_stats(sub_pops, p, verbose=False)
# Ground-truth parameters plus the covariance data (Qs, Qs_full) the fits are run against.
pars_true, Qs, Qs_full = draw_sys(p,n,k,l,Om, nr, ev_r,ev_c)
# f_base is the data term of the objective; the second return value is not used here.
f_base, _ = l2_sis_setup(k,l,n,Qs,Om,idx_grp,obs_idx)


def s(A):
    """Return whether sum(log(1 - sigma**2)) over the singular values of A is finite.

    The sum is finite only when every singular value of A is strictly below 1;
    a value of 1 yields -inf and a value above 1 yields nan.
    """
    singular_values = np.linalg.svd(A)[1]
    log_terms = np.log(1 - singular_values**2)
    return np.isfinite(log_terms.sum())

#def s(A):
#    return True

# One row per gamma; columns are the squared errors on the
# [observed, overlapping, cross-overlapping, stitched] parts (see the prints below).
err_est  = np.zeros((gammas.size, 4))
# One row per gamma: eigenvalue magnitudes of the estimated A, sorted ascending.
eigA_est = np.zeros((gammas.size, n))
for rep, gamma in enumerate(gammas):

    def f_i(theta):
        """Objective: f_base(theta) + gamma * (-log det(I - A A^T)).

        A is the n-by-n matrix stored in the first n*n entries of theta.
        """
        A = theta[:n*n].reshape(n,n)
        f_log_bar = - np.log( np.linalg.det(np.eye(n)-A.dot(A.T)) )

        # with the barrier switched off, a non-finite barrier value must not
        # turn the whole objective into nan (0 * inf)
        if gamma == 0 and not np.isfinite(f_log_bar):
            f_log_bar = 0

        return f_base(theta) + gamma * f_log_bar

    def g_i(theta, idx_use, idx_co):
        """Gradient matching f_i, evaluated on the mini-batch (idx_use, idx_co).

        Returns g_l2_Hankel_sis(...) plus gamma * 2 (I - A A^T)^{-1} A written
        into the first n*n entries (the A block) of the parameter vector.
        """
        gamma_g_log_bar = np.zeros(theta.size)
        # Skip the n-by-n solve entirely when the barrier is disabled: its
        # result would be multiplied by zero anyway, and a singular
        # I - A A^T would otherwise raise LinAlgError for no reason.
        if gamma != 0:
            A = theta[:n*n].reshape(n,n)
            inv = np.linalg.solve(np.eye(n)-A.dot(A.T), np.eye(n))
            gamma_g_log_bar[:n*n] = gamma * (2 * inv.dot(A).reshape(-1,))
        return g_l2_Hankel_sis(theta,k,l,n,Qs,idx_use,idx_co) + gamma_g_log_bar

    # Initial parameters: diagonal A with entries in [0.7, 0.8), B = I, random C.
    A_0  = np.diag(np.random.uniform(low=0.7, high=0.8, size=n))
    B_0  = np.eye(n) #np.random.normal(size=(n,n))
    C_0  = np.random.normal(size=(p,n))
    Pi_0 = B_0.dot(B_0.T)

    # Flat parameter vector layout: [A (n*n) | B (n*n) | C (p*n)].
    pars_0 = np.hstack((A_0.reshape(n*n,),
                        B_0.reshape(n*n,),
                        C_0.reshape(p*n,)))


    def converged(theta_old, theta, e, t):
        """Stop purely on the iteration count t; theta_old, theta and e are ignored."""
        if t >= max_iter:
            return True
        return False
        #return np.abs(f_i(theta_old) - f_i(theta)) < e

    print('starting descent')

    pars_est_vec, fs = adam_zip_stable(f_i,g_i,s,tau,pars_0.copy(),a,a_A,b1,b2,e,max_iter,converged,Om,idx_grp,co_obs,batch_size)

    # Unpack the estimate using the same [A | B | C] layout as pars_0.
    A_est = pars_est_vec[:n*n].reshape(n,n)
    B_est = pars_est_vec[n*n:2*n*n].reshape(n,n)
    Pi_est = B_est.dot(B_est.T)
    C_est = pars_est_vec[-p*n:].reshape(p,n)

    print('gamma =', gamma)

    # Sort the magnitudes themselves. Sorting the complex eigenvalues first
    # orders them by real part, so their magnitudes would come out unsorted
    # and would not line up with np.sort(np.abs(...)) used for the true
    # system in the eigenvalue plot.
    eigA_est[rep,:] = np.sort(np.abs(np.linalg.eigvals(A_est)))
    print('|eig(A_est)|', eigA_est[rep,:])
    print('|eig(A_true)|', np.sort(np.abs(np.linalg.eigvals(pars_true['A']))))


    # Same loss, evaluated on four different masks / data sets.
    err_est[rep,0] = f_l2_Hankel(pars_est_vec,k,l,n,Qs, Om)
    err_est[rep,1] = f_l2_Hankel(pars_est_vec,k,l,n,Qs,Ovw)
    err_est[rep,2] = f_l2_Hankel(pars_est_vec,k,l,n,Qs,Ovc)
    err_est[rep,3] = f_l2_Hankel(pars_est_vec,k,l,n,Qs_full,~Om)

    print('final squared error on observed parts:', 
          err_est[rep,0])
    print('final squared error on overlapping parts:', 
          err_est[rep,1])
    print('final squared error on cross-overlapping parts:',
          err_est[rep,2])
    print('final squared error on stitched parts:',
          err_est[rep,3])

    pars_init = {'A': A_0, 'C': C_0, 'Pi': Pi_0, 'B': B_0}
    pars_est  = {'A': A_est, 'C': C_est, 'Pi': Pi_est, 'B': B_est}
    #plot_outputs_l2_gradient_test(pars_true, pars_init, pars_est, k, l, Qs, 
    #                                   Qs_full, Om, Ovc, Ovw, f_i, g_i, if_flip = True)

    # Loss trace returned by the optimiser for this gamma.
    plt.figure(figsize=(20,8))
    plt.plot(fs[:max_iter])
    plt.show()


# visualise overall results

# Squared errors (one line per column of err_est) against the log-barrier weight.
plt.figure(figsize=(20,8))
plt.hsv()  # select the hsv colormap once; the second call was redundant
plt.plot(err_est)
plt.xticks( np.arange(gammas.size), gammas)
# raw string + mathtext: '\gamma' was an invalid escape sequence and was not
# rendered as the Greek letter
plt.xlabel(r'$\gamma$')
plt.ylabel('MSE')
plt.legend(['obs.', 'overlap', 'cross-overl.', 'stitched'])
plt.title('Squared errors as function of log-barrier height')
plt.show()

# Eigenvalue magnitudes of each estimated A (one line per gamma, coloured on
# a red-to-blue ramp), the true system (black) and the stability bound 1 (red dashes).
plt.figure(figsize=(20,8))
clrs = np.zeros((gammas.size, 3))
clrs[:,2] = np.linspace(0.05, 0.99, gammas.size)        # blue channel rises with the gamma index
clrs[:,0] = np.linspace(0.05, 0.99, gammas.size)[::-1]  # red channel falls with the gamma index
for i in range(gammas.size):
    # Successive plot() calls already draw into the same axes. pyplot.hold()
    # was removed in Matplotlib 3.0, so calling it raises AttributeError.
    plt.plot(eigA_est[i,:], color=clrs[i,:])
plt.plot(np.sort(np.abs(np.linalg.eigvals(pars_true['A']))), 'k')
plt.plot([0, 1.1*n], [1, 1], 'r--')
plt.hot()
plt.xticks( np.arange(n), np.arange(n)+1)
plt.xlabel('# eigenvalue')
plt.ylabel('EV')
# Legend entries: gamma rounded up to two decimals, then the two reference lines.
lgnd = [np.ceil(gammas[i]*100)/100 for i in range(gammas.size)]
lgnd.append('true')
lgnd.append('stability')
plt.legend(lgnd)
plt.axis([0, 1.1*n, plt.ylim()[0], plt.ylim()[1]])
plt.title('Eigenvalues as function of log-barrier height')
plt.show()

    

# just one more turn...

In [None]:
gamma /= 10
def f_i(theta):
    """Objective: f_base(theta) + gamma * (-log det(I - A A^T)).

    A is the n-by-n matrix stored in the first n*n entries of theta.
    """
    dyn = theta[:n*n].reshape(n,n)
    gram_gap = np.eye(n) - dyn.dot(dyn.T)
    barrier = -np.log(np.linalg.det(gram_gap))

    # a disabled barrier (gamma == 0) must not inject inf/nan into the objective
    barrier_is_bad = not np.isfinite(barrier)
    if barrier_is_bad and gamma == 0:
        barrier = 0

    return f_base(theta) + gamma * barrier
def g_i(theta, idx_use, idx_co):
    """Gradient matching f_i, evaluated on the mini-batch (idx_use, idx_co).

    Returns g_l2_Hankel_sis(...) plus gamma * 2 (I - A A^T)^{-1} A written
    into the first n*n entries (the A block) of the parameter vector.
    """
    gamma_g_log_bar = np.zeros(theta.size)
    # Skip the n-by-n solve entirely when the barrier is disabled: its result
    # would be multiplied by zero anyway, and a singular I - A A^T would
    # otherwise raise LinAlgError for no reason.
    if gamma != 0:
        A = theta[:n*n].reshape(n,n)
        inv = np.linalg.solve(np.eye(n)-A.dot(A.T), np.eye(n))
        gamma_g_log_bar[:n*n] = gamma * (2 * inv.dot(A).reshape(-1,))
    return g_l2_Hankel_sis(theta,k,l,n,Qs,idx_use,idx_co) + gamma_g_log_bar

# Continue the previous descent: same optimiser settings, warm-started from
# the current estimate pars_est_vec.
max_iter = 1000
def converged(theta_old, theta, e, t):
    """Stop purely on the iteration count t; theta_old, theta and e are ignored."""
    if t >= max_iter:
        return True
    return False

pars_est_vec, fs = adam_zip_stable(f_i,g_i,s,tau,pars_est_vec.copy(),a,a_A,b1,b2,e,max_iter,converged,Om,idx_grp,co_obs,batch_size)
# Loss trace of this continuation run.
plt.figure(figsize=(20,8))
plt.plot(fs[:max_iter])
plt.show()



# Unpack the flat vector: first n*n entries are A, the next n*n are B,
# the last p*n are C; Pi is rebuilt as B B^T.
A_est = pars_est_vec[:n*n].reshape(n,n)
B_est = pars_est_vec[n*n:2*n*n].reshape(n,n)
Pi_est = B_est.dot(B_est.T)
C_est = pars_est_vec[-p*n:].reshape(p,n)

# A_0, C_0, Pi_0, B_0 are the initial values left behind by the descent loop
# in the previous cell.
pars_init = {'A': A_0, 'C': C_0, 'Pi': Pi_0, 'B': B_0}
pars_est  = {'A': A_est, 'C': C_est, 'Pi': Pi_est, 'B': B_est}
# None is passed in the argument position where the commented-out call in the
# previous cell passed g_i.
plot_outputs_l2_gradient_test(pars_true, pars_init, pars_est, k, l, Qs, 
                                   Qs_full, Om, Ovc, Ovw, f_i, None, if_flip = True)


# alternating blocked descent

- using SGD on $C$
- after each pass over the observed parts of the covariance matrix, use the analytic solution for $A$, $\Pi$

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy.optimize import fmin_bfgs, check_grad
import glob, os

os.chdir('../core')
from utility import get_subpop_stats, draw_sys
import SSID_Hankel_loss
from SSID_Hankel_loss import f_l2_Hankel, plot_outputs_l2_gradient_test
from SSID_Hankel_loss import l2_bad_sis_setup
from SSID_Hankel_loss import yy_Hankel_cov_mat, l2_sis_draw, adam_zip_bad_stable, id_A
from SSID_Hankel_loss import ssidSVD
os.chdir('../dev')

# Problem dimensions handed to draw_sys / l2_bad_sis_setup below
# (presumably p = observed and n = latent dimensionality -- confirm in utility.draw_sys).
p,n = 1000,20
# k, l are used for the fit; k_init, l_init only for the SSID initialisation below.
k,l = 20,20
k_init, l_init = 3,3

# nr equals n here, so (n - nr)//2 == 0 and ev_c below is an empty array.
nr = 20
batch_size = p # batch_size = 1 (size-1 mini-batches), p (column mini-batches), None (full gradients)

# Optimiser settings forwarded unchanged to adam_zip_bad_stable
# (presumably step size a, Adam decay rates b1 / b2 and epsilon e -- confirm there).
a, b1, b2, e = 0.001, 0.9, 0.99, 1e-8
# max_iter_init is only referenced inside the disabled (string-quoted) pre-run in the loop below.
max_iter, max_iter_init = 100, 10

reps = 1
# Flags forwarded to l2_bad_sis_setup (and `linear` to the optimiser / plotting calls).
stable = False
linear = False

# create subpopulations
# Two index ranges, 0..p//2 and p//2-1..p-1; they share the indices p//2-1 and p//2.
sub_pops = (np.arange(0,p//2+1), np.arange(p//2-1,p))

# draw system matrices    
ev_r = np.linspace(0.7, 0.99, nr)
ev_c = np.exp(2 * 1j * np.pi * np.random.uniform(size= (n - nr)//2))
ev_c = np.linspace(0.8, 0.99, (n - nr)//2) * ev_c


# Only print the index sets when they are small enough to read.
if p < 200:
    print('sub_pops', sub_pops)
obs_idx, idx_grp, co_obs, overlaps, overlap_grp, idx_overlap, Om, Ovw, Ovc = \
    get_subpop_stats(sub_pops=sub_pops, p=p, verbose=False)
pars_true, Qs, Qs_full = draw_sys(p=p,n=n,k=k,l=l,Om=Om, nr=nr, ev_r=ev_r,ev_c=ev_c)
# Objective plus two gradient/update callables, passed to adam_zip_bad_stable
# as f, g_C and g_A respectively.
f_i, g_C, g_A = l2_bad_sis_setup(k=k,l=l,n=n,Qs=Qs,Om=Om,idx_grp=idx_grp,obs_idx=obs_idx,linear=linear,stable=stable)

print('getting initial parameter values (SSID on largest subpopulation)')
# NOTE(review): sub_pops[0] is simply the first subpopulation; both have
# p//2+1 entries, so "largest" in the message is a tie -- confirm intent.
idx = sub_pops[0]
# Hankel matrix built from the TRUE parameters restricted to rows idx of C,
# using the smaller (k_init, l_init) block sizes.
H_kl = yy_Hankel_cov_mat(pars_true['C'][idx,:],pars_true['A'],pars_true['Pi'],
                         k_init,l_init,Om=None)
pars_ssid = ssidSVD(H_kl, Qs[0][np.ix_(idx,idx)], n, pi_method='proper')
#pars_ssid = pars_true.copy()
# Change of basis M = S^{-1/2} U^T from the SVD of Pi; assuming Pi is symmetric
# positive definite (Pi = U S U^T), M Pi M^T is the identity.
U,S,_ = np.linalg.svd(pars_ssid['Pi'])
M = np.diag(1/np.sqrt(S)).dot(U.T)

# Run `reps` independent fits; each iteration overwrites pars_init, pars_est_vec,
# fs and pars_est, so only the last repetition's results survive the loop.
for rep in range(reps):
        
    # Initial values: A and Pi from the SSID estimate expressed in the basis
    # given by M, B fixed to the identity, and a freshly drawn random C.
    pars_init = {'A'  : M.dot(pars_ssid['A']).dot(np.linalg.inv(M)),
                 'Pi' : M.dot(pars_ssid['Pi']).dot(M.T),
                 'B'  : np.eye(n), 
                 'C'  : np.random.normal(size=(p,n))} #pars_ssid['C'].dot(np.linalg.inv(M))}

    # The string literal below is a disabled pre-run (max_iter_init iterations
    # with g_A=id_A); it is an expression statement and never executes.
    """
    def converged(theta_old, theta, e, t):
        return True if t >= max_iter_init else False    
    pars_est_vec, fs = adam_zip_bad_stable(f=f_i,g_C=g_C,g_A=id_A,pars_0=pars_init,
                                           a=a,b1=b1,b2=b2,e=e,max_iter=max_iter_init,converged=converged,
                                           Om=Om,idx_grp=idx_grp,co_obs=co_obs,
                                           batch_size=batch_size,linear=linear)    
    """
    print('starting descent')    
    def converged(theta_old, theta, e, t):
        # Stop purely on the iteration count t; the other arguments are ignored.
        return True if t >= max_iter else False
    pars_est_vec, fs = adam_zip_bad_stable(f=f_i,g_C=g_C,g_A=g_A,pars_0=pars_init,
                                           a=a,b1=b1,b2=b2,e=e,max_iter=max_iter,converged=converged,
                                           Om=Om,idx_grp=idx_grp,co_obs=co_obs,
                                           batch_size=batch_size,linear=linear)   
    # Unpack the flat result: first n*n entries are A, the next n*n are B,
    # the last p*n are C; Pi is rebuilt as B B^T.
    pars_est  = {'A': pars_est_vec[:n*n].reshape(n,n), 
                 'C': pars_est_vec[-p*n:].reshape(p,n), 
                 'B': pars_est_vec[n*n:2*n*n].reshape(n,n)}
    pars_est['Pi'] = pars_est['B'].dot( pars_est['B'].T)

    plot_outputs_l2_gradient_test(pars_true, pars_init, pars_est, k, l, Qs, 
                                       Qs_full, Om, Ovc, Ovw, f_i, None, 
                                       linear=linear, idx_grp = idx_grp, co_obs = co_obs, 
                                       if_flip = True)

    # Loss trace returned by the optimiser for this repetition.
    plt.figure(figsize=(20,8))
    plt.plot(fs[:max_iter])
    plt.show()
    

In [None]:
# Re-plot the loss trace `fs` left behind by the most recent descent run.
plt.figure(figsize=(20,8))
plt.plot(fs[:max_iter])
plt.show()

In [None]:
# Hankel matrix of the true system versus the one implied by the estimated C.
# NOTE(review): H_true is built with linear=True but X and H_est with
# linear=False -- confirm that this mix is intended.
H_true = yy_Hankel_cov_mat(pars_true['C'],pars_true['A'],pars_true['Pi'],k,l,Om=None,linear=True)
# X is passed below in the argument position where the true call passes A, with
# None in the Pi position (presumably a combined latent quantity solved for
# given C -- confirm in SSID_Hankel_loss.s_A_l2_Hankel_bad_sis).
X=SSID_Hankel_loss.s_A_l2_Hankel_bad_sis(pars_est['C'],k,l,Qs,idx_grp,co_obs, linear=False)
H_est  = yy_Hankel_cov_mat(pars_est['C'], X,None,k,l,Om=None,linear=False)

# Left: true Hankel matrix, middle: estimate, right: entry-wise scatter of the two.
plt.figure(figsize=(20,10))
plt.subplot(1,3,1)
plt.imshow(H_true)
plt.subplot(1,3,2)
plt.imshow(H_est)
plt.subplot(1,3,3)
plt.plot(H_true.reshape(-1,), H_est.reshape(-1,), 'k.')
plt.show()

# just one more turn...

In [None]:
# Continue the blocked descent for another max_iter iterations, warm-started
# from the current estimate.
max_iter = 100
def converged(theta_old, theta, e, t):
    # Stop purely on the iteration count t; the other arguments are ignored.
    return True if t >= max_iter else False

# NOTE(review): the first run passed pars_0 as a dict (pars_init) whereas this
# call passes the flat vector pars_est_vec -- confirm adam_zip_bad_stable accepts both.
pars_est_vec, fs = adam_zip_bad_stable(f=f_i,g_C=g_C,g_A=g_A,pars_0=pars_est_vec.copy(),
                                       a=a,b1=b1,b2=b2,e=e,max_iter=max_iter,converged=converged,
                                       Om=Om,idx_grp=idx_grp,co_obs=co_obs,
                                       batch_size=batch_size,linear=linear) 

# Loss trace of this continuation run.
plt.figure(figsize=(20,8))
plt.plot(fs[:max_iter])
plt.show()

# Unpack the flat vector: first n*n entries are A, the next n*n are B,
# the last p*n are C; Pi is rebuilt as B B^T.
A_est = pars_est_vec[:n*n].reshape(n,n)
B_est = pars_est_vec[n*n:2*n*n].reshape(n,n)
Pi_est = B_est.dot(B_est.T)
C_est = pars_est_vec[-p*n:].reshape(p,n)
pars_est  = {'A': A_est, 'C': C_est, 'Pi': Pi_est, 'B': B_est}
plot_outputs_l2_gradient_test(pars_true, pars_init, pars_est, k, l, Qs, 
                                   Qs_full, Om, Ovc, Ovw, f_i, None, 
                                   linear=linear, idx_grp = idx_grp, co_obs = co_obs, 
                                   if_flip = True)

# Raw (possibly complex) eigenvalues of the estimated A.
print(np.linalg.eigvals(A_est))


In [None]:
# Same continuation step as the previous cell, but for 500 further iterations.
max_iter = 500
def converged(theta_old, theta, e, t):
    # Stop purely on the iteration count t; the other arguments are ignored.
    return True if t >= max_iter else False

# NOTE(review): pars_0 is the flat vector here but a dict (pars_init) in the
# first run -- confirm adam_zip_bad_stable accepts both.
pars_est_vec, fs = adam_zip_bad_stable(f=f_i,g_C=g_C,g_A=g_A,pars_0=pars_est_vec.copy(),
                                       a=a,b1=b1,b2=b2,e=e,max_iter=max_iter,converged=converged,
                                       Om=Om,idx_grp=idx_grp,co_obs=co_obs,
                                       batch_size=batch_size,linear=linear) 

# Loss trace of this continuation run.
plt.figure(figsize=(20,8))
plt.plot(fs[:max_iter])
plt.show()

# Unpack the flat vector: first n*n entries are A, the next n*n are B,
# the last p*n are C; Pi is rebuilt as B B^T.
A_est = pars_est_vec[:n*n].reshape(n,n)
B_est = pars_est_vec[n*n:2*n*n].reshape(n,n)
Pi_est = B_est.dot(B_est.T)
C_est = pars_est_vec[-p*n:].reshape(p,n)
pars_est  = {'A': A_est, 'C': C_est, 'Pi': Pi_est, 'B': B_est}
plot_outputs_l2_gradient_test(pars_true, pars_init, pars_est, k, l, Qs, 
                                   Qs_full, Om, Ovc, Ovw, f_i, None, 
                                   linear=linear, idx_grp = idx_grp, co_obs = co_obs, 
                                   if_flip = True)

# Raw (possibly complex) eigenvalues of the estimated A.
print(np.linalg.eigvals(A_est))


In [None]:
from scipy.io import savemat # store results for comparison with Matlab code   

# NOTE: this changes the working directory and never changes it back, so every
# relative path used by later cells is resolved from ../fits/.
os.chdir('../fits/')

# Base name shared by both output files; each writer adds its own extension
# (.mat for savemat, .npz for np.savez).
save_file = 'usbad_p1000n20r2'

# True, initial and estimated parameters, keyed by the names the MATLAB side will see.
save_file_m = {'A_true':pars_true['A'],
               'B_true':pars_true['B'],
               'Pi_true' : pars_true['Pi'], 
               'C_true' : pars_true['C'],
               'A_0': pars_init['A'],
               'B_0': pars_init['B'],
               'Pi_0': pars_init['Pi'],
               'C_0': pars_init['C'],
               'A_est': pars_est['A'],
               'B_est':  pars_est['B'],
               'Pi_est' :  pars_est['Pi'], 
               'C_est' :  pars_est['C']}

savemat(save_file,save_file_m) # does the actual saving

# Flat [A | B | C] vectors in the same layout as pars_est_vec (Pi is not
# stored here; elsewhere in this notebook it is rebuilt as B B^T).
pars_true_vec = np.hstack((pars_true['A'].reshape(n*n,),
                    pars_true['B'].reshape(n*n,),
                    pars_true['C'].reshape(p*n,)))
pars_init_vec = np.hstack((pars_init['A'].reshape(n*n,),
                    pars_init['B'].reshape(n*n,),
                    pars_init['C'].reshape(p*n,)))

np.savez(save_file, 
         pars_0_vec=pars_init_vec,
         pars_true_vec=pars_true_vec, 
         pars_est_vec=pars_est_vec)  

In [None]:
import numpy as np
import scipy.linalg as la

def sdls(A,B,X0=None,Y0=None,tol=1e-10,verbose=False):
    """
    Solve the symmetric semidefinite constrained least squares problem

        min  norm(A*X-B,'fro')
        s.t. X symm. pos. semidef.

    with a standard path-following interior-point method based on the AHO
    search direction. A and B are real m-by-n matrices and X is a real
    n-by-n matrix.

    Python port of the MATLAB routine SDLS by Nathan Krislock:

        N. Krislock. Numerical solution of semidefinite constrained least
        squares problems. M.Sc. thesis, University of British Columbia,
        Vancouver, British Columbia, Canada, 2003.

    Parameters
    ----------
    A, B : (m, n) arrays
        Data of the least squares problem.
    X0, Y0 : (n, n) arrays or None
        Initial strictly feasible (symmetric positive definite) primal and
        dual iterates. None selects the identity. The inputs are copied and
        never modified.
    tol : float
        Zero tolerance, see the stopping rule below.
    verbose : bool
        If true, print one progress line per iteration.

    Returns
    -------
    X, Y : (n, n) arrays
        Approximate optimal solutions of the primal problem and its
        associated dual, with

            norm(res,'fro')  <=  sqrt(tol)*norm(res0,'fro')
                 trace(X*Y)  <=  tol*trace(X0*Y0)

        where res = (Z+Z')/2 - Y, Z = A'*(A*X-B), and res0 is res evaluated
        at X0, Y0.
    norm_res : float
        norm(res,'fro') at the final iterate.
    muv : array of length max_iter + 1
        Duality gaps trace(X*Y)/n: muv[0] belongs to the initial iterate and
        muv[i] to the iterate after step i; entries beyond `r` stay zero.
    r : int
        Number of iterations performed.
    fail : bool
        True if the tolerances were not reached within the maximum number of
        iterations.
    """

    max_iter = 1000  # max iteration
    n = A.shape[1]
    AA, AB, I = A.T.dot(A), A.T.dot(B), np.eye(n)
    # Float copies: the iterates are updated in place below, so the caller's
    # arrays must not be aliased (and integer input must not break `+=`).
    X = np.eye(n) if X0 is None else np.array(X0, dtype=float)
    # The dual iterate starts from Y0 (the previous code copied X0 here, so a
    # user-supplied Y0 was silently ignored).
    Y = np.eye(n) if Y0 is None else np.array(Y0, dtype=float)

    XAA, XY = X.dot(AA), X.dot(Y)
    Z = XAA.T - AB
    Z = (Z+Z.T)/2
    R = Z - Y
    norm_res = np.linalg.norm(R)
    mu = np.trace(XY)/n
    # One slot for the initial gap plus one per iteration.
    muv = np.zeros(max_iter + 1)
    muv[0] = mu
    tol1 = np.sqrt(tol)*norm_res
    tol2 = tol*mu
    r, theta = 0, 0
    while ( norm_res > tol1 or mu > tol2 ) and r < max_iter :
        # Compute sigma and tau
        sigma = 1.0/n**2 if norm_res < tol1 else 1 - 1.0/n**2
        tau = sigma*mu
        # Compute the AHO search direction (dX,dY)
        E = (np.kron(I,Y)+np.kron(Y,I))/2
        XZ = X.dot(Z)
        M  = (np.kron(I,XAA)+np.kron(AA,X)+np.kron(X,AA)+np.kron(XAA,I))/4 + E
        d = (tau*I - (XZ+XZ.T)/2).reshape(-1,order='F')
        # d = F*vec(-R) + vec(tau*I-(X*Y+Y*X)/2);

        # Solve M dx = d directly. The previous hand-rolled LU solve used
        # P.dot(d), which is the MATLAB convention (P*M = L*U); scipy's lu
        # returns M = P L U and would need P.T.dot(d) instead.
        dx = np.linalg.solve(M, d)

        # Un-vectorise column-major, matching the order='F' vectorisation of d.
        dX = dx.reshape((n, n), order='F')
        dX = (dX+dX.T)/2
        AAdX = AA.dot(dX)
        dY = (AAdX+AAdX.T)/2 + R
        dY = (dY+dY.T)/2
        # Compute the step length theta
        c = 0.9 + 0.09*theta
        theta1, theta2 = max_step(X,dX), max_step(Y,dY)
        theta_max = np.min((theta1, theta2))
        theta = np.min((c*theta_max,1))
        # Update
        X += theta*dX
        Y += theta*dY
        XAA, XY = X.dot(AA), X.dot(Y)
        Z = XAA.T - AB
        Z = (Z+Z.T)/2
        R = Z - Y
        norm_res = np.linalg.norm(R)
        mu = np.trace(XY)/n
        # Advance the counter first so that muv[0] keeps the initial gap.
        r += 1
        muv[r] = mu
        if verbose:
            print('iter %4d: norm_res = %.3e, mu = %.3e, theta = %.3f'
                  % (r, norm_res, mu, theta))
    fail = r == max_iter and ( norm_res > tol1 or mu > tol2 )
    if fail:
        print('\n Failed to reach desired tolerance. \n')

    return X,Y,norm_res,muv,r,fail
        
def mat(v,n=None):
    # V = mat(v,n)
    #
    # Inverse of column-stacking: fills an m-by-n float matrix column by
    # column from consecutive length-m chunks of the vector v.
    # With n omitted, v is taken to hold a square matrix and
    # m = n = int(sqrt(v.size)); otherwise m = v.size // n.
    if n is None:
        n = int(np.sqrt(v.size))
        rows = n
        total = rows * n
    else:
        total = v.size
        rows = total // n
    out = np.zeros((rows, n))
    # chunk number `col` of v becomes column `col` of the result
    for col, start in enumerate(range(0, total, rows)):
        out[:, col] = v[start:start + rows]
    return out
                  
def max_step(X,dX):
    # theta = MAX_STEP(X,dX)
    #
    # For n-by-n symmetric X and dX with X positive definite, returns the
    # largest theta_max > 0 such that X + theta*dX stays positive definite
    # for all 0 < theta < theta_max; theta_max = Inf if no step size ever
    # leaves the cone.
    #
    # Computed as 1/lambda_max of the generalised eigenproblem
    # (-dX) v = lambda X v whenever lambda_max > 0.

    gen_eigs = la.eig(-dX, X, left=False, right=False)
    top = np.max(gen_eigs)

    # a noticeably complex leading eigenvalue is reported, then its real part is used
    if np.isfinite(top) and not np.allclose(top, np.real(top)):
        print('\n Warning: max_step returns complex argument')
        print('max_step: ', top)
        print('real(max_step): ', np.real(top))

    top_real = np.real(top)
    if top_real > 0:
        return 1/top_real
    return np.inf


In [None]:
import scipy.stats as stats
# Smoke test for sdls: recover a positive definite Pi from the stacked
# products A^i Pi, i = 1..4.
# NOTE: this rebinds the notebook-global n (other cells use it as the latent dimension).
n = 100
df = n  # not used anywhere in this cell
A = np.random.normal(size=(n,n))
# Wishart draw with n degrees of freedom and identity scale, divided by n.
Pi = stats.wishart.rvs(n, np.eye(n))/n

# Stack the four n-by-n blocks A^i Pi (targets, X) and A^i (design, As) vertically.
X = np.zeros((4*n,n))
As = np.zeros((4*n,n))
for i in range(4):
    X[i*n:(i+1)*n,:] = np.linalg.matrix_power(A,i+1).dot(Pi)
    As[i*n:(i+1)*n,:] = np.linalg.matrix_power(A,i+1)
    
# Solves min ||As X - X_stacked||_F over PSD X; from here on X holds the
# solution and no longer the stacked targets.
X,Y,norm_res,muv,r,fail = sdls(As,X,X0=None,Y0=None,tol=1e-10,verbose=False)


# Notebook display of the recovery error (all zeros would be a perfect recovery).
X - Pi

# Check log-barrier values and gradients

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import fmin_bfgs, check_grad
n = 3
def f_i(theta):
    """Log-barrier value -log det(I - A A^T), with A the n-by-n reshape of theta[:n*n].

    Prints a notice when the determinant is negative; the returned value is
    nan in that case.
    """
    dyn = theta[:n*n].reshape(n,n)
    gap_det = np.linalg.det(np.eye(n) - dyn.dot(dyn.T))
    barrier = -np.log(gap_det)

    if gap_det < 0:
        print('negative det!')

    return barrier
def g_i(theta):
    """Flattened gradient 2 (I - A A^T)^{-1} A, with A the n-by-n reshape of theta[:n*n]."""
    dyn = theta[:n*n].reshape(n,n)
    identity = np.eye(n)
    # solving against the identity yields the explicit inverse of I - A A^T
    gap_inv = np.linalg.solve(identity - dyn.dot(dyn.T), identity)
    return (2 * gap_inv.dot(dyn)).reshape(-1,)
                       
%matplotlib inline
# Random basis with unit-norm columns.
V = np.random.normal(size=(n,n))
V /= np.sqrt(np.sum(V**2,axis=0)).reshape(1,-1)
#theta = np.diag(2 * np.random.uniform(0,1, n) - 1)
# Start from A = V (0.99 I) V^{-1}, flattened.
# NOTE(review): 0.99*I commutes with V, so this is 0.99*I again up to
# round-off and V has no real effect; only the commented-out diagonal
# above would give a basis-dependent start.
theta = 0.99 * np.eye(n)
theta = V.dot(theta).dot(np.linalg.inv(V)).reshape(-1,)

print('A \n', theta.reshape(n,n))
print('eig(A) \n', np.sort(np.abs(np.linalg.eigvals(theta.reshape(n,n))))[::-1])
        
# Compare the analytic gradient g_i with a finite-difference gradient of f_i.
print('difference in gradient to finite-differencing value:', check_grad(f_i, g_i, theta))

# Plain gradient descent on the barrier alone, with a fixed step of 1e-5,
# recording the mean eigenvalue magnitude of A and the barrier value per step.
max_iter = 10000
EVs = np.zeros(max_iter)
fs  = np.zeros(max_iter)
for i in range(max_iter):
    
    theta -= 0.00001 * g_i(theta)
    
    EVs[i] = np.mean(np.sort(np.abs(np.linalg.eigvals(theta.reshape(n,n))))[::-1])
    fs[i]  = f_i(theta)
    
    # progress report five times over the run (every max_iter//5 steps, starting at i = 0)
    if np.mod(i, max_iter//5)==0:
        print('A \n', theta.reshape(n,n))
        print('eig(A) \n', np.sort(np.abs(np.linalg.eigvals(theta.reshape(n,n))))[::-1])
        print('\n f(A)', f_i(theta))
        print('\n')

# Left: mean |eigenvalue| per step, right: barrier value per step.
plt.figure(figsize=(20,7))
plt.subplot(1,2,1)
plt.plot(EVs)
plt.subplot(1,2,2)
plt.plot(fs)
plt.show()                       

# extracting $\Pi$ from latent covariances given $A$ 

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy.optimize import fmin_bfgs, check_grad
import glob, os
import cvxopt

os.chdir('../core')
from utility import get_subpop_stats, gen_pars
from SSID_Hankel_loss import yy_Hankel_cov_mat
os.chdir('../dev')

# Problem dimensions handed to gen_pars below
# (presumably p = observed and n = latent dimensionality -- confirm in utility.gen_pars).
p,n = 1000,2
# k + l determines how many lagged latent covariances are built at the end of this cell.
k,l = 2,2
k_init, l_init = 10,10  # not used in the cells shown here

# nr equals n here, so (n - nr)//2 == 0 and ev_c below is an empty array.
nr = 2
batch_size = p # batch_size = 1 (size-1 mini-batches), p (column mini-batches), None (full gradients)

# Optimiser-style settings; none of them is referenced by the cells shown below.
a, b1, b2, e = 0.0001, 0.9, 0.99, 1e-8
max_iter, max_iter_init = 100, 10

reps = 1
if_stable = False

# create subpopulations
# Two index ranges, 0..p//2 and p//2-1..p-1; they share the indices p//2-1 and p//2.
sub_pops = (np.arange(0,p//2+1), np.arange(p//2-1,p))

# draw system matrices    
ev_r = np.linspace(0.7, 0.99, nr)
ev_c = np.exp(2 * 1j * np.pi * np.random.uniform(size= (n - nr)//2))
ev_c = np.linspace(0.8, 0.99, (n - nr)//2) * ev_c

# Parameter dict; the entries 'A' and 'Pi' are used below.
pars = gen_pars(p,n, nr , ev_r , ev_c )

obs_idx, idx_grp, co_obs, overlaps, overlap_grp, idx_overlap, Om, Ovw, Ovc = \
    get_subpop_stats(sub_pops, p, verbose=False)

# Lagged latent covariances: Xl[0] = Pi and Xl[m] = A Xl[m-1] = A^m Pi
# for m = 1 .. k+l-2.
Xl = [pars['Pi'].copy()]
for m in range(1,k+l-1):
    Xl.append(pars['A'].dot(Xl[m-1]))

In [None]:
# Stack the powers A^m and the matching covariances A^m Pi (m = 1 .. k+l-2)
# so that As.dot(Pi) equals Xs.
As = np.vstack([np.linalg.matrix_power(pars['A'],m) for m in range(1,k+l-1)])
Xs = np.vstack(Xl[1:])

# Column-major vectorisation of Xs, and the Kronecker form of "multiply by As
# from the left": P2.dot(vec(X)) == vec(As.dot(X)) for column-major vec.
vX = Xs.T.reshape(-1,)
P2 = np.kron(np.eye(n), As)

# NOTE(review): this rebinds the notebook-global name `mat`, which an earlier
# cell defines as a vector-to-matrix reshape; code relying on that earlier
# meaning breaks once this cell has run.
def mat(X):
    # Convert a numpy array into a cvxopt matrix of doubles.
    return cvxopt.matrix(X, tc='d')

# Quadratic and linear term of 0.5 x'Px + q'x with P = P2'P2 and q = -P2'vX,
# i.e. the least squares objective ||P2 x - vX||^2 up to scale and a constant.
P,q = mat(P2.T.dot(P2)), mat(- vX.T.dot(P2))
# G = -I and h = 0 together with a single n-by-n 's' block in dims; presumably
# this constrains the matrix form of x to be positive semidefinite -- confirm
# against the cvxopt coneqp documentation.
G,h = mat(-np.eye(n*n)), mat(np.zeros((n*n,1)))
dims={'l': 0, 'q': [], 's': [n]}

In [None]:
# Silence the solver's iteration log, then solve the cone QP assembled in the previous cell.
cvxopt.solvers.options['show_progress'] = False
sol = cvxopt.solvers.coneqp(P=P, q=q, G=G, h=h, dims=dims )


In [None]:
# Display the solution vector reshaped into an n-by-n matrix (compare with pars['Pi']).
np.asarray(sol['x']).reshape(n,n)

In [None]:
# Display the raw QP data for inspection.
P, q, G, h