# Stochastic gradient descent
- three variants
    - size-1 mini-batches (batch_size = 1), zips through all observed entries of $\mbox{cov}(y_{t+m},y_t)$ and computes gradients from one entry at a time
    - column mini-batches (batch_size = p), zips through all columns of $\mbox{cov}(y_{t+m},y_t)$ and computes gradients from the observed entries of one column at a time
    - batch-gradients (batch_size = None), computes a full gradient using all observed entries of $\mbox{cov}(y_{t+m},y_t)$ at the same time
    
- mini-batch gradients use Adam, which follows the gradients with momentum and re-normalises them along each dimension. Full gradients use plain gradient descent.
- max_iter is defined as the number of 'zips' through the data set. Thus for different batch sizes, max_iter fixes the amount of information visited within the data covariance matrices, *not* the number of gradient steps 

In [None]:
#%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy.optimize import fmin_bfgs, check_grad
import glob, os

os.chdir('../core')
from utility import get_subpop_stats, draw_sys
from SSID_Hankel_loss import f_l2_Hankel, l2_sis_setup, g_l2_Hankel_sis, plot_outputs_l2_gradient_test
from SSID_Hankel_loss import yy_Hankel_cov_mat, l2_sis_draw, adam_zip, adam_zip_stable
import SDLS
os.chdir('../dev')

# Problem sizes: p and n are handed to get_subpop_stats / draw_sys below, and n is
# the side length of the square A and B blocks of the flat parameter vector.
p,n = 10,3
# k, l are forwarded to draw_sys and l2_sis_setup
# (presumably the block counts of the Hankel matrix - TODO confirm against SSID_Hankel_loss)
k,l = 3,3

# number of entries of ev_r; the remaining (n - nr)//2 values go into ev_c
nr = 3

batch_size = p # batch_size = 1 (size-1 mini-batches), p (column mini-batches), None (full gradients)

# Optimiser settings, passed positionally to adam_zip_stable
# (presumably step sizes a / a_A, Adam decay rates b1 / b2 and epsilon e - TODO confirm)
a, a_A, b1, b2, e = 0.0001, 0.0000001, 0.9, 0.99, 1e-8
max_iter = 1000

# weights of the log-barrier term that the descent loop sweeps over (10e-8 == 1e-7)
gammas = np.array([10e-8])
# forwarded to adam_zip_stable together with the stability check s
tau = 0.5

# create subpopulations
# two index ranges over 0..p-1 that share the indices p//2-1 and p//2
sub_pops = (np.arange(0,p//2+1), np.arange(p//2-1,p))

# draw system matrices    
# nr evenly spaced real values in [0.8, 0.999]
ev_r = np.linspace(0.8, 0.999, nr)
# (n - nr)//2 complex values: uniformly random phase on the unit circle ...
ev_c = np.exp(2 * 1j * np.pi * np.random.uniform(size= (n - nr)//2))
# ... scaled to moduli in [0.8, 0.999]; empty when n == nr
ev_c = np.linspace(0.8, 0.999, (n - nr)//2) * ev_c

# only echo the subpopulation indices for small problems
if p < 200:
    print('sub_pops', sub_pops)
# Om, Ovw, Ovc are used further down as masks selecting the observed, overlapping
# and cross-overlapping covariance entries in the error evaluation
obs_idx, idx_grp, co_obs, overlaps, overlap_grp, idx_overlap, Om, Ovw, Ovc = \
    get_subpop_stats(sub_pops, p, verbose=False)
pars_true, Qs, Qs_full = draw_sys(p,n,k,l,Om, nr, ev_r,ev_c)
# f_base is the loss without the log-barrier; the second return value is not needed here
f_base, _ = l2_sis_setup(k,l,n,Qs,Om,idx_grp,obs_idx)



def s(A):
    """Return whether the log-barrier sum(log(1 - sigma_i**2)) of A is finite.

    sigma_i are the singular values of A.  The sum is finite exactly when
    every singular value lies strictly below one; a value of one gives -inf
    and a larger value gives nan, both of which are reported as False.
    """
    _, singular_values, _ = np.linalg.svd(A)
    barrier_terms = np.log(1 - singular_values**2)
    return np.isfinite(barrier_terms.sum())

# per gamma: squared errors on the observed / overlapping / cross-overlapping / stitched entries
err_est  = np.zeros((gammas.size, 4))
# per gamma: eigenvalue magnitudes of the estimated A, sorted ascending
eigA_est = np.zeros((gammas.size, n))
for rep in range(gammas.size):

    gamma = gammas[rep]

    def f_i(theta):
        """Return f_base(theta) plus gamma times the log-barrier -log det(I - A A^T).

        A is the leading n*n block of theta, reshaped to (n, n).
        """
        A = theta[:n*n].reshape(n,n)
        f_log_bar = - np.log( np.linalg.det(np.eye(n)-A.dot(A.T)) )

        # with the barrier switched off, drop a non-finite value so that
        # 0 * inf does not turn the whole loss into nan
        if gamma == 0 and not np.isfinite(f_log_bar):
            f_log_bar = 0

        return f_base(theta) + gamma * f_log_bar

    def g_i(theta, idx_use, idx_co):
        """Return the mini-batch gradient of the base loss plus the barrier gradient.

        The barrier only depends on A, so only the leading n*n entries of its
        gradient are non-zero: d/dA [-log det(I - A A^T)] = 2 (I - A A^T)^{-1} A.
        idx_use and idx_co are forwarded unchanged to g_l2_Hankel_sis.
        """
        A = theta[:n*n].reshape(n,n)
        inv = np.linalg.solve(np.eye(n)-A.dot(A.T), np.eye(n))
        g_log_bar = np.zeros(theta.size)
        g_log_bar[:n*n] = 2 * inv.dot(A).reshape(-1,)

        gamma_g_log_bar = gamma * g_log_bar
        # same guard as in f_i: 0 * inf would otherwise leave nan entries
        if gamma == 0:
            gamma_g_log_bar[np.invert(np.isfinite(gamma_g_log_bar))] = 0
        return g_l2_Hankel_sis(theta,k,l,n,Qs,idx_use,idx_co) + gamma_g_log_bar

    # initial parameters: random diagonal A with entries in [0.7, 0.8), identity B, Gaussian C
    A_0  = np.diag(np.random.uniform(low=0.7, high=0.8, size=n))
    B_0  = np.eye(n) #np.random.normal(size=(n,n))
    C_0  = np.random.normal(size=(p,n))
    Pi_0 = B_0.dot(B_0.T)

    # flat parameter layout: [A (n*n), B (n*n), C (p*n)]
    pars_0 = np.hstack((A_0.reshape(n*n,),
                        B_0.reshape(n*n,),
                        C_0.reshape(p*n,)))

    def converged(theta_old, theta, e, t):
        """Stop only once t reaches max_iter; theta_old, theta and e are ignored."""
        if t >= max_iter:
            return True
        return False
        # alternative criterion on the loss change:
        #return np.abs(f_i(theta_old) - f_i(theta)) < e

    print('starting descent')

    pars_est_vec, traces = adam_zip_stable(f_i,g_i,s,tau,pars_0.copy(),a,a_A,b1,b2,e,max_iter,converged,Om,idx_grp,co_obs,batch_size)
    # traces holds the loss trace and the trace plotted below as 'max sig(A)'
    fs, sig = traces

    # unpack the flat estimate using the same layout as pars_0
    A_est = pars_est_vec[:n*n].reshape(n,n)
    B_est = pars_est_vec[n*n:2*n*n].reshape(n,n)
    Pi_est = B_est.dot(B_est.T)
    C_est = pars_est_vec[-p*n:].reshape(p,n)

    print('gamma =', gamma)

    # Take magnitudes first and sort afterwards: np.sort on complex eigenvalues
    # orders by real part, which leaves the magnitudes unordered and misaligned
    # with the sorted true spectrum they are plotted against later on.
    eigA_est[rep,:] = np.sort(np.abs(np.linalg.eigvals(A_est)))
    print('|eig(A_est)|', eigA_est[rep,:])
    print('|eig(A_true)|', np.sort(np.abs(np.linalg.eigvals(pars_true['A']))))


    # the four errors differ in the covariance source (Qs / Qs_full) and the mask of evaluated entries
    err_est[rep,0] = f_l2_Hankel(pars_est_vec,k,l,n,Qs, Om)
    err_est[rep,1] = f_l2_Hankel(pars_est_vec,k,l,n,Qs,Ovw)
    err_est[rep,2] = f_l2_Hankel(pars_est_vec,k,l,n,Qs,Ovc)
    err_est[rep,3] = f_l2_Hankel(pars_est_vec,k,l,n,Qs_full,~Om)

    print('final squared error on observed parts:', 
          err_est[rep,0])
    print('final squared error on overlapping parts:', 
          err_est[rep,1])
    print('final squared error on cross-overlapping parts:',
          err_est[rep,2])
    print('final squared error on stitched parts:',
          err_est[rep,3])

    pars_init = {'A': A_0, 'C': C_0, 'Pi': Pi_0, 'B': B_0}
    pars_est  = {'A': A_est, 'C': C_est, 'Pi': Pi_est, 'B': B_est}
    #plot_outputs_l2_gradient_test(pars_true, pars_init, pars_est, k, l, Qs, 
    #                                   Qs_full, Om, Ovc, Ovw, f_i, g_i, if_flip = True)

    # optimisation traces for this gamma
    plt.figure(figsize=(20,8))
    plt.subplot(2,1,1)
    plt.plot(fs[:max_iter])
    plt.ylabel('f')
    plt.title('target function error')
    plt.subplot(2,1,2)
    plt.plot(sig[:max_iter])
    plt.ylabel('max sig(A)')
    plt.title('maximum singular value of A')
    plt.show()


# visualise overall results

# Summary figure: one curve per error type (columns of err_est), one x position per gamma.
plt.figure(figsize=(20,8))
# select the hsv colormap once; a second call after plotting changed nothing
plt.hsv()
plt.plot(err_est)
plt.xticks( np.arange(gammas.size), gammas)
# raw string with mathtext delimiters: a plain '\g' is an invalid escape sequence
# and, without the dollar signs, would be drawn literally instead of as a gamma
plt.xlabel(r'$\gamma$')
plt.ylabel('MSE')
plt.legend(['obs.', 'overlap', 'cross-overl.', 'stitched'])
plt.title('Squared errors as function of log-barrier height')
plt.show()

# Eigenvalue magnitudes of the estimated A for every gamma, overlaid with the
# true spectrum (black) and the stability bound |eig| = 1 (dashed red).
plt.figure(figsize=(20,8))
# one colour per gamma, running from red (first gamma) to blue (last gamma)
clrs = np.zeros((gammas.size, 3))
clrs[:,2] = np.linspace(0.05, 0.99, gammas.size)
clrs[:,0] = np.linspace(0.05, 0.99, gammas.size)[::-1]
# pyplot.hold was removed in Matplotlib 3.0; successive plot calls already
# draw into the same axes, so no replacement is needed
for eig_row, clr in zip(eigA_est, clrs):
    plt.plot(eig_row, color=clr)
plt.plot(np.sort(np.abs(np.linalg.eigvals(pars_true['A']))), 'k')
plt.plot([0, 1.1*n], [1, 1], 'r--')
plt.hot()
plt.xticks( np.arange(n), np.arange(n)+1)
plt.xlabel('# eigenvalue')
plt.ylabel('EV')
# NOTE(review): labels are gamma rounded *up* to two decimals, so very small
# gammas (e.g. 1e-7) all show as 0.01 - confirm this is the intended labelling
lgnd = [np.ceil(gammas[i]*100)/100 for i in range(gammas.size)]
lgnd.append('true')
lgnd.append('stability')
plt.legend(lgnd)
plt.axis([0, 1.1*n, plt.ylim()[0], plt.ylim()[1]])
plt.title('Eigenvalues as function of log-barrier height')
plt.show()

    

# just one more turn...

In [None]:
# continue with a barrier weight that is a thousand times smaller
gamma /= 1000
def f_i(theta):
    """Loss with log-barrier: f_base(theta) + gamma * (-log det(I - A A^T)).

    A is read from the first n*n entries of theta as an (n, n) matrix.
    """
    dyn = theta[:n*n].reshape(n, n)
    gap_det = np.linalg.det(np.eye(n) - dyn.dot(dyn.T))
    barrier = -np.log(gap_det)

    # barrier disabled: replace a non-finite value so 0 * inf cannot produce nan
    barrier_is_off = (gamma == 0)
    if barrier_is_off and not np.isfinite(barrier):
        barrier = 0

    return f_base(theta) + gamma * barrier
def g_i(theta, idx_use, idx_co):
    """Mini-batch gradient of the base loss plus gamma times the barrier gradient.

    The barrier gradient 2 (I - A A^T)^{-1} A fills the first n*n entries;
    all other entries are zero.  idx_use and idx_co go straight through to
    g_l2_Hankel_sis.
    """
    n_dyn = n*n
    dyn = theta[:n_dyn].reshape(n, n)
    gap_inverse = np.linalg.solve(np.eye(n) - dyn.dot(dyn.T), np.eye(n))

    barrier_grad = np.zeros(theta.size)
    barrier_grad[:n_dyn] = 2 * gap_inverse.dot(dyn).reshape(-1,)
    weighted_grad = gamma * barrier_grad

    # barrier disabled: clear entries that 0 * inf turned into nan
    if gamma == 0:
        weighted_grad[~np.isfinite(weighted_grad)] = 0

    data_grad = g_l2_Hankel_sis(theta,k,l,n,Qs,idx_use,idx_co)
    return data_grad + weighted_grad

# iteration budget for the continued descent
max_iter = 1000
def converged(theta_old, theta, e, t):
    """Report convergence purely by iteration count.

    Returns True once t has reached max_iter and False before that;
    theta_old, theta and e are accepted but not inspected.
    """
    budget_used_up = t >= max_iter
    return bool(budget_used_up)

# Warm-start from the previous estimate.  adam_zip_stable returns the parameter
# vector and a traces pair; unpack the pair as the gamma sweep does, so that
# fs is the loss trace rather than the whole (fs, sig) tuple.
pars_est_vec, traces = adam_zip_stable(f_i,g_i,s,tau,pars_est_vec.copy(),a,a_A,b1,b2,e,max_iter,converged,Om,idx_grp,co_obs,batch_size)
fs, sig = traces
plt.figure(figsize=(20,8))
plt.plot(fs[:max_iter])
plt.show()



# unpack the flat estimate: [A (n*n), B (n*n), C (p*n)]
A_est = pars_est_vec[:n*n].reshape(n,n)
B_est = pars_est_vec[n*n:2*n*n].reshape(n,n)
Pi_est = B_est.dot(B_est.T)
C_est = pars_est_vec[-p*n:].reshape(p,n)

pars_init = {'A': A_0, 'C': C_0, 'Pi': Pi_0, 'B': B_0}
pars_est  = {'A': A_est, 'C': C_est, 'Pi': Pi_est, 'B': B_est}
plot_outputs_l2_gradient_test(pars_true, pars_init, pars_est, k, l, Qs, 
                                   Qs_full, Om, Ovc, Ovw, f_i, None, if_flip = True)

print('gamma', gamma)
print('eigvals[A]', np.linalg.eigvals(pars_est['A']))

# alternating blocked descent

- using SGD on $C$
- after each pass over the observed parts of the covariance matrix, use the analytic solutions for $A$, $\Pi$

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from scipy.optimize import fmin_bfgs, check_grad
import glob, os

os.chdir('../core')
from utility import get_subpop_stats, draw_sys
import SSID_Hankel_loss
from SSID_Hankel_loss import run_bad, plot_outputs_l2_gradient_test, l2_sis_setup
from SSID_Hankel_loss import yy_Hankel_cov_mat, l2_sis_draw, adam_zip_bad_stable, id_A, id_Pi
os.chdir('../dev')

# Problem sizes for the blocked-descent experiment; p, n, nr are passed to
# get_subpop_stats / draw_sys below.
p,n,nr = 1000, 20, 10
# k, l are forwarded to draw_sys, run_bad and l2_sis_setup
k,l = 5,5
# NOTE(review): k_init / l_init are not referenced anywhere else in the visible notebook
k_init, l_init = 3,3

batch_size = p # batch_size = 1 (size-1 mini-batches), p (column mini-batches), None (full gradients)

# optimiser settings forwarded to run_bad
# (presumably step size a, Adam decay rates b1 / b2 and epsilon e - TODO confirm)
a, b1, b2, e = 0.001, 0.9, 0.99, 1e-8
# iteration budgets for the run_bad calls with linear=False and linear=True, respectively
max_iter_nl =  100
max_iter_lin = 50
reps = 1

# create subpopulations
# two index ranges over 0..p-1 that share the indices p//2-1 and p//2
sub_pops = (np.arange(0,p//2+1), np.arange(p//2-1,p))

obs_idx, idx_grp, co_obs, overlaps, overlap_grp, idx_overlap, Om, Ovw, Ovc = \
    get_subpop_stats(sub_pops=sub_pops, p=p, verbose=False)

# draw system matrices    
# nr evenly spaced real values in [0.7, 0.99]
ev_r = np.linspace(0.7, 0.99, nr)
# (n - nr)//2 complex values: uniformly random phase on the unit circle ...
ev_c = np.exp(2 * 1j * np.pi * np.random.uniform(size= (n - nr)//2))
# ... scaled to moduli in [0.8, 0.99]
ev_c = np.linspace(0.8, 0.99, (n - nr)//2) * ev_c

pars_true, Qs, Qs_full = draw_sys(p=p,n=n,k=k,l=l,Om=Om, nr=nr, ev_r=ev_r,ev_c=ev_c)


# keyword arguments common to both run_bad calls
shared_fit_kwargs = dict(k=k, l=l, n=n, Qs=Qs, Om=Om,
                         sub_pops=sub_pops, idx_grp=idx_grp, co_obs=co_obs, obs_idx=obs_idx,
                         a=a, b1=b1, b2=b2, e=e, batch_size=batch_size)
# keyword arguments common to both diagnostic plots
shared_plot_kwargs = dict(idx_grp=idx_grp, co_obs=co_obs, if_flip=True, m=1)

for rep in range(reps):

    # first fit: linear=False, initialised via init='SSID'
    linear, stable = False, False
    pars_init_ssid, pars_est, traces = run_bad(linear=linear, stable=stable, init='SSID',
                                               max_iter=max_iter_nl, **shared_fit_kwargs)
    # loss handle needed by the plotting routine to compute final errors
    f_i = l2_sis_setup(k,l,n,Qs,Om,idx_grp,obs_idx)[0]
    plot_outputs_l2_gradient_test(pars_true, pars_init_ssid, pars_est, k, l, Qs,
                                  Qs_full, Om, Ovc, Ovw, f_i, None,
                                  traces=traces, linear=linear, **shared_plot_kwargs)

    # second fit: linear=True, started from the first fit with A and Pi blanked out
    linear, stable = True, False
    pars_init_linear = pars_est.copy()
    pars_init_linear['A'] = None
    pars_init_linear['Pi'] = None
    pars_init, pars_est, traces = run_bad(linear=linear, stable=stable, init=pars_init_linear,
                                          max_iter=max_iter_lin, **shared_fit_kwargs)
    plot_outputs_l2_gradient_test(pars_true, pars_init, pars_est, k, l, Qs,
                                  Qs_full, Om, Ovc, Ovw, f_i, None,
                                  traces=traces, linear=linear, **shared_plot_kwargs)
    

# just one more turn...

In [None]:
# Continue fitting from the current estimate with linear=False.
# `stable` and `pars_init` are whatever the preceding rep loop left behind.
max_iter = 150
linear = False
# the returned initial parameters are discarded, so pars_init keeps its earlier value
_, pars_est, traces = run_bad(k=k,l=l,n=n,Qs=Qs,Om=Om,
                                      sub_pops=sub_pops,idx_grp=idx_grp,co_obs=co_obs,obs_idx=obs_idx,
                                      linear=linear,stable=stable,init=pars_est,
                                      a=a,b1=b1,b2=b2,e=e,max_iter=max_iter,batch_size=batch_size)
# loss handle needed by the plotting routine
f_i = l2_sis_setup(k,l,n,Qs,Om,idx_grp,obs_idx)[0]
plot_outputs_l2_gradient_test(pars_true, pars_init, pars_est, k, l, Qs, 
                                   Qs_full, Om, Ovc, Ovw, f_i, None, traces = traces,
                                   linear=linear, idx_grp = idx_grp, co_obs = co_obs, 
                                   if_flip = True, m = 1)


In [None]:
# IPython help query for plt.axis (notebook-only syntax, not valid in a plain Python script)
plt.axis?

# analysis of results

## scatter plots

In [None]:
k_, l_ = 5, 5


def _show_cov_comparison(cov_true, cov_est):
    """Draw both matrices as images, scatter their entries and print their correlation."""
    flat_true = cov_true.reshape(-1,)
    flat_est = cov_est.reshape(-1,)
    plt.figure(figsize=(20,10))
    for panel, mat in enumerate((cov_true, cov_est), start=1):
        plt.subplot(1,3,panel)
        plt.imshow(mat, interpolation='none')
    plt.subplot(1,3,3)
    plt.plot(flat_true, flat_est, 'k.')
    plt.show()
    print(np.corrcoef(flat_true, flat_est))


# Hankel covariance matrices restricted to the observed mask Om, then to its complement
for label, mask in (('observed covariance entries', Om),
                    ('stitched covariance entries', ~Om)):
    H_true = yy_Hankel_cov_mat(pars_true['C'],pars_true['A'],pars_true['Pi'],k_,l_,Om=mask,linear=True)
    if not linear:
        # no (A, Pi) parametrisation in this case: use the latent covariances X returned by the solver
        X = SSID_Hankel_loss.s_A_l2_Hankel_bad_sis(pars_est['C'],k_,l_,Qs,idx_grp,co_obs, linear=linear)[1]
        H_est = yy_Hankel_cov_mat(pars_est['C'], X,None,k_,l_,Om=mask,linear=linear)
    else:
        H_est = yy_Hankel_cov_mat(pars_est['C'],pars_est['A'],pars_est['Pi'],k_,l_,Om=mask,linear=linear)

    print(label)
    _show_cov_comparison(H_true, H_est)

# full (unmasked) covariance at a single time-lag
m = 1
print('full time-lagged covariances, for time-lag m = ', m)
C_true, C_est = pars_true['C'], pars_est['C']
H_true = C_true.dot( np.linalg.matrix_power(pars_true['A'],m).dot(pars_true['Pi']) ).dot(C_true.T)
if not linear:
    H_est = C_est.dot(X[:,m-1].reshape(n,n).dot(C_est.T))
else:
    H_est = C_est.dot( np.linalg.matrix_power(pars_est['A'],m).dot(pars_est['Pi']) ).dot(C_est.T)
_show_cov_comparison(H_true, H_est)



## check linearity of extracted latent covariances

In [None]:
import SDLS

# Check how well the latent covariances X_m extracted from the data follow the
# linear recursion X_{m+1} = A X_m and the parametrisation X_m = A^m Pi.
pars = pars_init
A_true, X_true = SSID_Hankel_loss.s_A_l2_Hankel_bad_sis(pars['C'],k,l,Qs,idx_grp,co_obs, linear=True,stable=False,A_old=None)

print('checking MSE solution for A')
plt.figure(figsize=(20,10))
plt.subplot(1,2,1)
plt.imshow(A_true, interpolation='none')
plt.title('MSE solution for A')
plt.subplot(1,2,2)
plt.imshow(pars['A'], interpolation='none')
plt.title('pars[A]')
plt.show()

print('eig(pars[A])', np.sort(np.linalg.eigvals(pars['A'])))
print('eig(A_rec)', np.sort(np.linalg.eigvals(A_true)))


# Column j of X_true is reshaped to one (n, n) block.  X1 collects blocks
# 0..k+l-3 side by side and X2 the blocks shifted by one, so A * X1 should match X2.
X1,X2 = np.zeros((n, n*(k+l-2))), np.zeros((n, n*(k+l-2)))
for j in range(k+l-2):
    X1[:,j*n:(j+1)*n] = X_true[:,j].reshape(n,n)
    X2[:,j*n:(j+1)*n] = X_true[:,j+1].reshape(n,n)

plt.figure(figsize=(20,10))
plt.subplot(2,1,1)
plt.imshow( X2, interpolation='none')
plt.title('[X2, X3, ...]')
plt.subplot(2,1,2)
plt.imshow( A_true.dot(X1), interpolation='none')
plt.title('A * [X1, X2, ...]')
plt.show()        

# Lag inspected in the three-panel figure and in the final error print.
# Lag m is stored in column m-1 of X_true.
m = 9
print('checking MSE solution for X, m = ', m)
AmPi = np.linalg.matrix_power(pars['A'],m ).dot(pars['Pi'])
X_m  = X_true[:,m-1].reshape(n,n)
plt.figure(figsize=(20,10))
plt.subplot(1,3,1)
plt.imshow( AmPi, interpolation='none')
plt.title('true cov(x_{t+m), x_t)')
plt.subplot(1,3,2)
plt.imshow( X_m, interpolation='none')
plt.title('est. cov(x_{t+m), x_t)')

# Stack lags 1..k+l-2: the powers A^lag and the matching latent covariances,
# giving the least-squares problem As_true * Pi = XT_true.
# The loop uses its own index so that the inspected lag m is not overwritten.
As_true = np.zeros((n*(k+l-2),n))
XT_true = np.zeros((n*(k+l-2),n))
for lag in range(1,k+l-1):
    XT_true[(lag-1)*n:lag*n,:] = X_true[:,lag-1].reshape(n,n)
    As_true[(lag-1)*n:lag*n,:] = np.linalg.matrix_power(A_true,lag)


Pie_true,_,norm_res,muv,r,fail = SDLS.sdls(A=As_true,B=XT_true,X0=None,Y0=None,tol=1e-10,verbose=False)

# third panel: same lag m as the two panels drawn above
plt.subplot(1,3,3)
plt.imshow( np.linalg.matrix_power(A_true,m ).dot(Pie_true), interpolation='none')
plt.title('reconstr. cov(x_{t+m), x_t)')
plt.show()

plt.figure(figsize=(20,12))
plt.subplot(3,1,1)
plt.imshow(XT_true.T, interpolation='none')
plt.title('data est. for latent covs')
plt.subplot(3,1,2)
plt.imshow((As_true.dot(pars['Pi'])).T, interpolation='none')
plt.title('param est. for latent covs, using pars[Pi]')
plt.subplot(3,1,3)
plt.imshow((As_true.dot(Pie_true)).T, interpolation='none')
plt.title('param est. for latent covs, using MSE Pi')
plt.show()
# compare A^m Pi with the lag-m covariance, i.e. column m-1 of X_true
print( 'A^m Pi - X_m' , np.mean((np.linalg.matrix_power(A_true,m).dot(Pie_true) - X_true[:,m-1].reshape(n,n))**2) )
print( 'As X_{m-1} - X_m' , np.mean((A_true.dot(X1) - X2)**2) )

In [None]:
# Scatter the shifted latent covariances [X2, X3, ...] against their one-step
# predictions A * [X1, X2, ...], then show the correlation matrix of the two.
plt.plot(X2.reshape(-1,), (A_true.dot(X1)).reshape(-1,), '.')
plt.show()
np.corrcoef(X2.reshape(-1,), (A_true.dot(X1)).reshape(-1,))

In [None]:
# Visual comparison of the emission matrices: estimated C (top) vs. true C (bottom), both transposed
plt.subplot(2,1,1)
plt.imshow(pars_est['C'].T, interpolation='none')
plt.subplot(2,1,2)
plt.imshow(pars_true['C'].T, interpolation='none')

In [None]:
# display the estimated C as the cell output
pars_est['C']

In [None]:

# Unconstrained least-squares solution of As_true * Pi = XT_true, for comparison
# with the SDLS solution.  The stacked matrices are named As_true / XT_true in
# this notebook; the bare names As / XT are not defined anywhere.
plt.imshow(np.linalg.lstsq(As_true, XT_true)[0], interpolation='none')

In [None]:
from scipy.io import savemat # store results for comparison with Matlab code   

# Store true, initial and estimated parameters, once as a MATLAB file and once as a NumPy archive.
# NOTE: the working directory is changed here and not changed back.
os.chdir('../fits/')

# base name shared by both output files
save_file = 'usbad_p1000n20r2'

# MATLAB dictionary; the 'B' entries for the true and estimated parameters are left out
save_file_m = {'A_true':pars_true['A'],
               #'B_true':pars_true['B'],
               'Pi_true' : pars_true['Pi'], 
               'C_true' : pars_true['C'],
               'A_0': pars_init['A'],
               'B_0': pars_init['B'],
               'Pi_0': pars_init['Pi'],
               'C_0': pars_init['C'],
               'A_est': pars_est['A'],
               #'B_est':  pars_est['B'],
               'Pi_est' :  pars_est['Pi'], 
               'C_est' :  pars_est['C']}

savemat(save_file,save_file_m) # does the actual saving

# flat parameter vectors in the layout [A (n*n), B (n*n), C (p*n)]
# NOTE(review): 'B_true' is commented out of the dictionary above but pars_true['B']
# is read here - confirm that the key exists for parameters from draw_sys
pars_true_vec = np.hstack((pars_true['A'].reshape(n*n,),
                    pars_true['B'].reshape(n*n,),
                    pars_true['C'].reshape(p*n,)))
pars_init_vec = np.hstack((pars_init['A'].reshape(n*n,),
                    pars_init['B'].reshape(n*n,),
                    pars_init['C'].reshape(p*n,)))

# NOTE(review): pars_est_vec is only assigned in the stochastic gradient descent
# cells, not by the blocked-descent fit - confirm it belongs to the same fit as
# pars_true / pars_init before relying on this archive
np.savez(save_file, 
         pars_0_vec=pars_init_vec,
         pars_true_vec=pars_true_vec, 
         pars_est_vec=pars_est_vec)  

# extracting $\Pi$ from latent covariances given $A$ 

In [None]:
%matplotlib inline
import os
import numpy as np
import scipy.stats as stats
os.chdir('../core')
import SDLS
os.chdir('../dev')
import matplotlib.pyplot as plt

# noise scale added to the targets, and tolerance handed to SDLS.sdls
sig = 100
tol = 10e-2
n = 30

# dynamics matrix with prescribed real eigenvalues in [0.8, 0.99] and a random eigenbasis
V = np.random.normal(size=(n, n))
V = V / (V**2).sum(axis=0).reshape(1, -1)
spectrum = np.diag(np.linspace(0.8, 0.99, n))
Adyn = V.dot(spectrum).dot(np.linalg.inv(V))

# rank-one ground truth for Pi
ramp = np.linspace(0.5, 1.0, n)
Pi = np.outer(ramp, ramp)

# stack the first four powers of Adyn (A) and their products with Pi (B)
Adyn_powers = [np.linalg.matrix_power(Adyn, lag) for lag in range(1, 5)]
A = np.vstack(Adyn_powers)
B = np.vstack([Adyn_pow.dot(Pi) for Adyn_pow in Adyn_powers])

# perturb the targets with Gaussian noise
B += sig * np.random.normal(size=B.shape)

Pie, Y, norm_res, muv, r, fail = SDLS.sdls(A, B, X0=None, Y0=None, tol=tol, verbose=False)

residual_max = np.max(np.abs(A.dot(Pie) - B))
estimate_max = np.max(np.abs(Pie - Pi))
print('maximal absolute single-entry deviation of AX and B: ', residual_max)
print('maximal absolute single-entry deviation of Pi and Pi_est: ', estimate_max)

# estimate (left) next to ground truth (right)
for panel, mat in enumerate((Pie, Pi), start=1):
    plt.subplot(1, 2, panel)
    plt.imshow(mat, interpolation='none')
plt.show()

# Repeat the experiment five times, each with a fresh random eigenbasis and a Wishart-distributed Pi
for trial in range(5):
    V = np.random.normal(size=(n, n))
    V = V / (V**2).sum(axis=0).reshape(1, -1)
    spectrum = np.diag(np.linspace(0.8, 0.99, n))
    Adyn = V.dot(spectrum).dot(np.linalg.inv(V))
    Pi = stats.wishart.rvs(df=n, scale=np.eye(n))

    # stack the first four powers of Adyn (A) and their products with Pi (B)
    Adyn_powers = [np.linalg.matrix_power(Adyn, lag) for lag in range(1, 5)]
    A = np.vstack(Adyn_powers)
    B = np.vstack([Adyn_pow.dot(Pi) for Adyn_pow in Adyn_powers])
    B += sig * np.random.normal(size=B.shape)

    Pie, Y, norm_res, muv, r, fail = SDLS.sdls(A, B, X0=None, Y0=None, tol=tol, verbose=False)
    residual_max = np.max(np.abs(A.dot(Pie) - B))
    estimate_max = np.max(np.abs(Pie - Pi))
    print('maximal absolute single-entry deviation of AX and B: ', residual_max)
    print('maximal absolute single-entry deviation of Pi and Pi_est: ', estimate_max)

    # estimate (left) next to ground truth (right)
    for panel, mat in enumerate((Pie, Pi), start=1):
        plt.subplot(1, 2, panel)
        plt.imshow(mat, interpolation='none')
    plt.show()
    