In [None]:
% matplotlib inline
import matplotlib.pyplot as plt
from ssidid.SSID_Hankel_loss import g_C_l2_vector_pair_sso
import numpy as np
import time 

# Benchmark of the imported `g_C_l2_vector_pair_sso` on synthetic data.
# C has shape (p, n), x has shape (T, n) and y = x C^T has shape (T, p).
p, n, T = 20, 5, 100

# Fixed seed; the order of the draws below determines the data.
np.random.seed(0)
C = np.random.normal(size=(p, n))
R = np.random.normal(size=(p,))
Xm = np.random.normal(size=(n, n))

x = np.random.normal(size=(T, n))
y = np.dot(x, C.T)

lag_range = np.arange(n)
kl_ = lag_range.max() + 1
m_ = 1
ts = range(T - kl_)
grad = np.zeros_like(C)
# Alternative grouping (random half-size index sets):
# a,b = np.random.choice(p, p//2, replace=False), np.random.choice(p, p//2, replace=False)
a = np.arange(p)
b = np.arange(p)

# Two index groups; group 0 is used for `is_`, group 1 for `js_`.
idx_grp = (a, b)
is_ = (0,)
js_ = (1,)
inj = []

# Per-group precomputed quantities: C_g^T C_g, a copy of C_g, and a copy of
# the matching rows of the (still all-zero) gradient.
CCs = [C[rows, :].T.dot(C[rows, :]) for rows in idx_grp]
Cs = [C[rows, :].copy() for rows in idx_grp]
grads = [grad[rows, :].copy() for rows in idx_grp]

# Time one accumulation pass over all time offsets in `ts`; `grad` is
# passed to the kernel on every call and plotted afterwards.
t_ = time.time()
for t in ts:
    yp = y[t]
    yf = y[t + m_]
    Wm = np.ones((len(idx_grp), len(idx_grp)))
    g_C_l2_vector_pair_sso(grad, m_, C, Xm, R, CCs, idx_grp, is_, js_, inj, yp, yf, Wm)
print(time.time() - t_)
# print(grad)


# Show the accumulated gradient as an image.
plt.imshow(grad, interpolation='None')
plt.colorbar()
plt.show()

In [None]:
%load_ext Cython

In [None]:
%%cython

import time
import numpy as np
cimport numpy as np
import cython
from libc.string cimport memset
from scipy.linalg.cython_blas cimport dsymm, dcopy, dgemm, dgemv, daxpy, dsyrk, \
    dtrmv, dger, dnrm2, ddot, dscal
from scipy.linalg.cython_lapack cimport dpotrf, dpotrs, dpotri, dtrtrs
#from util cimport copy_transpose, copy_upper_lower

cdef void pyc_g_C_l2_vector_pair_sso(grads, int m_, C, double[:,:] Xm, double[:] R, 
                                     CCs, idx_grp, is_, js_, inj, yps, yfs, double[:,:] Wm):
    """Accumulate per-group gradient contributions into ``grads`` in place (BLAS version).

    For every ``i`` in ``is_`` (with ``a = idx_grp[i]``)::

        SC = sum_{j in js_} Wm[i, j] * CCs[j]
        Sy = sum_{j in js_} Wm[i, j] * yps[j].dot(C[idx_grp[j], :])
        grads[i] += C[a, :].dot(Xm.dot(SC).dot(Xm.T)) - outer(yfs[i], Sy.dot(Xm.T))

    and for every ``j`` in ``js_`` (with ``b = idx_grp[j]``)::

        SC = sum_{i in is_} Wm[i, j] * CCs[i]
        Sy = sum_{i in is_} Wm[i, j] * yfs[i].dot(C[idx_grp[i], :])
        grads[j] += C[b, :].dot(Xm.T.dot(SC).dot(Xm)) - outer(yps[j], Sy.dot(Xm))

    Parameters
    ----------
    grads : sequence of float64 arrays; ``grads[g]`` holds ``len(idx_grp[g]) * n``
        values and is updated in place. It must be C-contiguous, otherwise
        ``reshape`` returns a copy and the update is lost.
    m_ : int, currently unused (the ``m_ == 0`` term is not implemented here).
    C : 2-D NumPy array with ``n = C.shape[1]`` columns, indexed as ``C[idx, :]``.
    Xm : (n, n) matrix.
    R : currently unused.
    CCs : sequence of (n, n) arrays, one per group.
    idx_grp : sequence of row-index arrays into ``C``.
    is_, js_ : iterables of group indices.
    inj : currently unused.
    yps, yfs : sequences of 1-D arrays; entry ``g`` must have length
        ``len(idx_grp[g])``.
    Wm : weight matrix indexed as ``Wm[i, j]``.
    """
    cdef int n = C.shape[1]
    cdef int nn = n * n
    cdef double one = 1., zero = 0., neg1 = -1.
    cdef double w
    cdef int one_int = 1
    cdef int pa, pb

    if n == 0:
        return

    # All buffers handed to BLAS are C-ordered (row-major). BLAS is
    # column-major, so a row-major (r, c) buffer is seen by BLAS as the
    # transposed (c, r) matrix with leading dimension c. Every call below is
    # therefore written for the transposed product.
    cdef double[:, ::1] X = np.ascontiguousarray(Xm, dtype=np.float64)
    cdef double[:, ::1] SC = np.zeros((n, n))
    cdef double[:, ::1] tmp_nn = np.zeros((n, n))
    # Second scratch matrix: dgemm must not write into one of its inputs.
    cdef double[:, ::1] tmp_nn2 = np.zeros((n, n))
    cdef double[::1] Sy = np.zeros(n)
    cdef double[::1] tmp_n = np.zeros(n)
    cdef double[:, ::1] SC_, Ca, Cb, grad_a, grad_b
    cdef double[::1] ya, yb

    for i in is_:
        a = idx_grp[i]
        pa = len(a)
        if pa == 0:
            continue

        ya = np.ascontiguousarray(yfs[i], dtype=np.float64)
        Ca = np.ascontiguousarray(C[a, :], dtype=np.float64)
        # View on grads[i]; the BLAS calls below update it in place.
        grad_a = grads[i].reshape(pa, n)

        # Reset the accumulators; the byte count is that of the buffer, not
        # of the memoryview struct.
        memset(&SC[0, 0], 0, nn * sizeof(double))
        memset(&Sy[0], 0, n * sizeof(double))
        for j in js_:
            b = idx_grp[j]
            pb = len(b)
            w = Wm[i, j]

            # SC += CCs[j] * Wm[i,j]
            SC_ = np.ascontiguousarray(CCs[j], dtype=np.float64)
            daxpy(&nn, &w, &SC_[0, 0], &one_int, &SC[0, 0], &one_int)

            if pb > 0:
                # Sy += yp[b].dot(C[b,:]) * Wm[i,j]   (= w * Cb^T yb)
                yb = np.ascontiguousarray(yps[j], dtype=np.float64)
                Cb = np.ascontiguousarray(C[b, :], dtype=np.float64)
                dgemv('N', &n, &pb, &w, &Cb[0, 0], &n, &yb[0], &one_int,
                      &one, &Sy[0], &one_int)

        # tmp_nn = Xm.dot(SC)            (column-major: SC^T Xm^T)
        dgemm('N', 'N', &n, &n, &n, &one, &SC[0, 0], &n, &X[0, 0], &n,
              &zero, &tmp_nn[0, 0], &n)
        # tmp_nn2 = tmp_nn.dot(Xm.T)     (column-major: Xm tmp_nn^T)
        dgemm('T', 'N', &n, &n, &n, &one, &X[0, 0], &n, &tmp_nn[0, 0], &n,
              &zero, &tmp_nn2[0, 0], &n)
        # grad[a,:] += C[a,:].dot(tmp_nn2)   (column-major: tmp_nn2^T Ca^T)
        dgemm('N', 'N', &n, &pa, &n, &one, &tmp_nn2[0, 0], &n, &Ca[0, 0], &n,
              &one, &grad_a[0, 0], &n)

        # tmp_n = Sy.dot(Xm.T)           (= Xm Sy)
        dgemv('T', &n, &n, &one, &X[0, 0], &n, &Sy[0], &one_int,
              &zero, &tmp_n[0], &one_int)
        # grad[a,:] -= outer(yf[a], tmp_n)   (column-major: tmp_n ya^T)
        dger(&n, &pa, &neg1, &tmp_n[0], &one_int, &ya[0], &one_int,
             &grad_a[0, 0], &n)

    for j in js_:
        b = idx_grp[j]
        pb = len(b)
        if pb == 0:
            continue

        yb = np.ascontiguousarray(yps[j], dtype=np.float64)
        Cb = np.ascontiguousarray(C[b, :], dtype=np.float64)
        # View on grads[j]; updated in place.
        grad_b = grads[j].reshape(pb, n)

        memset(&SC[0, 0], 0, nn * sizeof(double))
        memset(&Sy[0], 0, n * sizeof(double))
        for i in is_:
            a = idx_grp[i]
            pa = len(a)
            w = Wm[i, j]

            # SC += CCs[i] * Wm[i,j]
            SC_ = np.ascontiguousarray(CCs[i], dtype=np.float64)
            daxpy(&nn, &w, &SC_[0, 0], &one_int, &SC[0, 0], &one_int)

            if pa > 0:
                # Sy += yf[a].dot(C[a,:]) * Wm[i,j]   (= w * Ca^T ya)
                ya = np.ascontiguousarray(yfs[i], dtype=np.float64)
                Ca = np.ascontiguousarray(C[a, :], dtype=np.float64)
                dgemv('N', &n, &pa, &w, &Ca[0, 0], &n, &ya[0], &one_int,
                      &one, &Sy[0], &one_int)

        # tmp_nn = Xm.T.dot(SC)          (column-major: SC^T Xm)
        dgemm('N', 'T', &n, &n, &n, &one, &SC[0, 0], &n, &X[0, 0], &n,
              &zero, &tmp_nn[0, 0], &n)
        # tmp_nn2 = tmp_nn.dot(Xm)       (column-major: Xm^T tmp_nn^T)
        dgemm('N', 'N', &n, &n, &n, &one, &X[0, 0], &n, &tmp_nn[0, 0], &n,
              &zero, &tmp_nn2[0, 0], &n)
        # grad[b,:] += C[b,:].dot(tmp_nn2)
        dgemm('N', 'N', &n, &pb, &n, &one, &tmp_nn2[0, 0], &n, &Cb[0, 0], &n,
              &one, &grad_b[0, 0], &n)

        # tmp_n = Sy.dot(Xm)             (= Xm^T Sy)
        dgemv('N', &n, &n, &one, &X[0, 0], &n, &Sy[0], &one_int,
              &zero, &tmp_n[0], &one_int)
        # grad[b,:] -= outer(yp[b], tmp_n)
        dger(&n, &pb, &neg1, &tmp_n[0], &one_int, &yb[0], &one_int,
             &grad_b[0, 0], &n)

    # TODO: the m_ == 0 term is not ported to this version yet. The NumPy form is
    #     if m_ == 0:
    #         for i in inj:
    #             anb = idx_grp[i]
    #             grad[anb,:] += (R[anb]*Wm[i,i]).reshape(-1,1) * (C[anb,:].dot(Xm+Xm.T))
    
# Benchmark of the Cython kernel defined above, on the same synthetic data
# as the reference benchmark (same seed, same order of random draws).
p, n, T = 20, 5, 100

np.random.seed(0)
C = np.random.normal(size=(p, n))
R = np.random.normal(size=(p,))
Xm = np.random.normal(size=(n, n))

x = np.random.normal(size=(T, n))
y = np.dot(x, C.T)

lag_range = np.arange(n)
kl_ = lag_range.max() + 1
m_ = 1
ts = range(T - kl_)
# Alternative grouping (random half-size index sets):
# a,b = np.random.choice(p, p//2, replace=False), np.random.choice(p, p//2, replace=False)
a = np.arange(p)
b = np.arange(p)

# Two index groups; group 0 is used for `is_`, group 1 for `js_`.
idx_grp = (a, b)
is_ = (0,)
js_ = (1,)
inj = []

# Per-group precomputed quantities; `grads` starts as one zero block per group.
CCs = [C[a, :].T.dot(C[a, :]) for a in idx_grp]
Cs = [C[a, :].copy() for a in idx_grp]
grads = [np.zeros((p, n))[a, :].copy() for a in idx_grp]

# Time one accumulation pass; the per-group slices of y are rebuilt for
# every time offset before the kernel is called.
t_ = time.time()
for t in ts:
    yp = y[t]
    yf = y[t + m_]
    Wm = np.ones((len(idx_grp), len(idx_grp)))
    yps = [yp[a].copy() for a in idx_grp]
    yfs = [yf[a].copy() for a in idx_grp]
    pyc_g_C_l2_vector_pair_sso(grads, m_, C, Xm, R, CCs, idx_grp, is_, js_, inj, yps, yfs, Wm)
print(time.time() - t_)
#print(grads)

In [None]:
% matplotlib inline
import matplotlib.pyplot as plt
# Left panel: sum of the two per-group gradients; right panel: `grad`.
for panel, image in enumerate((grads[0] + grads[1], grad), start=1):
    plt.subplot(1, 2, panel)
    plt.imshow(image, interpolation='None')
    plt.colorbar()
plt.show()

In [None]:
# Scatter the summed per-group gradients against `grad`, entry by entry.
summed_flat = (grads[0] + grads[1]).reshape(-1)
grad_flat = grad.reshape(-1)
plt.plot(summed_flat, grad_flat, 'b.')
plt.show()

# check piece by piece

In [None]:
import numpy as np
import time

def g_C_l2_vector_pair_sso(grad, m_, C, Xm, R, CCs, idx_grp, is_, js_, inj, yps, yfs, Wm):
    """Add the ``is_``-side gradient term to ``grad`` in place (NumPy version).

    Only the first half of the full computation is done here: for every
    ``i`` in ``is_`` with ``a = idx_grp[i]``::

        SC = sum_{j in js_} Wm[i, j] * CCs[j]
        Sy = sum_{j in js_} Wm[i, j] * yps[j].dot(C[idx_grp[j], :])
        grad[a, :] += C[a, :].dot(Xm.dot(SC).dot(Xm.T)) - outer(yfs[i], Sy.dot(Xm.T))

    Parameters
    ----------
    grad : 2-D array indexed by the rows in ``idx_grp``; modified in place.
    m_, R, inj : accepted for signature compatibility; not used here.
    C : 2-D array with ``n = C.shape[1]`` columns.
    Xm : (n, n) array.
    CCs : sequence of (n, n) arrays, one per group.
    idx_grp : sequence of row-index arrays into ``C`` / ``grad``.
    is_, js_ : iterables of group indices.
    yps, yfs : per-group observation slices; ``yps[j]`` pairs with the rows
        ``idx_grp[j]`` and ``yfs[i]`` with the rows ``idx_grp[i]``.
    Wm : weight matrix indexed as ``Wm[i, j]``.

    Returns
    -------
    None
    """
    n = C.shape[1]

    for i in is_:
        a = idx_grp[i]
        SC, Sy = np.zeros((n, n), dtype=C.dtype), np.zeros(n, dtype=C.dtype)
        for j in js_:
            b = idx_grp[j]
            SC += CCs[j] * Wm[i, j]
            # Use the per-group slice that was passed in rather than a
            # module-level `yp`, so the function depends only on its arguments.
            Sy += yps[j].dot(C[b, :]) * Wm[i, j]
        grad[a, :] += C[a, :].dot(Xm.dot(SC).dot(Xm.T)) - np.outer(yfs[i], Sy.dot(Xm.T))

# Small-size check of the Python function above, using only the first
# time offset.
p, n, T = 10, 3, 100

# Fixed seed; the order of the draws below determines the data.
np.random.seed(0)
C = np.random.normal(size=(p, n))
R = np.random.normal(size=(p,))
Xm = np.random.normal(size=(n, n))

x = np.random.normal(size=(T, n))
y = np.dot(x, C.T)

lag_range = np.arange(n)
kl_ = lag_range.max() + 1
m_ = 1
ts = range(T - kl_)
# Alternative grouping (random half-size index sets):
# a,b = np.random.choice(p, p//2, replace=False), np.random.choice(p, p//2, replace=False)
a = np.arange(p)
b = np.arange(p)

# Two index groups; group 0 is used for `is_`, group 1 for `js_`.
idx_grp = (a, b)
is_ = (0,)
js_ = (1,)
inj = []

CCs = [C[rows, :].T.dot(C[rows, :]) for rows in idx_grp]
Cs = [C[rows, :].copy() for rows in idx_grp]

grad = np.zeros((p, n))
grads = [grad[rows, :].copy() for rows in idx_grp]

t_ = time.time()
for t in ts[:1]:
    yp = y[t]
    yf = y[t + m_]
    Wm = np.ones((len(idx_grp), len(idx_grp)))
    yps = [yp[rows].copy() for rows in idx_grp]
    yfs = [yf[rows].copy() for rows in idx_grp]
    # Cython counterpart, kept for reference:
    # c_g_C_l2_vector_pair_sso(grads, m_, C, Xm, R, CCs, idx_grp, is_, js_, inj, yps, yfs, Wm)
    g_C_l2_vector_pair_sso(grad, m_, C, Xm, R, CCs, idx_grp, is_, js_, inj, yps, yfs, Wm)
print(time.time() - t_)
print(y[0])
print(grad)

In [None]:
%load_ext Cython

In [None]:
%%cython 

import time
import numpy as np
cimport numpy as np
import cython
from libc.string cimport memset
from scipy.linalg.cython_blas cimport dsymm, dcopy, dgemm, dgemv, daxpy, dsyrk, \
    dtrmv, dger, dnrm2, ddot, dscal
from scipy.linalg.cython_lapack cimport dpotrf, dpotrs, dpotri, dtrtrs

cdef test(grads, int m_, C, double[:,:] Xm, double[:] R, 
                                     CCs, idx_grp, is_, js_, inj, yps, yfs, double[:,:] Wm):
    """Accumulate per-group gradient contributions into ``grads`` in place (BLAS version).

    For every ``i`` in ``is_`` (with ``a = idx_grp[i]``)::

        SC = sum_{j in js_} Wm[i, j] * CCs[j]
        Sy = sum_{j in js_} Wm[i, j] * yps[j].dot(C[idx_grp[j], :])
        grads[i] += C[a, :].dot(Xm.dot(SC).dot(Xm.T)) - outer(yfs[i], Sy.dot(Xm.T))

    and for every ``j`` in ``js_`` (with ``b = idx_grp[j]``)::

        SC = sum_{i in is_} Wm[i, j] * CCs[i]
        Sy = sum_{i in is_} Wm[i, j] * yfs[i].dot(C[idx_grp[i], :])
        grads[j] += C[b, :].dot(Xm.T.dot(SC).dot(Xm)) - outer(yps[j], Sy.dot(Xm))

    Parameters
    ----------
    grads : sequence of float64 arrays; ``grads[g]`` holds ``len(idx_grp[g]) * n``
        values and is updated in place. It must be C-contiguous, otherwise
        ``reshape`` returns a copy and the update is lost.
    m_ : int, currently unused (the ``m_ == 0`` term is not implemented here).
    C : 2-D NumPy array with ``n = C.shape[1]`` columns, indexed as ``C[idx, :]``.
    Xm : (n, n) matrix.
    R : currently unused.
    CCs : sequence of (n, n) arrays, one per group.
    idx_grp : sequence of row-index arrays into ``C``.
    is_, js_ : iterables of group indices.
    inj : currently unused.
    yps, yfs : sequences of 1-D arrays; entry ``g`` must have length
        ``len(idx_grp[g])``.
    Wm : weight matrix indexed as ``Wm[i, j]``.

    Returns
    -------
    None
    """
    cdef int n = C.shape[1]
    cdef int nn = n * n
    cdef double one = 1., zero = 0., neg1 = -1.
    cdef double w
    cdef int one_int = 1
    cdef int pa, pb

    if n == 0:
        return

    # All buffers handed to BLAS are C-ordered (row-major). BLAS is
    # column-major, so a row-major (r, c) buffer is seen by BLAS as the
    # transposed (c, r) matrix with leading dimension c. Every call below is
    # therefore written for the transposed product, which leaves the result
    # in row-major layout without any explicit transposed copies.
    cdef double[:, ::1] X = np.ascontiguousarray(Xm, dtype=np.float64)
    cdef double[:, ::1] SC = np.zeros((n, n))
    cdef double[:, ::1] tmp_nn = np.zeros((n, n))
    cdef double[:, ::1] tmp_nn2 = np.zeros((n, n))
    cdef double[::1] Sy = np.zeros(n)
    cdef double[::1] tmp_n = np.zeros(n)
    cdef double[:, ::1] SC_, Ca, Cb, grad_a, grad_b
    cdef double[::1] ya, yb

    for i in is_:
        a = idx_grp[i]
        pa = len(a)
        if pa == 0:
            continue

        ya = np.ascontiguousarray(yfs[i], dtype=np.float64)
        Ca = np.ascontiguousarray(C[a, :], dtype=np.float64)
        # View on grads[i]; the BLAS calls add to it directly, so an
        # existing gradient is kept exactly once.
        grad_a = grads[i].reshape(pa, n)

        # Reset the accumulators for every i; the byte count is that of the
        # buffer, not of the memoryview struct.
        memset(&SC[0, 0], 0, nn * sizeof(double))
        memset(&Sy[0], 0, n * sizeof(double))
        for j in js_:
            b = idx_grp[j]
            pb = len(b)
            w = Wm[i, j]

            # SC += CCs[j] * Wm[i,j]
            SC_ = np.ascontiguousarray(CCs[j], dtype=np.float64)
            daxpy(&nn, &w, &SC_[0, 0], &one_int, &SC[0, 0], &one_int)

            if pb > 0:
                # Sy += yp[b].dot(C[b,:]) * Wm[i,j]   (= w * Cb^T yb)
                yb = np.ascontiguousarray(yps[j], dtype=np.float64)
                Cb = np.ascontiguousarray(C[b, :], dtype=np.float64)
                dgemv('N', &n, &pb, &w, &Cb[0, 0], &n, &yb[0], &one_int,
                      &one, &Sy[0], &one_int)

        # tmp_nn = Xm.dot(SC)            (column-major: SC^T Xm^T)
        dgemm('N', 'N', &n, &n, &n, &one, &SC[0, 0], &n, &X[0, 0], &n,
              &zero, &tmp_nn[0, 0], &n)
        # tmp_nn2 = tmp_nn.dot(Xm.T)     (column-major: Xm tmp_nn^T)
        dgemm('T', 'N', &n, &n, &n, &one, &X[0, 0], &n, &tmp_nn[0, 0], &n,
              &zero, &tmp_nn2[0, 0], &n)
        # grad[a,:] += C[a,:].dot(tmp_nn2)   (column-major: tmp_nn2^T Ca^T)
        dgemm('N', 'N', &n, &pa, &n, &one, &tmp_nn2[0, 0], &n, &Ca[0, 0], &n,
              &one, &grad_a[0, 0], &n)

        # tmp_n = Sy.dot(Xm.T)           (= Xm Sy)
        dgemv('T', &n, &n, &one, &X[0, 0], &n, &Sy[0], &one_int,
              &zero, &tmp_n[0], &one_int)
        # grad[a,:] -= outer(yf[a], tmp_n)   (column-major: tmp_n ya^T)
        dger(&n, &pa, &neg1, &tmp_n[0], &one_int, &ya[0], &one_int,
             &grad_a[0, 0], &n)

    for j in js_:
        b = idx_grp[j]
        pb = len(b)
        if pb == 0:
            continue

        yb = np.ascontiguousarray(yps[j], dtype=np.float64)
        Cb = np.ascontiguousarray(C[b, :], dtype=np.float64)
        # View on grads[j]; updated in place.
        grad_b = grads[j].reshape(pb, n)

        memset(&SC[0, 0], 0, nn * sizeof(double))
        memset(&Sy[0], 0, n * sizeof(double))
        for i in is_:
            a = idx_grp[i]
            pa = len(a)
            w = Wm[i, j]

            # SC += CCs[i] * Wm[i,j]
            SC_ = np.ascontiguousarray(CCs[i], dtype=np.float64)
            daxpy(&nn, &w, &SC_[0, 0], &one_int, &SC[0, 0], &one_int)

            if pa > 0:
                # Sy += yf[a].dot(C[a,:]) * Wm[i,j]   (= w * Ca^T ya)
                ya = np.ascontiguousarray(yfs[i], dtype=np.float64)
                Ca = np.ascontiguousarray(C[a, :], dtype=np.float64)
                dgemv('N', &n, &pa, &w, &Ca[0, 0], &n, &ya[0], &one_int,
                      &one, &Sy[0], &one_int)

        # tmp_nn = Xm.T.dot(SC)          (column-major: SC^T Xm)
        dgemm('N', 'T', &n, &n, &n, &one, &SC[0, 0], &n, &X[0, 0], &n,
              &zero, &tmp_nn[0, 0], &n)
        # tmp_nn2 = tmp_nn.dot(Xm)       (column-major: Xm^T tmp_nn^T)
        dgemm('N', 'N', &n, &n, &n, &one, &X[0, 0], &n, &tmp_nn[0, 0], &n,
              &zero, &tmp_nn2[0, 0], &n)
        # grad[b,:] += C[b,:].dot(tmp_nn2)
        dgemm('N', 'N', &n, &pb, &n, &one, &tmp_nn2[0, 0], &n, &Cb[0, 0], &n,
              &one, &grad_b[0, 0], &n)

        # tmp_n = Sy.dot(Xm)             (= Xm^T Sy)
        dgemv('N', &n, &n, &one, &X[0, 0], &n, &Sy[0], &one_int,
              &zero, &tmp_n[0], &one_int)
        # grad[b,:] -= outer(yp[b], tmp_n)
        dger(&n, &pb, &neg1, &tmp_n[0], &one_int, &yb[0], &one_int,
             &grad_b[0, 0], &n)

    # TODO: the m_ == 0 term is not ported to this version yet. The NumPy form is
    #if m_ == 0:
    #    for i in inj:
    #        anb = idx_grp[i]
    #        
    #        Ranb = R[anb] * Wm[i,i]
    #        Canb = C[anb]
    #        tmp_nn = Xm + Xm.T
    #        grad[anb,:] += (R[anb]*Wm[i,i]).reshape(-1,1) * (C[anb,:].dot(tmp_nn))
        
        
#################################################################

# Small-size check of `test` against a plain NumPy evaluation of the same
# expressions, using only the first time offset.
p, n, T = 10, 3, 100

# Fixed seed; the order of the draws below determines the data.
np.random.seed(0)
C = np.random.normal(size=(p, n))
R = np.random.normal(size=(p,))
Xm = np.random.normal(size=(n, n))

x = np.random.normal(size=(T, n))
y = np.dot(x, C.T)

print('y[0]', y[0])

lag_range = np.arange(n)
kl_ = lag_range.max() + 1
m_ = 1
ts = range(T - kl_)
# Alternative grouping (random half-size index sets):
# a,b = np.random.choice(p, p//2, replace=False), np.random.choice(p, p//2, replace=False)
a = np.arange(p)
b = np.arange(p)

# Two index groups; group 0 is used for `is_`, group 1 for `js_`.
idx_grp = (a, b)
is_ = (0,)
js_ = (1,)
inj = []

CCs = [C[a, :].T.dot(C[a, :]) for a in idx_grp]
Cs = [C[a, :].copy() for a in idx_grp]

grad = np.zeros((p, n))
grads = [grad[a, :].copy() for a in idx_grp]

# --- BLAS version: updates the per-group blocks in `grads` ---
t_ = time.time()
for t in ts[:1]:
    yp = y[t]
    yf = y[t + m_]
    Wm = 1/T  * np.ones((len(idx_grp), len(idx_grp)))
    yps = [yp[a].copy() for a in idx_grp]
    yfs = [yf[a].copy() for a in idx_grp]
    test(grads, m_, C, Xm, R, CCs, idx_grp, is_, js_, inj, yps, yfs, Wm)
    print('out', grads[0] + grads[1])
print(time.time() - t_)

# --- NumPy reference: accumulates into `grad`, reusing yp, yf and Wm left
# over from the loop above ---
t_ = time.time()
for i in is_:
    a = idx_grp[i]
    SC = np.zeros((n, n), dtype=C.dtype)
    Sy = np.zeros(n, dtype=C.dtype)
    for j in js_:
        b = idx_grp[j]
        SC += CCs[j] * Wm[i, j]
        Sy += yp[b].dot(C[b, :]) * Wm[i, j]
    quad_term = C[a, :].dot(Xm.dot(SC).dot(Xm.T))
    lin_term = np.outer(yf[a], Sy.dot(Xm.T))
    grad[a, :] += quad_term - lin_term
for j in js_:
    b = idx_grp[j]
    SC = np.zeros((n, n), dtype=C.dtype)
    Sy = np.zeros(n, dtype=C.dtype)
    for i in is_:
        a = idx_grp[i]
        SC += CCs[i] * Wm[i, j]
        Sy += yf[a].dot(C[a, :]) * Wm[i, j]
    quad_term = C[b, :].dot(Xm.T.dot(SC).dot(Xm))
    lin_term = np.outer(yp[b], Sy.dot(Xm))
    grad[b, :] += quad_term - lin_term
if m_ == 0:
    for i in inj:
        anb = idx_grp[i]
        grad[anb, :] += (R[anb] * Wm[i, i]).reshape(-1, 1) * (C[anb, :].dot(Xm + Xm.T))

print(time.time() - t_)

# Expected ("desired") result to compare with the 'out' print above.
des = grad
print('des', des)
