In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
import glob, os

os.chdir('../core')
import stitching_ssid as ssid

os.chdir('../../../../pyRRHDLDS/core')
import ssm_scripts

import pprint

# Covariance matrix completion 
- $\mbox{cov}(y) = C C^\top$
- if cov($y$) has missing off-diagonal blocks, parts of $C$ are underdetermined (change of latent basis)
- Bishop et al. (2014) introduced a basic algorithm for rotating latent bases based on overlap
- Srini had the idea to just learn all chunks of C at the same time using gradient descent

In [None]:
def f(C,Q,Om):
    """Squared error between C C^T and Q, restricted to the entries selected by Om.

    Parameters
    ----------
    C  : 2-D array; the model matrix is C.dot(C.T)
    Q  : 2-D array indexed the same way as C.dot(C.T)
    Om : index applied to both matrices (the notebook passes a boolean mask)

    Returns
    -------
    Sum of squared differences over the selected entries.
    """
    model = np.dot(C, C.T)
    resid = model[Om] - Q[Om]
    # resid is a flat vector, so its inner product with itself is the sum of squares
    return np.dot(resid, resid.T)

def g(C,Q,idx_grp, obs_idx):
    """Assemble the descent direction for C from per-index-group blocks.

    For every index group i, all groups that share at least one entry of
    obs_idx with group i are collected, their row indices are concatenated
    and sorted, and g_idxgrp is called to fill the rows idx_grp[i] of the
    result.

    Parameters
    ----------
    C       : (p, n) array, current estimate
    Q       : matrix passed through to g_idxgrp
    idx_grp : sequence of row-index collections, one per index group
    obs_idx : sequence of collections of group numbers; two groups count as
              co-observed if some element of obs_idx contains both

    Returns
    -------
    (p, n) array; rows not covered by any index group stay zero.
    """
    n_rows, n_cols = C.shape
    n_groups = len(idx_grp)

    grad = np.zeros((n_rows, n_cols))
    for i in range(n_groups):

        # groups j that appear together with group i in at least one entry of obs_idx
        partners = [idx_grp[j] for j in range(n_groups)
                    if any((j in obs) and (i in obs) for obs in obs_idx)]
        co_obs_i = np.sort(np.hstack(partners))

        grad[idx_grp[i],:] = g_idxgrp(C,Q,idx_grp[i],co_obs_i)

    return grad

def g_idxgrp(C,Q,idx_grp_i,co_obs_i):
    """Descent-direction block for the rows idx_grp_i of C.

    Computes (Ci Cc^T - Q_ic) Cc, where Ci = C[idx_grp_i] and
    Cc = C[co_obs_i], and then subtracts, for every row a in idx_grp_i,
    the term (C_a C_a^T - Q_aa) C_a. When idx_grp_i is contained in
    co_obs_i this removes the contribution of the diagonal entries (a, a),
    so only off-diagonal residuals drive the update.

    Parameters
    ----------
    C         : (p, n) array
    Q         : (p, p) array
    idx_grp_i : 1-D integer index array, rows to compute the block for
    co_obs_i  : 1-D integer index array, rows co-observed with idx_grp_i

    Returns
    -------
    (len(idx_grp_i), n) array.
    """
    C_own = C[idx_grp_i,:]
    C_co = C[co_obs_i,:]
    Q_block = Q[np.ix_(idx_grp_i, co_obs_i)]

    # residual of the model on the (idx_grp_i, co_obs_i) block
    block_resid = np.dot(C_own, C_co.T) - Q_block
    # residual on the diagonal entries (a, a) for a in idx_grp_i
    diag_resid = np.sum(C_own*C_own,axis=1) - Q[idx_grp_i,idx_grp_i]

    return np.dot(block_resid, C_co) - np.dot(np.diag(diag_resid), C_own)


# C is a p x n matrix in the experiments below
p, n = 11, 3

# create subpopulations (two index ranges that overlap on 4..6)
sub_pops = (np.arange(0, 7), np.arange(4, p))
print('sub_pops', sub_pops)

# Om marks every pair of indices that lies inside a common subpopulation
Om = np.zeros((p, p), dtype=bool)
for pop in sub_pops:
    Om[np.ix_(pop, pop)] = True

obs_scheme = {'sub_pops': sub_pops, 'obs_pops': (0, 1)}
obs_idx, idx_grp = ssid.get_obs_index_groups(obs_scheme=obs_scheme, p=p)

print('idx_grp:', idx_grp)
print('obs_idx:', obs_idx)

# Repeated experiment: draw a random ground truth, fit C by gradient descent
# on the observed entries only, and track the error on the observed (Om) and
# on the held-out, to-be-stitched (~Om) entries.
for rep in range(10):

    C_true = np.random.normal(size=(p,n))
    Q_true = C_true.dot(C_true.T)
    Q_obs = Q_true * np.asarray( Om, dtype=int)   # entries inside Om, zero elsewhere
    Q_sti = Q_true * np.asarray(~Om, dtype=int)   # entries outside Om, zero elsewhere

    C_0 = np.random.normal(size=(p,n))
    C_est = C_0.copy()
    max_iter, a= 10000, 0.001
    # both error traces must exist before the loop writes into them
    err_obs, err_sti = np.empty(max_iter), np.empty(max_iter)
    for t in range(max_iter):

        # halve the step size every 2000 iterations (this also fires at t == 0)
        if np.mod(t,2000)==0:
            a /= 2

        C_est -= a * g(C_est, Q_obs, idx_grp, obs_idx)
        err_obs[t] = f(C_est, Q_obs, Om)
        err_sti[t] = f(C_est, Q_sti,~Om)

    print('final squared error on observed parts:', err_obs[-1])
    print('final squared error on stitched parts:', err_sti[-1])

    # skip the first quarter of the run so the initial transient does not
    # dominate the y-axis; plot against the true iteration numbers
    t_start = max_iter // 4
    iters = np.arange(t_start, max_iter)

    plt.figure(figsize=(20,7))
    plt.subplot(1,4,1)
    # successive plot calls draw into the same axes by default
    # (plt.hold was removed in Matplotlib 3.0)
    plt.plot(iters, err_obs[t_start:], label='observed parts')
    plt.plot(iters, err_sti[t_start:], label='stitched parts')
    plt.xlabel('iteration')
    plt.ylabel('SE on parts of matrix')
    plt.legend()
    plt.title('Squared error')
    plt.subplot(1,4,2)
    plt.imshow(C_0.dot(C_0.T),interpolation='none')
    plt.title('Initial matrix (C_0 C_0^T)')
    plt.subplot(1,4,3)
    plt.imshow(C_est.dot(C_est.T),interpolation='none')
    plt.title('Estimated matrix')
    plt.subplot(1,4,4)
    plt.imshow(Q_true,interpolation='none')
    plt.title('True  matrix')
    plt.show()
    
    
#vCC, vQ, vOm = np.ravel(C.dot(C.T)), np.ravel(Q), np.ravel(Om)

#print(vCC[vOm].dot(vCC[vOm]) + vQ[vOm].dot(vQ[vOm]) - 2 * vCC[vOm].dot(vQ[vOm]))
#print( np.sum( ((C.dot(C.T)-Q)**2)[Om] ) )



# Hankel covariance matrix completion 
- $ H_{k,l} = (I_k \otimes C) H^{xx}_{k,l} (I_l \otimes C)^\top $
- $ H^{xx}_{k,l} = \left[\begin{array}{llll} A \Pi & A^2 \Pi & \ldots & A^l \Pi\\ A^2 \Pi & A^3 \Pi & \ldots & A^{l+1} \Pi\\ \vdots & \vdots & \ddots & \vdots \\ A^{k} \Pi & A^{k+1} \Pi & \ldots & A^{k+l-1} \Pi \end{array} \right] $
- if cov($y$) has missing off-diagonal blocks, parts of $C$ are underdetermined (change of latent basis)
- each block of the Hankel cov matrix $H_{k,l}$ exhibits the same structure of missing entries as does cov($y$).
- We can combine the overlaps of the $k \times l$ many blocks of $H_{k,l}$ when collecting constraints on the latent basis.
- We here assume $A,\Pi$ to be known, and apply Srini's idea of joint gradient descent on the whole $C$ to $H_{k,l}$

In [None]:


""" TO BE DEBUGGED """


def f(C,A, Q,Om):
    """Squared error between C A C^T and Q, restricted to the entries selected by Om.

    Parameters
    ----------
    C  : (p, n) array
    A  : (n, n) array; the model matrix is C.dot(A).dot(C.T)
    Q  : 2-D array indexed the same way as the model matrix
    Om : index applied to both matrices (the notebook passes a boolean mask)

    Returns
    -------
    Sum of squared differences over the selected entries.
    """
    model = np.dot(C, np.dot(A, C.T))
    resid = model[Om] - Q[Om]
    # resid is a flat vector, so its inner product with itself is the sum of squares
    return np.dot(resid, resid.T)

def g(C,A,Q,idx_grp, obs_idx):
    """Assemble the descent direction for C in the model C A C^T.

    For every index group i, all groups that share at least one entry of
    obs_idx with group i are collected, their row indices are concatenated
    and sorted, and g_idxgrp is called to fill the rows idx_grp[i] of the
    result.

    Parameters
    ----------
    C       : (p, n) array, current estimate
    A       : matrix passed through to g_idxgrp
    Q       : matrix passed through to g_idxgrp
    idx_grp : sequence of row-index collections, one per index group
    obs_idx : sequence of collections of group numbers; two groups count as
              co-observed if some element of obs_idx contains both

    Returns
    -------
    (p, n) array; rows not covered by any index group stay zero.
    """
    n_rows, n_cols = C.shape
    n_groups = len(idx_grp)

    grad = np.zeros((n_rows, n_cols))
    for i in range(n_groups):

        # groups j that appear together with group i in at least one entry of obs_idx
        partners = [idx_grp[j] for j in range(n_groups)
                    if any((j in obs) and (i in obs) for obs in obs_idx)]
        co_obs_i = np.sort(np.hstack(partners))

        grad[idx_grp[i],:] = g_idxgrp(C,A,Q,idx_grp[i],co_obs_i)

    return grad

def g_idxgrp(C,A,Q,idx_grp_i,co_obs_i):
    """Descent-direction block for the rows idx_grp_i of C in the model C A C^T.

    Returns one quarter of the gradient, with respect to C[idx_grp_i], of

        sum over (a, b), a != b, of (C_a A C_b^T - Q_ab)^2

    where the pairs run over idx_grp_i x co_obs_i and co_obs_i x idx_grp_i.
    Row a of C enters this loss both as the left factor (entries (a, b)) and
    as the right factor (entries (b, a)); for a non-symmetric A or Q the two
    contributions differ, so both are accumulated. For A = I and symmetric Q
    the result equals (Ci Cc^T - Q_ic) Cc - diag(r_ii) Ci, i.e. the scaling
    of the symmetric C C^T case is kept.

    Parameters
    ----------
    C         : (p, n) array
    A         : (n, n) array, not required to be symmetric
    Q         : (p, p) array, not required to be symmetric
    idx_grp_i : 1-D integer index array, rows to compute the block for
    co_obs_i  : 1-D integer index array, rows co-observed with idx_grp_i;
                must contain idx_grp_i for the diagonal correction to cancel
                the (a, a) terms exactly

    Returns
    -------
    (len(idx_grp_i), n) array.
    """
    Ci, Cc = C[idx_grp_i,:], C[co_obs_i,:]
    CiA, CiAt = Ci.dot(A), Ci.dot(A.T)

    # residuals of the model on the blocks (i, co) and (co, i)
    R_ic = CiA.dot(Cc.T) - Q[np.ix_(idx_grp_i, co_obs_i)]
    R_ci = Cc.dot(A).dot(Ci.T) - Q[np.ix_(co_obs_i, idx_grp_i)]

    # d/dC_a of (C_a A C_b^T) is C_b A^T; d/dC_a of (C_b A C_a^T) is C_b A
    grad = R_ic.dot(Cc.dot(A.T)) + R_ci.T.dot(Cc.dot(A))

    # remove the contribution of the diagonal entries (a, a), a in idx_grp_i
    r_ii = np.sum(CiA*Ci,axis=1) - Q[idx_grp_i,idx_grp_i]
    grad -= np.diag(r_ii).dot(CiAt + CiA)

    return 0.5 * grad


# C is a p x n matrix in the experiments below
p, n = 11, 3

# create subpopulations (two index ranges that overlap on 4..6)
sub_pops = (np.arange(0, 7), np.arange(4, p))
print('sub_pops', sub_pops)

# Om marks every pair of indices that lies inside a common subpopulation
Om = np.zeros((p, p), dtype=bool)
for pop in sub_pops:
    Om[np.ix_(pop, pop)] = True

obs_scheme = {'sub_pops': sub_pops, 'obs_pops': (0, 1)}
obs_idx, idx_grp = ssid.get_obs_index_groups(obs_scheme=obs_scheme, p=p)

print('idx_grp:', idx_grp)
print('obs_idx:', obs_idx)

# Repeated experiment for the model Q = C (A Pi) C^T with known A and Pi:
# fit C by gradient descent on the observed entries only, and track the error
# on the observed (Om) and on the held-out, to-be-stitched (~Om) entries.
for rep in range(20):

    C_true = np.random.normal(size=(p,n))
    Pi     = np.random.normal(size=(n,n))
    Pi     = Pi.dot(Pi.T)                 # symmetric positive semi-definite
    A      = np.diag([0.6, 0.8, 0.99]) #np.random.normal(size=(n,n))
    APi    = A.dot(Pi)                    # in general not symmetric

    Q_true = C_true.dot(APi.dot(C_true.T))
    Q_obs = Q_true * np.asarray( Om, dtype=int)   # entries inside Om, zero elsewhere
    Q_sti = Q_true * np.asarray(~Om, dtype=int)   # entries outside Om, zero elsewhere
    C_0 = np.random.normal(size=(p,n))
    C_est = C_0.copy()
    max_iter, a= 10000, 0.0001
    err_obs, err_sti  = np.empty(max_iter), np.empty(max_iter)
    for t in range(max_iter):

        # halve the step size every 2000 iterations (this also fires at t == 0)
        if np.mod(t,2000)==0:
            a /= 2

        C_est -= a * g(C_est, APi, Q_obs, idx_grp, obs_idx)
        err_obs[t] = f(C_est, APi, Q_obs, Om)
        err_sti[t] = f(C_est, APi, Q_sti,~Om)

    print('final squared error on observed parts:', err_obs[-1])
    print('final squared error on stitched parts:', err_sti[-1])

    # skip the first quarter of the run so the initial transient does not
    # dominate the y-axis; plot against the true iteration numbers
    t_start = max_iter // 4
    iters = np.arange(t_start, max_iter)

    plt.figure(figsize=(20,7))
    plt.subplot(1,4,1)
    # successive plot calls draw into the same axes by default
    # (plt.hold was removed in Matplotlib 3.0)
    plt.plot(iters, err_obs[t_start:], label='observed parts')
    plt.plot(iters, err_sti[t_start:], label='stitched parts')
    plt.xlabel('iteration')
    plt.ylabel('SE on parts of matrix')
    plt.legend()
    plt.title('Squared error')
    plt.subplot(1,4,2)
    plt.imshow(C_0.dot(APi.dot(C_0.T)),interpolation='none')
    plt.title('Initial matrix (C_0 A Pi C_0^T)')
    plt.subplot(1,4,3)
    plt.imshow(C_est.dot(APi.dot(C_est.T)),interpolation='none')
    plt.title('Estimated matrix')
    plt.subplot(1,4,4)
    plt.imshow(Q_true,interpolation='none')
    plt.title('True  matrix')
    plt.show()
    
#vCC, vQ, vOm = np.ravel(C.dot(C.T)), np.ravel(Q), np.ravel(Om)

#print(vCC[vOm].dot(vCC[vOm]) + vQ[vOm].dot(vQ[vOm]) - 2 * vCC[vOm].dot(vQ[vOm]))
#print( np.sum( ((C.dot(C.T)-Q)**2)[Om] ) )



In [None]:
# Scratch check of the divisibility test used for the step-size schedule
# (`np.mod(t,2000)==0` in the loops above): 11 is not a multiple of 10.
np.mod(11,10)==0