In [1]:
import numpy as np
import sys,os
import matplotlib.pyplot as plt
from scipy.stats import multivariate_t as mvt, norm 
import sklearn
from sklearn.covariance import GraphicalLasso
import scipy
from sklearn.metrics import zero_one_loss
from sklearn.metrics.pairwise import pairwise_kernels
import time
import tqdm
import networkx as nx

import matplotlib.pylab as pylab
# Global matplotlib font sizes applied to every figure in this notebook.
# The dict literal used to list 'axes.titlesize' twice (25, then 'x-large').
# Python keeps only the last value of a repeated key, so the first entry was
# dead; only the value that was actually in effect ('x-large') is kept here.
params = {
    'legend.fontsize': 25,
    'axes.labelsize': 25,
    'axes.titlesize': 'x-large',
    'xtick.labelsize': 25,
    'ytick.labelsize': 'x-large',
}
pylab.rcParams.update(params)
sys.path.insert(0, 'C:/Users/User/Code/DyGraph')

import DyGraph as dg

from sklearn.datasets import make_sparse_spd_matrix

In [2]:
def calc_one_zero_error(T, Estimate, ratio=True):
    """Count sign disagreements between the strict upper triangles of two matrices.

    Entries whose absolute value is below 1e-7 are treated as exactly zero
    before their signs are compared. Unlike the previous implementation, the
    thresholding is done on temporary arrays, so neither ``T`` nor
    ``Estimate`` is modified for the caller.

    Parameters
    ----------
    T : array-like
        Square 2-D reference matrix; ``T.shape[0]`` is used as the dimension d.
    Estimate : array-like
        Square 2-D matrix compared against ``T`` (expected to have the same
        shape as ``T``).
    ratio : bool, default True
        If True, divide the mismatch count by ``d * (d - 1) / 2``, the number
        of strict-upper-triangle entries.

    Returns
    -------
    The number of upper-triangle positions whose signs differ, or that number
    divided by ``d * (d - 1) / 2`` when ``ratio`` is True.
    """
    T = np.asarray(T)
    Estimate = np.asarray(Estimate)
    d = T.shape[0]

    # np.where allocates new arrays, so the caller's matrices stay untouched.
    T_thr = np.where(np.abs(T) < 1e-7, 0.0, T)
    E_thr = np.where(np.abs(Estimate) < 1e-7, 0.0, Estimate)

    sign_true = np.sign(T_thr[np.triu_indices(d, k=1)])
    sign_est = np.sign(E_thr[np.triu_indices(Estimate.shape[0], k=1)])

    error = np.sum(sign_true != sign_est)
    if ratio:
        error = error / float(d * (d - 1) / 2)
    return error

def calc_f1(T, Estimate):
    """F1 score of the edge pattern of ``Estimate`` against that of ``T``.

    The strict upper triangles of both matrices are turned into 0/1 edge
    indicators (1 where the absolute value is at least 1e-7, 0 otherwise) and
    passed to ``sklearn.metrics.f1_score``.

    This function used to be defined twice with identical bodies; it is now
    defined once. The inputs are no longer thresholded in place, so ``T`` and
    ``Estimate`` are left unmodified for the caller.

    Parameters
    ----------
    T : ndarray
        Square 2-D reference matrix; ``T.shape[0]`` is used as the dimension.
    Estimate : ndarray
        Square 2-D matrix (expected to have the same shape as ``T``).

    Returns
    -------
    The value returned by ``sklearn.metrics.f1_score(y_true, y_pred)``.
    """
    d = T.shape[0]
    upper = np.triu_indices(d, 1)
    true_vals = T[upper]
    est_vals = Estimate[upper]
    # np.where builds new arrays; the caller's matrices are not written to.
    y_true = np.abs(np.sign(np.where(np.abs(true_vals) < 1e-7, 0.0, true_vals)))
    y_pred = np.abs(np.sign(np.where(np.abs(est_vals) < 1e-7, 0.0, est_vals)))
    return sklearn.metrics.f1_score(y_true, y_pred)

def calc_precision(T, Estimate):
    """Precision of the edge pattern of ``Estimate`` against that of ``T``.

    The strict upper triangles of both matrices are turned into 0/1 edge
    indicators (1 where the absolute value is at least 1e-7, 0 otherwise) and
    passed to ``sklearn.metrics.precision_score``.

    The inputs are no longer thresholded in place, so ``T`` and ``Estimate``
    are left unmodified for the caller.

    Parameters
    ----------
    T : ndarray
        Square 2-D reference matrix; ``T.shape[0]`` is used as the dimension.
    Estimate : ndarray
        Square 2-D matrix (expected to have the same shape as ``T``).

    Returns
    -------
    The value returned by ``sklearn.metrics.precision_score(y_true, y_pred)``.
    """
    d = T.shape[0]
    upper = np.triu_indices(d, 1)
    true_vals = T[upper]
    est_vals = Estimate[upper]
    # np.where builds new arrays; the caller's matrices are not written to.
    y_true = np.abs(np.sign(np.where(np.abs(true_vals) < 1e-7, 0.0, true_vals)))
    y_pred = np.abs(np.sign(np.where(np.abs(est_vals) < 1e-7, 0.0, est_vals)))
    return sklearn.metrics.precision_score(y_true, y_pred)

def calc_recall(T, Estimate):
    """Recall of the edge pattern of ``Estimate`` against that of ``T``.

    The strict upper triangles of both matrices are turned into 0/1 edge
    indicators (1 where the absolute value is at least 1e-7, 0 otherwise) and
    passed to ``sklearn.metrics.recall_score``.

    The inputs are no longer thresholded in place, so ``T`` and ``Estimate``
    are left unmodified for the caller.

    Parameters
    ----------
    T : ndarray
        Square 2-D reference matrix; ``T.shape[0]`` is used as the dimension.
    Estimate : ndarray
        Square 2-D matrix (expected to have the same shape as ``T``).

    Returns
    -------
    The value returned by ``sklearn.metrics.recall_score(y_true, y_pred)``.
    """
    d = T.shape[0]
    upper = np.triu_indices(d, 1)
    true_vals = T[upper]
    est_vals = Estimate[upper]
    # np.where builds new arrays; the caller's matrices are not written to.
    y_true = np.abs(np.sign(np.where(np.abs(true_vals) < 1e-7, 0.0, true_vals)))
    y_pred = np.abs(np.sign(np.where(np.abs(est_vals) < 1e-7, 0.0, est_vals)))
    return sklearn.metrics.recall_score(y_true, y_pred)

def calc_density(prec):
    """Return the networkx density of the graph encoded by ``prec``.

    A copy of ``prec`` has its diagonal set to zero, is converted to a graph
    with ``nx.from_numpy_array``, and that graph's ``nx.density`` is returned.
    ``prec`` itself is not modified.
    """
    off_diagonal = prec.copy()
    np.fill_diagonal(off_diagonal, 0)
    return nx.density(nx.from_numpy_array(off_diagonal))

def calc_roc(T, Estimate):
    """Score the edge pattern of ``Estimate`` against that of ``T``.

    The strict upper triangles of both matrices are turned into 0/1 edge
    indicators (1 where the absolute value is at least 1e-7, 0 otherwise) and
    passed to ``sklearn.metrics.recall_score``.

    The inputs are no longer thresholded in place, so ``T`` and ``Estimate``
    are left unmodified for the caller.

    NOTE(review): despite its name, this function returns the recall (true
    positive rate), exactly like ``calc_recall``. It looks like a copy-paste
    leftover; confirm whether an ROC-based metric was intended before relying
    on it.

    Parameters
    ----------
    T : ndarray
        Square 2-D reference matrix; ``T.shape[0]`` is used as the dimension.
    Estimate : ndarray
        Square 2-D matrix (expected to have the same shape as ``T``).

    Returns
    -------
    The value returned by ``sklearn.metrics.recall_score(y_true, y_pred)``.
    """
    d = T.shape[0]
    upper = np.triu_indices(d, 1)
    true_vals = T[upper]
    est_vals = Estimate[upper]
    # np.where builds new arrays; the caller's matrices are not written to.
    y_true = np.abs(np.sign(np.where(np.abs(true_vals) < 1e-7, 0.0, true_vals)))
    y_pred = np.abs(np.sign(np.where(np.abs(est_vals) < 1e-7, 0.0, est_vals)))
    return sklearn.metrics.recall_score(y_true, y_pred)

In [3]:

# Build a 50-dimensional sparse SPD precision matrix and inspect it:
# condition number, density of its off-diagonal graph, then the matrix and
# its inverse (the covariance).
prec_0 = make_sparse_spd_matrix(
    50,
    alpha=0.8,
    smallest_coef=-0.8,
    largest_coef=0.8,
    norm_diag=True,
    random_state=42,
)
print(np.linalg.cond(prec_0))

# Zero the diagonal on a copy so self-loops do not count towards the density.
tmp = prec_0.copy()
np.fill_diagonal(tmp, 0)
G = nx.from_numpy_array(tmp)
print(nx.density(G))

S = np.linalg.inv(prec_0)
print("precision")
print(prec_0)
print("Covariance")
print(S)

260.51168481012627
0.5559183673469388
precision
[[ 1.         -0.16466916 -0.23081246 ...  0.          0.
   0.        ]
 [-0.16466916  1.         -0.09466553 ...  0.          0.13315111
   0.25059233]
 [-0.23081246 -0.09466553  1.         ...  0.          0.
   0.        ]
 ...
 [ 0.          0.          0.         ...  1.          0.
   0.        ]
 [ 0.          0.13315111  0.         ...  0.          1.
   0.        ]
 [ 0.          0.25059233  0.         ...  0.          0.
   1.        ]]
Covariance
[[ 3.33140932 -0.25692045  1.10967945 ...  0.3199703  -0.8543497
   0.19986897]
 [-0.25692045  4.59401228 -0.09227131 ... -0.99734698 -2.72228913
  -0.97043293]
 [ 1.10967945 -0.09227131  3.11474081 ...  1.01690112  0.60505175
   0.98034227]
 ...
 [ 0.3199703  -0.99734698  1.01690112 ...  4.10090314  0.61848376
   2.03202162]
 [-0.8543497  -2.72228913  0.60505175 ...  0.61848376  7.12912669
   1.48473521]
 [ 0.19986897 -0.97043293  0.98034227 ...  2.03202162  1.48473521
   9.36433925]

# MVT - MVT vs Gaussian

In [4]:
import pickle

# --- Experiment configuration ------------------------------------------------
# For each dimension in `ds`, data are drawn from a multivariate t distribution
# (df = 4) and the graph is estimated twice: once with lik_type="t" (results in
# the *_inner lists) and once with lik_type="gaussian" (results in *_outer).
ds = [5, 10, 50, 100]
alpha_prob = [0.03, 0.15, 0.8, 0.9]  # per-dimension `alpha` for make_sparse_spd_matrix
alpha = 0.05
kappa = 0.1
tol = 1e-10
n = 1000
obs_per_graph = 100
max_iter = 10000                      # single source of truth: used by the solver AND stored below
temporal_penalty = "element-wise"     # single source of truth: used by fit() AND stored below
seed = 42                             # seeds both the precision matrix and the t-distributed sample
prec_dict = {}

dens_inner = []
time_inner = []
F_inner = []
zo_inner = []
l1_inner = []

dens_outer = []
time_outer = []
F_outer = []
zo_outer = []
l1_outer = []


for i, d in enumerate(ds):
    print(d)

    prec = make_sparse_spd_matrix(d, alpha=alpha_prob[i], smallest_coef=-0.9, largest_coef=0.9, norm_diag=True, random_state=seed)
    # Density of the true graph: zero the diagonal on a copy first.
    tmp = prec.copy()
    np.fill_diagonal(tmp, 0)
    G = nx.from_numpy_array(tmp)
    print(nx.density(G))
    print("\n")

    prec_dict[i] = prec

    # Seeded so that a re-run of this cell reproduces the same sample.
    X1 = mvt.rvs(shape=np.linalg.inv(prec), df=4, size=n, random_state=seed)
    X = X1

    # ---- fit with the t likelihood -> *_inner ----
    start = time.time()
    dg_opt1 = dg.dygl_parallel(obs_per_graph=obs_per_graph, max_iter=max_iter, lamda=obs_per_graph*alpha, kappa=obs_per_graph*kappa, tol=tol)
    dg_opt1.fit(X, nr_workers=1, temporal_penalty=temporal_penalty, lik_type="t", nr_em_itr=1, time_index=range(X.shape[0]), nu=4, em_tol=1e-6)
    elapsed = time.time() - start
    dens_inner.append(nx.density(G))
    time_inner.append(elapsed)

    zo_inner.append(np.mean([calc_one_zero_error(prec, dg_opt1.theta[k]) for k in range(len(dg_opt1.theta))]))
    F_inner.append(np.mean([scipy.linalg.norm(prec-dg_opt1.theta[k], ord='fro')/scipy.linalg.norm(prec, ord='fro') for k in range(len(dg_opt1.theta))]))
    l1_inner.append(np.mean([scipy.linalg.norm(prec-dg_opt1.theta[k], ord=1)/scipy.linalg.norm(prec, ord=1) for k in range(len(dg_opt1.theta))]))

    # ---- fit with the Gaussian likelihood -> *_outer ----
    start = time.time()
    dg_opt1 = dg.dygl_parallel(obs_per_graph=obs_per_graph, max_iter=max_iter, lamda=obs_per_graph*alpha, kappa=obs_per_graph*kappa, tol=tol)
    dg_opt1.fit(X, nr_workers=1, temporal_penalty=temporal_penalty, lik_type="gaussian", time_index=range(X.shape[0]))
    elapsed = time.time() - start
    dens_outer.append(nx.density(G))
    time_outer.append(elapsed)

    zo_outer.append(np.mean([calc_one_zero_error(prec, dg_opt1.theta[k]) for k in range(len(dg_opt1.theta))]))
    F_outer.append(np.mean([scipy.linalg.norm(prec-dg_opt1.theta[k], ord='fro')/scipy.linalg.norm(prec, ord='fro') for k in range(len(dg_opt1.theta))]))
    l1_outer.append(np.mean([scipy.linalg.norm(prec-dg_opt1.theta[k], ord=1)/scipy.linalg.norm(prec, ord=1) for k in range(len(dg_opt1.theta))]))

# Each key appears exactly once. Previously 'F_inner' and 'l1_inner' were
# repeated with the *_outer lists, which overwrote the inner results and left
# 'F_outer' / 'l1_outer' missing from the saved file. 'temporal_penalty' and
# 'max_iter' now record the values that were actually used above.
out_dict = {
    'nr_obs_per_graph': obs_per_graph, 'n': n, 'temporal_penalty': temporal_penalty,
    'density_inner': dens_inner, 'tol': tol, 'time_inner': time_inner, 'zo_inner': zo_inner, 'F_inner': F_inner, 'l1_inner': l1_inner,
    'density_outer': dens_outer, 'time_outer': time_outer, 'zo_outer': zo_outer, 'F_outer': F_outer, 'l1_outer': l1_outer,
    'ds': ds, 'alpha': alpha, 'kappa': kappa, 'max_iter': max_iter,
}

# NOTE(review): the grid-search cell further down writes under
# '../data/distributions/'; confirm which directory is intended here.
with open('../distributions/mvt_mvt_gaussian.pkl', 'wb') as handle:
    pickle.dump(out_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)


5
0.9




Error 1.02E-10:   4%|▎         | 358/10000 [00:00<00:20, 475.93it/s]
Error 1.07E-10:   2%|▏         | 210/10000 [00:00<00:14, 694.53it/s]


10
0.8888888888888888




Error 1.00E-10:   9%|▉         | 922/10000 [00:02<00:21, 419.36it/s]
Error 1.01E-10:   8%|▊         | 845/10000 [00:01<00:14, 646.46it/s]


50
0.5559183673469388




Error 7.30E-3:   1%|          | 96/10000 [00:01<02:13, 74.04it/s]

KeyboardInterrupt: 

In [None]:


# Two panels, both plotted against the dimensions in `ds`:
#   left  - wall-clock fitting time,
#   right - l1 / Frobenius / zero-one errors.
# NOTE(review): the "Inner"/"Outer" legend labels mirror the *_inner / *_outer
# variable names; in the experiment cell those hold the lik_type="t" and
# lik_type="gaussian" fits respectively - confirm the wording before publishing.
fig, ax = plt.subplots(1, 2, figsize=(14, 5))


def _plot_against_dim(axis, values, **plot_kwargs):
    """Plot `values` against the leading entries of `ds`.

    If the experiment was interrupted, the result lists are shorter than `ds`;
    truncating `ds` lets the completed part be plotted instead of raising a
    shape-mismatch error. With complete results this is identical to
    `axis.plot(ds, values, ...)`.
    """
    axis.plot(ds[:len(values)], values, **plot_kwargs)


_plot_against_dim(ax[0], time_inner, label='Inner EM')
_plot_against_dim(ax[0], time_outer, label='Outer EM')
ax[0].set_xlabel("Dimension", fontdict={'fontsize': 24})
ax[0].set_ylabel("Seconds", fontdict={'fontsize': 24})
ax[0].tick_params(axis='both', which='major', labelsize=20)
legend = ax[0].legend(fontsize=14)
legend.get_title().set_fontsize('18')

_plot_against_dim(ax[1], l1_inner, label='l1 Inner', color="darkblue", alpha=0.5)
_plot_against_dim(ax[1], l1_outer, label='l1 Outer', color="darkblue", linestyle='dashed')
_plot_against_dim(ax[1], F_inner, label='Frobenius Inner', color="red", alpha=0.5)
_plot_against_dim(ax[1], F_outer, label='Frobenius Outer', color="red", linestyle='dashed')
_plot_against_dim(ax[1], zo_inner, label='One-Zero Inner', color="darkgreen", alpha=0.5)
_plot_against_dim(ax[1], zo_outer, label='One-Zero Outer', color="darkgreen", linestyle='dashed')
legend = ax[1].legend(title="Metric", fontsize=14, bbox_to_anchor=(1.05, 1))
legend.get_title().set_fontsize('18')
# The x-axis is `ds`, i.e. the dimension - the old label
# ("Nr. inner ADMM iterations") was left over from a different plot.
ax[1].set_xlabel("Dimension", fontdict={'fontsize': 24})
ax[1].set_ylabel("Error", fontdict={'fontsize': 24})
ax[1].tick_params(axis='both', which='major', labelsize=20)


 Fix d and search over alpha and kappa

In [4]:
import pickle

# --- Grid-search configuration -----------------------------------------------
# The dimension is fixed at d = 50; every (kappa, alpha) pair on the grid is
# fitted twice: lik_type="t" (stored in *_inner) and lik_type="gaussian"
# (stored in *_outer). Results are keyed by the index of kappa in `kappas`,
# and each list is ordered like `alphas`.
d = 50
alpha_prob = 0.8
alphas = np.linspace(0.01, 0.2, 10)
kappas = np.linspace(0.01, 0.3, 10)
tol = 1e-10
n = 1000
obs_per_graph = 100
max_iter = 10000                      # single source of truth: used by the solver AND stored below
temporal_penalty = "element-wise"     # single source of truth: used by fit() AND stored below
seed = 42                             # seeds both the precision matrix and the t-distributed sample
prec_dict = {}

dens_inner = {i: [] for i in range(len(kappas))}
time_inner = {i: [] for i in range(len(kappas))}
F_inner = {i: [] for i in range(len(kappas))}
zo_inner = {i: [] for i in range(len(kappas))}
l1_inner = {i: [] for i in range(len(kappas))}

dens_outer = {i: [] for i in range(len(kappas))}
time_outer = {i: [] for i in range(len(kappas))}
F_outer = {i: [] for i in range(len(kappas))}
zo_outer = {i: [] for i in range(len(kappas))}
l1_outer = {i: [] for i in range(len(kappas))}

prec = make_sparse_spd_matrix(d, alpha=alpha_prob, smallest_coef=-0.9, largest_coef=0.9, norm_diag=True, random_state=seed)
# Density of the true graph: zero the diagonal on a copy first.
tmp = prec.copy()
np.fill_diagonal(tmp, 0)
G = nx.from_numpy_array(tmp)
true_density = nx.density(G)  # loop-invariant, so computed once
print(true_density)
print("\n")

# Seeded so that a re-run of this cell reproduces the same sample.
X1 = mvt.rvs(shape=np.linalg.inv(prec), df=4, size=n, random_state=seed)
X = X1

# The context manager closes the progress bar even if the search is interrupted.
with tqdm.tqdm(total=len(alphas)*len(kappas)) as pbar:
    for i, kappa in enumerate(kappas):
        for alpha in alphas:

            # ---- fit with the t likelihood -> *_inner ----
            start = time.time()
            dg_opt1 = dg.dygl_parallel(obs_per_graph=obs_per_graph, max_iter=max_iter, lamda=obs_per_graph*alpha, kappa=obs_per_graph*kappa, tol=tol)
            dg_opt1.fit(X, nr_workers=1, temporal_penalty=temporal_penalty, lik_type="t", nr_em_itr=1, time_index=range(X.shape[0]), nu=4, em_tol=1e-10, verbose=False)
            elapsed = time.time() - start
            dens_inner[i].append(true_density)
            time_inner[i].append(elapsed)

            zo_inner[i].append(np.mean([calc_one_zero_error(prec, dg_opt1.theta[k]) for k in range(len(dg_opt1.theta))]))
            F_inner[i].append(np.mean([scipy.linalg.norm(prec-dg_opt1.theta[k], ord='fro')/scipy.linalg.norm(prec, ord='fro') for k in range(len(dg_opt1.theta))]))
            l1_inner[i].append(np.mean([scipy.linalg.norm(prec-dg_opt1.theta[k], ord=1)/scipy.linalg.norm(prec, ord=1) for k in range(len(dg_opt1.theta))]))

            # ---- fit with the Gaussian likelihood -> *_outer ----
            start = time.time()
            dg_opt1 = dg.dygl_parallel(obs_per_graph=obs_per_graph, max_iter=max_iter, lamda=obs_per_graph*alpha, kappa=obs_per_graph*kappa, tol=tol)
            dg_opt1.fit(X, nr_workers=1, temporal_penalty=temporal_penalty, lik_type="gaussian", time_index=range(X.shape[0]), verbose=False)
            elapsed = time.time() - start
            dens_outer[i].append(true_density)
            time_outer[i].append(elapsed)

            zo_outer[i].append(np.mean([calc_one_zero_error(prec, dg_opt1.theta[k]) for k in range(len(dg_opt1.theta))]))
            F_outer[i].append(np.mean([scipy.linalg.norm(prec-dg_opt1.theta[k], ord='fro')/scipy.linalg.norm(prec, ord='fro') for k in range(len(dg_opt1.theta))]))
            l1_outer[i].append(np.mean([scipy.linalg.norm(prec-dg_opt1.theta[k], ord=1)/scipy.linalg.norm(prec, ord=1) for k in range(len(dg_opt1.theta))]))

            pbar.update()

        # Checkpoint after every kappa so a long run that is interrupted still
        # leaves the results gathered so far on disk. 'temporal_penalty' and
        # 'max_iter' record the values that were actually used above.
        out_dict = {
            'nr_obs_per_graph': obs_per_graph, 'n': n, 'temporal_penalty': temporal_penalty, 'prec': prec,
            'density_inner': dens_inner, 'tol': tol, 'time_inner': time_inner, 'zo_inner': zo_inner, 'F_inner': F_inner, 'l1_inner': l1_inner,
            'density_outer': dens_outer, 'time_outer': time_outer, 'zo_outer': zo_outer, 'F_outer': F_outer, 'l1_outer': l1_outer,
            'alpha': alphas, 'kappa': kappas, 'max_iter': max_iter,
        }

        with open('../data/distributions/mvt_mvt_gaussian_search.pkl', 'wb') as handle:
            pickle.dump(out_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

0.5559183673469388




 65%|██████▌   | 65/100 [1:17:54<1:10:12, 120.36s/it]