-
Notifications
You must be signed in to change notification settings - Fork 0
/
Theta_glasso.py
245 lines (214 loc) · 8.55 KB
/
Theta_glasso.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 24 14:52:54 2018
@author: monicawang76
"""
import sys
import os
sys.path.append(os.path.abspath('..')+'/TVGL')
import pickle
import numpy as np
# from random import *
import random
import snap
import numpy.linalg as alg
from scipy.linalg import block_diag
import sklearn.covariance as cov
import TVGL as tvgl
import myTVGL as mtvgl
#---------------------------------------------- Define private functions -----------------------------------------------------
def getGraph(p,d):
# construct a graph from scale free distribution
# paper: The joint graphical lasso for inverse covarianceestimation across multiple classes
# reference: https://rss.onlinelibrary.wiley.com/doi/epdf/10.1111/rssb.12033
# p: number of nodes
# d: out degree of each node
Rnd = snap.TRnd(10)
UGraph = snap.GenPrefAttach(p, d, Rnd)
S = np.zeros((p,p))
for EI in UGraph.Edges():
# generate a random number in (-0.4, -0.1)U(0.1,0.4)
# method: https://stats.stackexchange.com/questions/270856/how-to-randomly-generate-random-numbers-in-one-of-two-intervals
r = np.random.uniform(0, 0.6)
S[EI.GetSrcNId(), EI.GetDstNId()] = r-0.4 if r < 0.3 else r-0.2 # assign values to edges
S0 = S.copy()
# orginal half graph without standardizing it into a PD matrix
S = S + S.T
S = S/(1.5*np.sum(np.absolute(S), axis = 1)[:,None])
A = (S + S.T)/2 + np.matrix(np.eye(p))
# check if A is PD
print(np.all(alg.eigvals(A) > 0))
return S0, A
# return A
def getGraphatT_Shuheng(S, A, n_change):
# n_change: number of edges to be changed
# n_change edges to be added
# n_change edges to be dropped
p = A.shape[0]
B = S + np.tril(np.ones((p,p)))
S_where_is_not_zero = np.where(S!=0)
S_where_is_zero = np.where(B==0)
n_offdiag_nonzero = S_where_is_not_zero[0].shape[0]
n_offdiag_zero = S_where_is_zero[0].shape[0]
drop_ix = random.sample(range(n_offdiag_nonzero), n_change)
add_ix = random.sample(range(n_offdiag_zero), n_change)
w_add = []
for i in range(n_change):
r = np.random.uniform(0, 0.6)
w_add.append(r-0.4 if r < 0.3 else r-0.2)
S_add = np.zeros((p,p))
S_add[S_where_is_zero[0][add_ix], S_where_is_zero[1][add_ix]] = w_add
S_drop = np.zeros((p,p))
S_drop[S_where_is_not_zero[0][drop_ix], S_where_is_not_zero[1][drop_ix]] = S[S_where_is_not_zero[0][drop_ix], S_where_is_not_zero[1][drop_ix]]
S_new = S + S_add - S_drop
S_new0 = S_new.copy()
S_new = S_new + S_new.T
vec_div = 1.5*np.sum(np.absolute(S_new), axis = 1)[:,None]
for i in range(p):
if vec_div[i]:
# only when the absolute value of the vector is not zero do the standardization
S_new[i,:] = S_new[i,:]/vec_div[i]
# S_new = S_new/(1.5*np.sum(np.absolute(S_new), axis = 1)[:,None])
A_new = (S_new + S_new.T)/2 + np.matrix(np.eye(p))
# check PD
np.all(alg.eigvals(A_new) > 0)
return S_new0, A_new
def getCov(A):
A_inv = alg.inv(A) # inverse of A
p = A.shape[0]
# Sigma = 0.6*(np.triu(np.ones((p,p)),1) + np.tril(np.ones((p,p)),-1)) + np.matrix(np.eye(p))
Sigma = np.zeros((p,p))
for i in range(A_inv.shape[0]):
for j in range(A_inv.shape[1]):
# Sigma[i,j] = Sigma[i,j]*A_inv[i,j]/np.sqrt(A_inv[i,i]*A_inv[j,j])
Sigma[i,j] = A_inv[i,j]/np.sqrt(A_inv[i,i]*A_inv[j,j])
return Sigma
def genEmpCov(samples, useKnownMean = False, m = 0):
# input samples p by n
# size = p
# samplesperstep = n
size, samplesPerStep = samples.shape
if useKnownMean == False:
m = np.mean(samples, axis = 1)
empCov = 0
for i in range(samplesPerStep):
sample = samples[:,i]
empCov = empCov + np.outer(sample - m, sample -m)
empCov = empCov/samplesPerStep
return empCov
def getF1(S0, S1):
# S0 is the true graph and S1 is the estimated graph
S_true, S_est = S0.copy(), S1.copy()
np.fill_diagonal(S_true, 0)
np.fill_diagonal(S_est, 0)
# number of detected edges on off diagonal
D = np.where(S_est != 0)[0].shape[0]
# number of true edges on off diagonal
T = np.where(S_true != 0)[0].shape[0]
# number of true edges detected
TandD = float(np.where(np.logical_and(S_true, S_est))[0].shape[0])
# print TandD
if D:
P = TandD/D
else:
print('No edge detected on off diagonal, precision is zero')
P = 0.
R = TandD/T
if P+R:
F1 = 2*P*R/(P+R)
else:
F1 = 0.
return P, R, F1
def getAIC(S_est, S_previous, empCov, ni):
# S_diff = (S_est - S_previous)
# S_diff = S_diff - np.diag(np.diag(S_diff))
# ind = (S_diff < 1e-2) & (S_diff > - 1e-2)
# S_diff[ind] = 0
# K = np.count_nonzero(S_diff)
ind = (S_est < 1e-2) & (S_est > - 1e-2)
S_est[ind] = 0
ind = (S_previous < 1e-2) & (S_previous > - 1e-2)
S_previous[ind] = 0
K = float(np.where(np.logical_and((S_est!=0) != (S_previous!=0), S_est!=0) == True)[0].shape[0])
# K = float(np.where(np.logical_and((S_est>0) != (S_previous>0), S_est>0) == True)[0].shape[0])
#loglik = ni*(np.log(alg.det(S_est)) - np.trace(np.dot(S_est, empCov)))
loglik = np.log(alg.det(S_est)) - np.trace(np.dot(S_est, empCov))
#print(-loglik)
#print(K)
AIC = -loglik + K
return AIC
def indicesOfExtremeValue(arr, set_length, choice):
if (choice == 'max'):
index = np.argmax(arr)
elif (choice == 'min'):
index = np.argmin(arr)
else:
print 'invalid argument, choose max or min'
index_x = index/set_length
index_y = index - (index_x)*set_length
return index, index_x, index_y
def alpha_max(emp_cov):
"""Find the maximum alpha for which there are some non-zeros off-diagonal.
Parameters
----------
emp_cov : 2D array, (n_features, n_features)
The sample covariance matrix
Notes
-----
This results from the bound for the all the Lasso that are solved
in GraphLasso: each time, the row of cov corresponds to Xy. As the
bound for alpha is given by `max(abs(Xy))`, the result follows.
"""
A = np.copy(emp_cov)
A.flat[::A.shape[0] + 1] = 0
return np.max(np.abs(A))
#------------------------------------------- End defining private functions ----------------------------------------------------
#------------------------------------------- retrieve data ---------------------------------------------------------------------
f = open("mydata.pkl", "rb")
X_list = pickle.load(f)
f.close()
len_class = len(X_list)
len_t = len(X_list[0])
ni = X_list[0][0].shape[0]
#-------------------------------------------------------------------------------------------------------------------------------
f = open("Theta_glasso.pkl", 'wb')
#------------------------------------- graphical lasso ----------------------------------------------
set_length = 51
#F1_result_list = []
#for class_ix in range(len_class):
# F1_result_c = []
# for time_ix in range(len_t):
# F1_result_t = []
# cov_last = None
# alpha_max_ct = alpha_max(genEmpCov(X_list[class_ix][time_ix].T))
# alpha_set = np.logspace(np.log10(alpha_max_ct*5e-2), np.log10(alpha_max_ct), set_length)
# for alpha in alpha_set:
# emp_cov = genEmpCov(X_list[class_ix][time_ix].T)
# ml_glasso = cov.graph_lasso(emp_cov, alpha, cov_init=cov_last, max_iter = 500)
# cov_last = ml_glasso[0]
# A_est = ml_glasso[1]
# F1_result_t.append(getF1(A_list[class_ix][time_ix], A_est))
# F1_result_c.append(F1_result_t)
# F1_result_list.append(F1_result_c)
print 'starting glasso'
Theta_glasso_list = [] # 1 dim: class_ix; 2 dim: time_ix; 3 dim: alpha
for class_ix in range(len_class):
print class_ix
Theta_glasso_c = []
for time_ix in range(len_t):
print time_ix
Theta_glasso_t = []
cov_last = None
alpha_max_ct = alpha_max(genEmpCov(X_list[class_ix][time_ix].T))
alpha_set = np.logspace(np.log10(alpha_max_ct*5e-2), np.log10(alpha_max_ct), set_length)
for alpha in alpha_set:
emp_cov = genEmpCov(X_list[class_ix][time_ix].T)
ml_glasso = cov.graph_lasso(emp_cov, alpha, cov_init=cov_last, max_iter = 500)
cov_last = ml_glasso[0]
Theta_glasso_t.append(ml_glasso[1])
Theta_glasso_c.append(Theta_glasso_t)
Theta_glasso_list.append(Theta_glasso_c)
#-----------------------------------------------------------------------------------------------------
pickle.dump(Theta_glasso_list, f)
f.close()