# utils.py
import numpy as np
import math
from time import time
import cvxpy as cp
import scipy.linalg as spl
import scipy.sparse.linalg as spsl
from scipy.stats import kendalltau
from scipy.sparse import csr_matrix, lil_matrix, csc_matrix
# Global variables
epsilon = np.finfo(float).eps ## machine precision
rtol = 1e-4 ## convergence tolerance
n_iter = 500 ## maximum number of power iterations
# Initialization: start from a feasible point for all parameters
def init_params(X, rankings, mat_Pij, method_beta_b_init='QP'):
    '''
    n: number of items
    p: number of features
    :param X: n*p, feature matrix
    :param rankings: (c_l, A_l): l = 1...M
    :param mat_Pij: est_Pij(n, rankings), or None to estimate it
    :param method_beta_b_init: 'QP' (convex QP, default) or 'LS' (least squares)
    :return: initial values and initialization times for (beta, b), pi, u, theta, and exp_beta
    '''
n = X.shape[0]
if method_beta_b_init == 'LS':
beta_init, b_init, time_beta_b_init = init_beta_b_ls(X=X, rankings=rankings)
else:
beta_init, b_init, time_beta_b_init = init_beta_b_convex_QP(X=X, rankings=rankings, mat_Pij=mat_Pij)
## weights are initialized uniformly as in all other methods
start_pi = time()
pi_init = 1.0 * np.ones(n, dtype=float) / n
time_pi_init = time() - start_pi
## u is initialized
start_u = time()
u_init = np.zeros(n, dtype=float)
time_u_init = time() - start_u
## theta newton is initialized
start_theta = time()
theta_init = np.zeros(n, dtype=float)
time_theta_init = time() - start_theta
## beta newton exp beta is initialized
exp_beta_init, time_exp_beta_init = init_exp_beta(X, rankings, mat_Pij)
return (beta_init, b_init, time_beta_b_init), (pi_init, time_pi_init), (u_init, time_u_init), \
(theta_init, time_theta_init), (exp_beta_init, time_exp_beta_init)
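# A minimal usage sketch (illustrative toy data, not from the repository); the
# 'LS' path below avoids the cvxpy solve, and weights start uniform as noted above:
#   >>> X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
#   >>> rankings = [np.array([0, 1, 2]), np.array([1, 0, 2])]
#   >>> (beta0, b0, _), (pi0, _), (u0, _), (theta0, _), (expb0, _) = \
#   ...     init_params(X, rankings, mat_Pij=None, method_beta_b_init='LS')
#   >>> pi0
#   array([0.33333333, 0.33333333, 0.33333333])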
def init_beta_b_convex_QP(X, rankings, mat_Pij):
    '''
    n: number of items
    p: number of features
    :param rankings: (c_l, A_l): l = 1...M
    :param X: n*p, feature matrix
    :param mat_Pij: est_Pij(n, rankings)
    min_{beta, b} [beta; b]^T Q [beta; b]  s.t.  X beta + b >= 0  and  sum(X beta + b) = 1
    :return: beta, b, time
    '''
    p = X.shape[1]
    # Define variables
    beta = cp.Variable(p)
    b = cp.Variable(1)
    params = cp.hstack([beta, b])
    # Define objective
    Q = est_sum_dij_dijT(X, rankings, mat_Pij)
    objective = cp.quad_form(params, Q)
    # Define constraints
    constraints = [X @ beta + b >= rtol,
                   cp.sum(X @ beta + b) == 1]
start_beta_b = time()
# Optimize
prob = cp.Problem(cp.Minimize(objective), constraints=constraints)
prob.solve(solver='SCS')
time_beta_b_init = time() - start_beta_b
    if beta.value is None:
        # If the problem could not be solved, drop the equality constraint
        # and relax the solver accuracy.
        constraints = [X @ beta + b >= rtol]
        start_beta_b = time()
        # Optimize
        prob = cp.Problem(cp.Minimize(objective), constraints=constraints)
        prob.solve(solver='SCS', eps=1e-2)
        time_beta_b_init = time() - start_beta_b
    return np.squeeze(np.array(beta.value)), np.squeeze(b.value), time_beta_b_init
def est_Pij(n, rankings):
    '''
    n: number of items
    :param rankings: (c_l, A_l): l = 1...M
    :return: for each pair (i, j), empirical estimate of the probability of i beating j
    Sparse version: counts are accumulated in a lil_matrix (efficient incremental
    writes), and the result is returned as a csc_matrix (efficient reads).
    '''
Pij = lil_matrix((n, n), dtype=float)
for ranking in rankings:
for i, winner in enumerate(ranking):
for loser in ranking[i + 1:]:
Pij[winner, loser] += 1
    copy_Pij = Pij.copy()
for i in range(n):
for j in range(i, n):
summation = copy_Pij[i, j] + copy_Pij[j, i]
if summation:
Pij[i, j] /= summation
Pij[j, i] /= summation
return csc_matrix(Pij)
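# Worked example (illustrative): with two rankings over three items, items 0
# and 1 each beat the other once, so P_01 = P_10 = 0.5, while item 2 always
# loses, so P_02 = P_12 = 1.0 and P_20 = P_21 = 0.0:
#   >>> rankings = [np.array([0, 1, 2]), np.array([1, 0, 2])]
#   >>> P = est_Pij(3, rankings)
#   >>> float(P[0, 1]), float(P[0, 2]), float(P[2, 0])
#   (0.5, 1.0, 0.0)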
def est_sum_dij_dijT(X, rankings, mat_Pij):
'''
n: number of items
p: number of features
    :param rankings: (c_l, A_l): l = 1...M
    :param X: n*p, feature matrix
    :param mat_Pij: est_Pij(n, rankings), or None to estimate it
    :return: sum over all pairs (i, j) of d_ij d_ij^T, where
             d_ij = [(P_ij x_j - P_ji x_i); (P_ij - P_ji)]
'''
n = X.shape[0]
p = X.shape[1]
if mat_Pij is None:
mat_Pij = est_Pij(n, rankings)
sum_dij_dijT = np.zeros((p+1, p+1), dtype=float)
    # concatenate first so that rankings over differently-sized subsets are handled
    items = np.unique(np.concatenate([np.asarray(r) for r in rankings]))
for i in items:
for j in items:
if i != j:
d_ij = np.concatenate(((mat_Pij[i, j] * X[j, :] - mat_Pij[j, i] * X[i, :]),
[mat_Pij[i, j] - mat_Pij[j, i]]))
sum_dij_dijT += np.outer(d_ij, d_ij)
return sum_dij_dijT
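# Shape check on the same style of toy data (Q is PSD by construction, being a
# sum of outer products, which is what cp.quad_form requires):
#   >>> X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
#   >>> rankings = [np.array([0, 1, 2]), np.array([1, 0, 2])]
#   >>> est_sum_dij_dijT(X, rankings, None).shape
#   (3, 3)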
def init_beta_b_ls(X, rankings):
'''
least squares initialization for parameter vector beta and bias b
n: number of items
p: number of features
    :param rankings: (c_l, A_l): l = 1...M
:param X: n*p, feature matrix
:return: beta, b, time
'''
## beta is initialized to minimize least squares on the labels, not the scores: (y_ij-(x_i-x_j)^T beta)^2
sum_cross_corr = 0
sum_auto_corr = 0
for ranking in rankings:
for i, winner in enumerate(ranking):
for loser in ranking[i + 1:]:
X_ij = X[winner, :] - X[loser, :]
sum_cross_corr += X_ij
sum_auto_corr += np.outer(X_ij, X_ij)
start_beta_b = time()
beta_init = np.linalg.solve(sum_auto_corr, sum_cross_corr)
## b is initialized so that X*beta + b is positive in the beginning
x_beta_init = np.dot(X, beta_init)
b_init = np.max(-1.0 * x_beta_init) + rtol
## Sum up to 1
#sum_pi = np.sum(x_beta_init + b_init)
#beta_init = beta_init / sum_pi
#b_init = b_init / sum_pi
time_beta_b_init = time() - start_beta_b
return beta_init, b_init, time_beta_b_init
def init_exp_beta(X, rankings, mat_Pij):
'''
least squares initialization for beta in exponential parametrization
(alternative: exp_beta_init = np.ones((p,), dtype=float) * epsilon)
n: number of items
p: number of features
    :param rankings: (c_l, A_l): l = 1...M
    :param X: n*p, feature matrix
    :param mat_Pij: est_Pij(n, rankings), or None to estimate it
    :return: exp_beta, time
'''
n = X.shape[0]
if mat_Pij is None:
mat_Pij = est_Pij(n, rankings)
sum_auto_corr = 0
sum_cross_corr = 0
    # concatenate first so that rankings over differently-sized subsets are handled
    items = np.unique(np.concatenate([np.asarray(r) for r in rankings]))
for i in items:
for j in items:
if i != j:
X_ij = X[i, :] - X[j, :]
sum_auto_corr += np.outer(X_ij, X_ij)
if mat_Pij[i, j] > 0 and mat_Pij[j, i] > 0:
s_ij = mat_Pij[i, j] / mat_Pij[j, i]
elif mat_Pij[i, j] == 0 and mat_Pij[j, i] == 0:
s_ij = 1
                elif mat_Pij[i, j] == 0 and mat_Pij[j, i] > 0:
                    s_ij = 1.0 / len(rankings)
elif mat_Pij[i, j] > 0 and mat_Pij[j, i] == 0:
s_ij = len(rankings)
sum_cross_corr += np.log(s_ij) * X_ij
start_exp_beta = time()
exp_beta_init = spl.lstsq(sum_auto_corr, sum_cross_corr)[0] # uses svd
time_exp_beta_init = time() - start_exp_beta
return exp_beta_init, time_exp_beta_init
def statdist(generator, method="power", v_init=None):
"""Compute the stationary distribution of a Markov chain, described by its infinitesimal generator matrix.
Computing the stationary distribution can be done with one of the following methods:
- `kernel`: directly computes the left null space (co-kernel) the generator
matrix using its LU-decomposition. Alternatively: ns = spl.null_space(generator.T)
- `eigenval`: finds the leading left eigenvector of an equivalent
discrete-time MC using `scipy.sparse.linalg.eigs`.
- `power`: finds the leading left eigenvector of an equivalent
discrete-time MC using power iterations. v_init is the initial eigenvector.
"""
n = generator.shape[0]
if method == "kernel":
# `lu` contains U on the upper triangle, including the diagonal.
lu, piv = spl.lu_factor(generator.T, check_finite=False)
# The last row contains 0's only.
left = lu[:-1,:-1]
right = -lu[:-1,-1]
# Solves system `left * x = right`. Assumes that `left` is
# upper-triangular (ignores lower triangle.)
res = spl.solve_triangular(left, right, check_finite=False)
res = np.append(res, 1.0)
return (1.0 / res.sum()) * res
if method == "eigenval":
        '''
        Arnoldi iteration has a cubic convergence rate, but does not
        guarantee a positive eigenvector.
        '''
        if v_init is None:
            v_init = np.random.rand(n,)
        # Scale by eps so that every entry of eps*generator is at least -1;
        # mat = eye + eps*generator is then row stochastic (rows sum to 1,
        # since the generator's rows sum to 0).
        eps = 1.0 / np.max(np.abs(generator))
        mat = np.eye(n) + eps*generator
A = mat.T
# Find the leading left eigenvector, corresponding to eigenvalue 1
_, vecs = spsl.eigs(A, k=1, v0=v_init)
res = np.real(vecs[:,0])
return (1.0 / res.sum()) * res
if method == "power":
        '''
        Power iteration has a linear convergence rate and is slow when
        lambda2 ~ lambda1, but it guarantees a positive eigenvector if
        started from a positive vector.
        '''
        if v_init is None:
            v = np.random.rand(n,)
        else:
            v = v_init
        # Scale by eps so that every entry of eps*generator is at least -1;
        # mat = eye + eps*generator is then row stochastic (rows sum to 1,
        # since the generator's rows sum to 0).
        eps = 1.0 / np.max(np.abs(generator))
        mat = np.eye(n) + eps * generator
A = mat.T
# Find the leading left eigenvector, corresponding to eigenvalue 1
normAest = np.sqrt(np.linalg.norm(A, ord=1) * np.linalg.norm(A, ord=np.inf))
v = v/np.linalg.norm(v)
Av = np.dot(A,v)
for ind_iter in range(n_iter):
v = Av/np.linalg.norm(Av)
Av = np.dot(A,v)
lamda = np.dot(v.T, Av)
r = Av-v*lamda
normr = np.linalg.norm(r)
if normr < rtol*normAest:
#print('Power iteration converged in ' + str(ind_iter) + ' iterations.')
break
res = np.real(v)
return (1.0 / res.sum()) * res
    else:
        raise RuntimeError("method '%s' not (yet?) implemented" % method)
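# Sketch (assumes a valid infinitesimal generator: nonnegative off-diagonal
# rates and rows summing to zero). For this two-state chain the stationary
# distribution solves pi @ G = 0 with pi summing to one, i.e. pi = [2/3, 1/3]:
#   >>> G = np.array([[-1.0, 1.0], [2.0, -2.0]])
#   >>> statdist(G, method="kernel")
#   array([0.66666667, 0.33333333])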
def objective(weights, rankings):
    '''
    Plackett-Luce top-1 log-likelihood of the rankings under the given weights.
    '''
    ll = 0
    # guard against log(0) by adding epsilon
for ranking in rankings:
sum_weights = sum(weights[x] for x in ranking)
winner = ranking[0]
ll += np.log(weights[winner] + epsilon) - np.log(sum_weights + epsilon)
return ll
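# Example: under uniform weights, a single ranking's top-1 log-likelihood is
# log(1/3) - log(1) (the epsilon guards are negligible here):
#   >>> w = np.ones(3) / 3
#   >>> round(objective(w, [np.array([0, 1, 2])]), 4)
#   -1.0986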
def check_global_balance_eqn(generator, state_vect):
    '''
    :param generator: matrix of Markov chain transition rates
    :param state_vect: candidate stationary distribution
    Checks the global balance equation for each state i, i.e. that the
    probability flow out of i equals the flow into i (transitions lambda_ji).
    :return: True if the global balance equations are satisfied
    '''
    # Rescale the generator so that every entry is at most 1 in magnitude.
    generator = 1.0 / np.max(np.abs(generator)) * generator
for i, state_i in enumerate(state_vect):
col_i = generator[:, i]
row_i = generator[i, :]
if np.abs((np.sum(row_i*state_i) - row_i[i]*state_i) - (np.dot(state_vect, col_i) - col_i[i]*state_i)) > epsilon:
return False
return True
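# The stationary vector returned by statdist should satisfy global balance
# (the machine-epsilon threshold above is strict, so vectors computed with
# looser tolerances, e.g. via the power method, may be rejected):
#   >>> G = np.array([[-1.0, 1.0], [2.0, -2.0]])
#   >>> check_global_balance_eqn(G, statdist(G, method="kernel"))
#   True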
def softmax(a):
tmp = np.exp(a - np.max(a))
return tmp / np.sum(tmp)
def top1_test_accuracy(est_scores, rankings_test):
correct = 0
for sample in rankings_test:
k = len(sample)
true_winner = sample[0]
pl_probabilities = est_scores[sample]
        if not np.any(np.isnan(pl_probabilities)):
            # if all scores are equal, the prediction is a uniform random
            # guess, which is correct with probability 1/k
            if np.unique(pl_probabilities).shape[0] == 1:
                correct += 1.0 / k
else:
pred_winner = sample[np.argmax(pl_probabilities)]
if true_winner == pred_winner:
correct += 1
return 1.0 * correct / len(rankings_test)
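# Example: the held-out ranking below counts as correct because the item with
# the highest estimated score is also its true winner:
#   >>> top1_test_accuracy(np.array([0.5, 0.3, 0.2]), [np.array([0, 1, 2])])
#   1.0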
def kendall_tau_test(est_scores, rankings_test):
corr = 0
for sample in rankings_test:
pl_probabilities = est_scores[sample] / sum(est_scores[sample])
if not np.any(np.isnan(pl_probabilities)):
pred_ranking = sample[np.flip(np.argsort(pl_probabilities), axis=0)]
            corr += kendalltau(sample, pred_ranking)[0] ## between -1 and 1
return 1.0 * corr / len(rankings_test)
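# Example: a score vector that reproduces the observed order exactly yields a
# Kendall tau of 1.0:
#   >>> float(kendall_tau_test(np.array([0.5, 0.3, 0.2]), [np.array([0, 1, 2])]))
#   1.0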