survival_problem_prox.py
import time
import numpy as np
import scipy as sp
import logging as log

from profile_support import profile
from common import *
from survival_problem_grad_descent import SurvivalProblemCustom


class SurvivalProblemProximal(SurvivalProblemCustom):
    min_diff_thres = 1e-10

    def solve_prox(self, theta, step_size):
        """
        Apply the proximal operator for the penalty to theta at the given step size.
        Must be implemented by subclasses.
        """
        raise NotImplementedError()
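
    # A typical concrete implementation is soft-thresholding, the proximal
    # operator of an L1 penalty lam * ||theta||_1 (hedged sketch; the attribute
    # name `penalty_param` is assumed for illustration and is not defined here):
    #
    #   def solve_prox(self, theta, step_size):
    #       thres = step_size * self.penalty_param
    #       return np.sign(theta) * np.maximum(np.abs(theta) - thres, 0)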
    def solve(self, init_theta, max_iters=1000, init_step_size=1, step_size_shrink=0.5, backtrack_alpha=0.01, diff_thres=1e-6, min_iters=20, verbose=False):
        """
        Runs proximal gradient descent to minimize the negative penalized log likelihood

        @param init_theta: where to initialize the gradient descent
        @param max_iters: maximum number of iterations of gradient descent
        @param init_step_size: how big to initialize the step size factor
        @param step_size_shrink: how much to shrink the step size during backtracking line search
        @param backtrack_alpha: the alpha in backtracking line search (p. 464 in Boyd & Vandenberghe)
        @param diff_thres: stop gradient descent when the decrease in the objective is less than diff_thres
        @param min_iters: minimum number of iterations before the confidence-bound stopping rule may trigger
        @param verbose: whether to print out the status at each iteration
        @return the final fitted theta, the penalized log likelihood, and its confidence lower bound
        """
        theta, current_value, upper_bound = self._solve(init_theta, max_iters, init_step_size, step_size_shrink, backtrack_alpha, diff_thres, min_iters, verbose)
        return theta, -current_value, -upper_bound
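
    # Typical usage (hedged sketch; the constructor of the concrete subclass is
    # not shown in this module):
    #
    #   problem = SomeConcreteSubclass(...)
    #   theta, pen_log_lik, pen_log_lik_lower_bound = problem.solve(init_theta)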
    def _get_value_parallel(self, theta):
        """
        Calculate the penalized loss value in parallel

        @return tuple: negative penalized log likelihood and the vector of log likelihoods
        """
        raise NotImplementedError()
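
    # In a subclass with an L1 penalty this typically looks like (hedged sketch;
    # the helper `_get_log_lik_parallel` and attribute `penalty_param` are
    # assumed names, not guaranteed by this module):
    #
    #   log_lik_vec = self._get_log_lik_parallel(theta)
    #   pen = self.penalty_param * np.linalg.norm(theta, ord=1)
    #   return -np.sum(log_lik_vec) + pen, log_lik_vec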
    def _solve(self, init_theta, max_iters=100, init_step_size=1, step_size_shrink=0.5, backtrack_alpha=0.01, diff_thres=1e-6, min_iters=10, verbose=False):
        """
        Runs proximal gradient descent to minimize the negative penalized log likelihood

        @return the final fitted theta, the penalized negative log likelihood, and the
                upper bound of the confidence interval on its decrease
        """
        st = time.time()
        theta = init_theta
        step_size = init_step_size
        diff = 0
        upper_bound = 0
        ase = None
        ess = None

        # Calculate log likelihood of the initial theta
        init_value, log_lik_vec_init = self._get_value_parallel(theta)
        current_value = init_value
        for i in range(max_iters):
            if i % self.print_iter == 0:
                log.info("PROX iter %d, val %f, time %f" % (i, current_value, time.time() - st))
                if ase is not None and ess is not None:
                    log.info("  ase %f, ess %f" % (ase, ess))

            # Calculate gradient of the smooth part
            grad = self._get_gradient_log_lik(theta)
            potential_theta = theta - step_size * grad
            # Do proximal gradient step
            potential_theta = self.solve_prox(potential_theta, step_size)
            potential_value, potential_log_lik_vec = self._get_value_parallel(potential_theta)

            # Do backtracking line search
            expected_decrease = backtrack_alpha * np.power(np.linalg.norm(grad), 2)
            while potential_value >= current_value - step_size * expected_decrease:
                if step_size * expected_decrease < diff_thres and i > 0:
                    # Stop if the guaranteed decrease in the objective function is too small
                    break
                elif step_size * expected_decrease < self.min_diff_thres:
                    break
                step_size *= step_size_shrink
                log.info("PROX step size shrink %f" % step_size)
                potential_theta = theta - step_size * grad
                # Do proximal gradient step
                potential_theta = self.solve_prox(potential_theta, step_size)
                potential_value, potential_log_lik_vec = self._get_value_parallel(potential_theta)

            if potential_value > current_value:
                # Stop if value is increasing
                break
            else:
                # Calculate lower bound to determine if we need to rerun
                # Get the confidence interval around the penalized log likelihood (not the log likelihood itself!)
                log_lik_ratio_vec = potential_log_lik_vec - log_lik_vec_init
                ll_ratio_vec_grouped = self._group_log_lik_ratio_vec(log_lik_ratio_vec)
                # The upper bound becomes a lower bound when we consider the negative of this!
                ase, _, upper_bound, ess = get_standard_error_ci_corrected(ll_ratio_vec_grouped, ZSCORE, potential_value - init_value)

                # Calculate difference in objective function
                theta = potential_theta
                diff = current_value - potential_value
                current_value = potential_value
                if (upper_bound < 0 and i > min_iters) or diff < diff_thres:
                    # Stop if the negative penalized log likelihood has significantly decreased and the
                    # minimum number of iterations has been run, or the decrease in the objective is small
                    break
        log.info("final PROX iter %d, val %f, time %f" % (i, current_value, time.time() - st))
        return theta, current_value, upper_bound
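

if __name__ == "__main__":
    # Self-contained, hedged sketch: proximal gradient on a toy lasso problem,
    #   min_theta 0.5 * ||theta - b||^2 + lam * ||theta||_1,
    # illustrating the gradient-step-then-prox pattern used in _solve above.
    # Everything here (b, lam, soft_threshold) is illustrative and independent
    # of the abstract methods of SurvivalProblemProximal.
    def soft_threshold(theta, thres):
        # Proximal operator of thres * ||.||_1: shrink each coordinate toward zero
        return np.sign(theta) * np.maximum(np.abs(theta) - thres, 0)

    b = np.array([1.5, -0.2, 0.05, -3.0])
    lam = 0.1
    step_size = 1.0
    theta = np.zeros_like(b)
    for _ in range(50):
        grad = theta - b  # gradient of the smooth part
        theta = soft_threshold(theta - step_size * grad, step_size * lam)
    print(theta)  # [ 1.4 -0.1  0.  -2.9]: each coordinate of b shrunk by lam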