-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpriority_lp_solver.py
274 lines (239 loc) · 12.9 KB
/
priority_lp_solver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
'''
Based on codes by Suman K. Bera in https://github.com/nicolasjulioflores/fair_algorithms_for_clustering
'''
from cplex import Cplex
import numpy as np
import time
# Function to add constraints to the priority (p,k)-clustering LP
# Input:
# radii: vector of size nr_points, ith element is point i's distance it's lth nearest neighbor
# neighborhood: a list of lists. for point i, graph[i] is the closest l points to i (including itself)
# n_clusters: the number of clusters
# Output:
# rows: coefficients in each row of the coefficients matrix
# senses: the <= or = or >= of the constraints
# rhs: the RHS of the constraints
# names: the constraint names
def prepare_to_add_constraints(radii, neighborhood, nr_clusters):
nr_points = len(radii)
rows = []
rhs = []
senses = []
names = []
def add_sums_to_one_constraints():
nonlocal rows, rhs, senses, names
rows.extend(
[[["x_{}_{}".format(j, i) for i in neighborhood[j]], [1] * len(neighborhood[j])] for j in range(nr_points)])
rhs.extend([1] * nr_points)
senses.extend(["G"] * nr_points)
names.extend(["cover_{}".format(j) for j in range(nr_points)])
def add_k_fac_constraint():
nonlocal rows, rhs, senses, names
rows.append([["y_{}".format(i) for i in range(nr_points)], [1] * nr_points])
rhs.append(nr_clusters)
senses.append("L")
names.append("At_most_k")
def add_connection_constraints():
nonlocal rows, rhs, senses, names
new_constrains = [[["x_{}_{}".format(j, i), "y_{}".format(i)], [1, -1]] for j in range(nr_points) for i in
neighborhood[j]]
nr_new_constraints = len(new_constrains)
rows.extend(new_constrains)
rhs.extend([0] * nr_new_constraints)
senses.extend(["L"] * nr_new_constraints)
names.extend(["cover_{}_{}".format(j, i) for j in range(nr_points) for i in neighborhood[j]])
add_sums_to_one_constraints()
add_k_fac_constraint()
add_connection_constraints()
return rows, senses, rhs, names
# Function to add variables to the priority (p,k)-clustering LP
# Input:
# point_weights: vector of size <nr_points> elements are point weights in the objective
# radii: vector of size nr_points, ith element is point i's distance it's lth nearest neighbor
# neighborhood: a list of lists. for point i, graph[i] is the closest l points to i (including itself)
# all_pair_costs: the metric. At index [i][j] is the cost of connecting points i and j
# Output:
# objective: coefficients in obj fun
# lower_bounds: variable lower_bounds
# upper_bounds: variable upper_bounds
# variable_names: variable names
def prepare_to_add_variables(point_weights, radii, neighborhood, all_pair_costs):
nr_points = len(radii)
# Preparing the y variables
variable_names = ["y_{}".format(i) for i in range(nr_points)] + ["x_{}_{}".format(j, i) for j in range(nr_points)
for i in neighborhood[j]]
objective = [0 for i in range(nr_points)] + [all_pair_costs[j][i] * point_weights[j] for j in range(nr_points)
for i in
neighborhood[j]]
total_variables = len(objective)
lower_bounds = [0] * total_variables
upper_bounds = [1] * total_variables
return objective, lower_bounds, upper_bounds, variable_names
# LP solver for priority (p,k)-clustering LP
# Input:
# point_weights: vector of size <nr_points> elements are point weights in the objective
# radii: vector of size nr_points, ith element is point i's distance it's lth nearest neighbor
# neighborhood: a list of lists. for point i, graph[i] is the closest l points to i (including itself)
# all_pair_costs: the metric. At index [i][j] is the cost of connecting points i and j
# n_clusters: the number of clusters
# Output:
# problem: the cplex LP problem
# variable_names: names we defined to address the variables in the LP
def get_priority_p_k_clustering_lp(point_weights, radii, neighborhood, all_pair_costs, nr_clusters):
# There are primarily five steps:
# 1. Initiate a model for cplex
# 2. Declare if it is minimization or maximization problem
# 3. Add variables to the model. The variables are generally named.
# The upper bounds and lower bounds on the range for the variables
# are also mentioned at this stage. The coefficient of the objective
# functions are also entered at this step
# 4. Add the constraints to the model. The constraint matrix, denoted by A,
# can be added in three ways - row wise, column wise or non-zero entry wise.
# 5. Finally, call the solver.
# Step 1. Initiate a model for cplex.
print("Initializing Cplex model")
problem = Cplex()
# Step 2. Declare that this is a minimization problem
problem.objective.set_sense(problem.objective.sense.minimize)
# Step 3. Declare and add variables to the model. The function
# prepare_to_add_variables (points, center) prepares all the
# required information for this stage.
#
# objective: a list of coefficients (float) in the linear objective function
# lower bounds: a list of floats containing the lower bounds for each variable
# upper bounds: a list of floats containing the upper bounds for each variable
# variable_name: a list of strings that contains the name of the variables
print("Starting to add variables...")
t1 = time.monotonic()
objective, lower_bounds, upper_bounds, variable_names = prepare_to_add_variables(point_weights, radii, neighborhood,
all_pair_costs)
problem.variables.add(obj=objective,
lb=lower_bounds,
ub=upper_bounds,
names=variable_names)
t2 = time.monotonic()
print("Completed. Time for creating and adding variable = {}".format(t2 - t1))
# Step 4. Declare and add constraints to the model.
# There are few ways of adding constraints: rwo wise, col wise and non-zero entry wise.
# Assume the constraint matrix is A. We add the constraints row wise.
# The function prepare_to_add_constraints_by_entry(points,center,colors,alpha,beta)
# prepares the required data for this step.
#
# constraints_row: Encoding of each row of the constraint matrix
# senses: a list of strings that identifies whether the corresponding constraint is
# an equality or inequality. "E" : equals to (=), "L" : less than (<=), "G" : greater than equals (>=)
# rhs: a list of floats corresponding to the rhs of the constraints.
# constraint_names: a list of string corresponding to the name of the constraint
print("Starting to add constraints...")
t1 = time.monotonic()
objects_returned = prepare_to_add_constraints(radii, neighborhood, nr_clusters)
constraints_row, senses, rhs, constraint_names = objects_returned
problem.linear_constraints.add(lin_expr=constraints_row,
senses=senses,
rhs=rhs,
names=constraint_names)
t2 = time.monotonic()
print("Completed. Time for creating and adding constraints = {}".format(t2 - t1))
# Optional: We can set various parameters to optimize the performance of the lp solver
# As an example, the following sets barrier method as the lp solving method
# Available methods are: auto, primal, dual, sifting, concurrent, and barrier
return problem, variable_names
# LP solver for priority (p,k)-Clustering LP
# Input:
# point_weights: vector of size <nr_points> elements are point weights in the objective
# radii: vector of size <nr_points>, ith element is point i's distance it's lth nearest neighbor
# neighborhood: a list of lists. for point i, graph[i] is the closest l points to i (including itself)
# all_pair_distances: the metric. At index [i][j] is the distance between point i and j
# n_clusters: the number of clusters
# power: A number >= 1 the moment of the distances in the objective function. e.g. p=1 for k-med
# lp_method: integer between 0 to 6 for LP solving method. See Cplex documentation
# Output:
# res: dictionary of LP solving results. Contains fractional y values and cost shares for points
def solve_priority_p_k_clustering_lp(point_weights, radii, neighborhood, all_pair_distances, nr_clusters, power,
lp_method=0):
t1 = time.monotonic()
problem, variable_names = get_priority_p_k_clustering_lp(point_weights, radii, neighborhood,
np.power(all_pair_distances, power), nr_clusters)
t2 = time.monotonic()
lp_defining_time = t2 - t1
t1 = time.monotonic()
problem.parameters.lpmethod.set(lp_method)
problem.solve()
t2 = time.monotonic()
lp_solving_time = t2 - t1
# problem.solution is a weakly referenced object, so we must save its data
# in a dictionary so we can write it to a file later.
values = problem.solution.get_values()
nr_points = len(radii)
y_values = values[0:nr_points]
x_values = values[nr_points:]
# --------- STUFF FOR LOGGING PURPOSE ---------
# threshold = 0.01
# y_names = variable_names[0:nr_points]
x_names = variable_names[nr_points:]
# fractional_openings = dict(zip(y_names, y_values))
# # print("-----------openings-------------")
# # print(fractional_openings)
# fractional_openings = {key: val for key, val in fractional_openings.items() if val > 1e-5}
# # print("-----------non-zero openings-------------")
# # print(fractional_openings)
# open_facilities = [int(key[2:]) for key, val in fractional_openings.items() if val > (1-threshold)]
# print("-----------open facilities-------------")
# print(open_facilities)
#
fractional_assignments = dict(zip(x_names, x_values))
# # print("-----------assignments-------------")
# # print(fractional_assignments)
# fractional_assignments = {key:val for key, val in fractional_assignments.items() if val > threshold}
# # print("-----------non-zero assignments-------------")
# # print(fractional_assignments)
# integral_assignments = []
# non_zero_x_values = []
cost_shares = [0] * nr_points
for (name, val) in fractional_assignments.items():
name_split = name.split('_')
u = int(name_split[1])
v = int(name_split[2])
# non_zero_x_values.append(((u,v), val))
cost_shares[u] = cost_shares[u] + (all_pair_distances[u][v] ** power) * val
# if abs(np.sum(cost_shares) - problem.solution.get_objective_value()) > 0.01:
# raise Exception("Mismatch between sum of cost_shares {} and LP objective {}".format(np.sum(cost_shares),
# problem.solution.get_objective_value()))
# new_cost_shares = [0] * nr_points
# for p in range(nr_points):
# connection_costs = np.array([all_pair_costs[p][c] for c in range(nr_points)])
# sorted_inds = np.argsort(connection_costs)
#
# coverage = 0
# for i in range(nr_points):
# q = int(sorted_inds[i])
# gives_y = min(y_values[q], 1-coverage)
# new_cost_shares[p] = new_cost_shares[p] + gives_y * all_pair_costs[p][q]
# if coverage >= 1: break
#
# if abs(new_cost_shares[p] - cost_shares[p]) > 1e-4:
# Warning("For points {}, new cost share is {} while LP cost share is {}".format(p, new_cost_shares[p], cost_shares[p]))
# if fractional_assignments[name] > (1-threshold):
# integral_assignments.append((u,v))
# print("Sum of cost shares is {} and the total objective reported is {}".format(np.sum(cost_shares), problem.solution.get_objective_value()))
#
# # print("-----------integral assignments-------------")
# # print(integral_assignments)
# # print("Number of fractional assignments: {} - {} = {}".format(nr_points, len(integral_assignments), nr_points-len(integral_assignments)))
# print("Number of integrally open facilitie = {}".format(len(open_facilities)))
res = {
"lp_defining_time": lp_defining_time,
"lp_solving_time": lp_solving_time,
"time": lp_solving_time + lp_defining_time,
"status": problem.solution.get_status(),
"success": problem.solution.get_status_string(),
"cost": np.power(problem.solution.get_objective_value(), 1 / power),
# "fractional_openings":fractional_openings,
# "fractional_assignments":fractional_assignments,
# "open_facilities":open_facilities,
# "integral_assignments":integral_assignments,
"nr_x_ij_variables": len(x_names),
"cost_shares": cost_shares,
"y": y_values
}
return res