From 8e536e1b5faebf067a071aff8cb8270b21b1d7ac Mon Sep 17 00:00:00 2001 From: Nickolai Belakovski Date: Thu, 20 Jul 2023 11:19:59 +0900 Subject: [PATCH 1/6] Initial commit of Python translation calcfc_chebyqud and calcfc_hexagon in Python return values close to what is returned by Fortran, but not exactly the same and the difference is outside of machine precision. For chebyquad the fortran version iterates 387 times within cobylb, whereas the Python version iterates 564 times. The results are as follows Fortran: x(1) = 0.06687201625695266 x(2) = 0.28872054433564054 x(3) = 0.36668681233017669 x(4) = 0.63330363432938008 x(5) = 0.71126615229302259 x(6) = 0.93312277720964698 f = 3.9135366518694255e-10 Python: x[0] = 0.06688196439284098 x[1] = 0.2887610493288529 x[2] = 0.36668226015206873 x[3] = 0.6333320849097046 x[4] = 0.7112583722245663 x[5] = 0.9331254078707577 f = 3.8259601181730434e-10 The first position in x differs by 9.948135888e-6, so it's further away than machine precision, but it's possible that the difference is the result of accumulation of machine precision errors. --- python/README.md | 5 + python/pyproject.toml | 10 + python/src/prima/__init__.py | 0 python/src/prima/cobyla/__init__.py | 0 python/src/prima/cobyla/cobylb.py | 687 ++++++++++++++++++ python/src/prima/cobyla/geometry.py | 192 +++++ python/src/prima/cobyla/trustregion.py | 491 +++++++++++++ python/src/prima/cobyla/update.py | 157 ++++ python/src/prima/common/__init__.py | 0 python/src/prima/common/consts.py | 22 + python/src/prima/common/evaluate.py | 36 + python/src/prima/common/linalg.py | 112 +++ python/src/prima/common/output.py | 20 + python/src/prima/common/powalg.py | 118 +++ python/src/prima/common/selectx.py | 189 +++++ .../prima/examples/cobyla/cobyla_example.py | 325 +++++++++ 16 files changed, 2364 insertions(+) create mode 100644 python/README.md create mode 100644 python/pyproject.toml create mode 100644 python/src/prima/__init__.py create mode 100644 python/src/prima/cobyla/__init__.py create mode 100644 python/src/prima/cobyla/cobylb.py create mode 100644 python/src/prima/cobyla/geometry.py create mode 100644 python/src/prima/cobyla/trustregion.py create mode 100644 python/src/prima/cobyla/update.py create mode 100644 python/src/prima/common/__init__.py create mode 100644 python/src/prima/common/consts.py create mode 100644 python/src/prima/common/evaluate.py create mode 100644 python/src/prima/common/linalg.py create mode 100644 python/src/prima/common/output.py create mode 100644 python/src/prima/common/powalg.py create mode 100644 python/src/prima/common/selectx.py create mode 100644 python/src/prima/examples/cobyla/cobyla_example.py diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000000..d325b1cc99 --- /dev/null +++ b/python/README.md @@ -0,0 +1,5 @@ +To develop, `cd` into the `src` directory and run + +```pip install --editable .``` + +This will install prima locally in an editable fashion. From there you can run the examples/cobyla/cobyla_example.py (from any directory) and go from there. diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 0000000000..d95d6147de --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "prima" +version = "0.0.1" +dependencies = [ + "numpy" +] + +[tool.setuptools.packages.find] +where = ["src"] # ["."] by default +exclude = ["prima.examples*"] # empty by default diff --git a/python/src/prima/__init__.py b/python/src/prima/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/src/prima/cobyla/__init__.py b/python/src/prima/cobyla/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/src/prima/cobyla/cobylb.py b/python/src/prima/cobyla/cobylb.py new file mode 100644 index 0000000000..dee51f6c80 --- /dev/null +++ b/python/src/prima/cobyla/cobylb.py @@ -0,0 +1,687 @@ +import numpy as np +from prima.common.consts import INFO_DEFAULT, REALMAX, EPS, CONSTRMAX, \ + MAXTR_REACHED, DAMAGING_ROUNDING, SMALL_TR_RADIUS, \ + NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED +from prima.common.evaluate import evaluate, moderatec, moderatef +from prima.cobyla.update import updatepole, findpole, updatexfc +from prima.cobyla.geometry import assess_geo, setdrop_tr, geostep, setdrop_geo +from prima.common.selectx import savefilt , selectx +from prima.cobyla.trustregion import trstlp, redrat, trrad + +def fcratio(conmat, fval): + ''' + This function calculates the ratio between the "typical changre" of F and that of CONSTR. + See equations (12)-(13) in Section 3 of the COBYLA paper for the definition of the ratio. + ''' + cmin = np.min(conmat, axis=1) + cmax = np.max(conmat, axis=1) + fmin = min(fval) + fmax = max(fval) + if any(cmin < 0.5 * cmax) and fmin < fmax: + denom = np.min(np.maximum(cmax, 0) - cmin, where=cmin < 0.5 * cmax, initial=np.inf) + # Powell mentioned the following alternative in section 4 of his COBYLA paper. According to a test + # on 20230610, it does not make much difference to the performance. + # denom = np.max(max(*cmax, 0) - cmin, mask=(cmin < 0.5 * cmax)) + r = (fmax - fmin) / denom + else: + r = 0 + + return r + +def redrho(rho, rhoend): + ''' + This function calculates RHO when it needs to be reduced. + The sceme is shared by UOBYQA, NEWUOA, BOBYQA, LINCOA. For COBYLA, Powell's code reduces RHO by + 'RHO *= 0.5; if RHO <= 1.5 * RHOEND: RHO = RHOEND' as specified in (11) of the COBYLA + paper. However, this scheme seems to work better, especially after we introduce DELTA. + ''' + + rho_ratio = rho / rhoend + + if rho_ratio > 250: + rho *= 0.1 + elif rho_ratio <= 16: + rho = rhoend + else: + rho = np.sqrt(rho_ratio) * rhoend # rho = np.sqrt(rho * rhoend) + + return rho + +def checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x): + ''' + This function checks whether to break the solver in constrained cases. + ''' + info = INFO_DEFAULT + + # Although X should not contain NaN unless there is a bug, we include the following for security. + # X can be inf, as finite + finite can be inf numerically. + if any(np.isnan(x)) or any(np.isinf(x)): + info = NAN_INF_X + + # Although NAN_INF_F should not happen unless there is a bug, we include the following for security. + if np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv): + info = NAN_INF_F + + if cstrv <= ctol and f <= ftarget: + info = FTARGET_ACHIEVED + + if nf >= maxfun: + info = MAXFUN_REACHED + return info + +def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, + conmat, cval, fval): + ''' + This subroutine does the initialization concerning X, function values, and constraints. + ''' + + num_constraints = conmat.shape[0] + num_vars = x0.size + + # Initialize info to the default value. At return, a value different from this value will + # indicate an abnormal return + info = INFO_DEFAULT + + # Initialize the simplex. It will be revised during the initialization. + sim = np.eye(num_vars, num_vars+1) * rhobeg + sim[:, -1] = x0 + + # evaluated[j] = True iff the function/constraint of SIM[:, j] has been evaluated. + evaluated = np.zeros(num_vars+1, dtype=bool) + + # Here the FORTRAN code initializes some history values, but apparently only for the benefit of compilers, + # so we skip it. + + for k in range(num_vars + 1): + x = sim[:, num_vars].copy() + # We will evaluate F corresponding to SIM(:, J). + if k == 0: + j = num_vars + f = moderatef(f0) + constr = moderatec(constr0) + cstrv = max(max(-constr), 0) + else: + j = k - 1 + x[j] += rhobeg + f, constr, cstrv = evaluate(calcfc, x) + + # Print a message about the function/constraint evaluation according to IPRINT. + # TODO: Finish implementing fmsg, if decided that its worth it + # fmsg(solver, iprint, k, f, x, cstrv, constr) + + # Save X, F, CONSTR, CSTRV into the history. + # TODO: Implement history (maybe we need it for the iterations?) + # savehist(k, x, xhist, f, fhist, cstrv, chist, constr, conhist) + + # Save F, CONSTR, and CSTRV to FVAL, CONMAT, and CVAL respectively. + evaluated[j] = True + fval[j] = f + conmat[:, j] = constr + cval[j] = cstrv + + # Check whether to exit. + # subinfo = checkexit(maxfun, k, cstrv, ctol, f, ftarget, x) + # if subinfo != INFO_DEFAULT: + # info = subinfo + # break + + # Exchange the new vertex of the initial simplex with the optimal vertex if necessary. + # This is the ONLY part that is essentially non-parallel. + if j < num_vars and fval[j] < fval[num_vars]: + fval[j], fval[num_vars] = fval[num_vars], fval[j] + cval[j], cval[num_vars] = cval[num_vars], cval[j] + conmat[:, [j, num_vars]] = conmat[:, [num_vars, j]] + sim[:, num_vars] = x + sim[j, :j+1] = -rhobeg + + nf = np.count_nonzero(evaluated) + + if evaluated.all(): + # Initialize SIMI to the inverse of SIM[:, :num_vars] + simi = np.linalg.inv(sim[:, :num_vars]) + + return evaluated, sim, simi, nf, info + + +def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt): + ''' + This function initializes the filter (XFILT, etc) that will be used when selecting + x at the end of the solver. + N.B.: + 1. Why not initialize the filters using XHIST, etc? Because the history is empty if + the user chooses not to output it. + 2. We decouple INITXFC and INITFILT so that it is easier to parallelize the former + if needed. + ''' + + # Sizes + num_constraints = conmat.shape[0] + num_vars = sim.shape[0] + maxfilt = len(ffilt) + + # Precondictions + assert num_constraints >= 0 + assert num_vars >= 1 + assert maxfilt >= 1 + assert confilt.shape == (num_constraints, maxfilt) + assert cfilt.shape == (maxfilt,) + assert xfilt.shape == (num_vars, maxfilt) + assert ffilt.shape == (maxfilt,) + assert conmat.shape == (num_constraints, num_vars+1) + # TODO: Need to finish these preconditions + + + nfilt = 0 + for i in range(num_vars+1): + if evaluated[i]: + if i < num_vars: + x = sim[:, i] + sim[:, num_vars] + else: + x = sim[:, i] # i == num_vars, i.e. the last column + nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cval[i], ctol, cweight, fval[i], x, nfilt, cfilt, ffilt, xfilt, conmat[:, i], confilt) + + + return nfilt + + +def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, + gamma1, gamma2, rhobeg, rhoend, constr, f, x, + cstrv): + ''' + This subroutine performs the actual computations of COBYLA. + ''' + + # A[:, :num_constraints] contains the approximate gradient for the constraints, and A[:, num_constraints] is minus the + # approximate gradient for the objective function. + A = np.zeros((len(x), len(constr) + 1)) + + # CPENMIN is the minimum of the penalty parameter CPEN for the L-infinity constraint violation in + # the merit function. Note that CPENMIN = 0 in Powell's implementation, which allows CPEN to be 0. + # Here, we take CPENMIN > 0 so that CPEN is always positive. This avoids the situation where PREREM + # becomes 0 when PREREF = 0 = CPEN. It brings two advantages as follows. + # 1. If the trust-region subproblem solver works correctly and the trust-region center is not + # optimal for the subproblem, then PREREM > 0 is guaranteed. This is because, in theory, PREREC >= 0 + # and MAX(PREREC, PREREF) > 0 , and the definition of CPEN in GETCPEN ensures that PREREM > 0. + # 2. There is no need to revise ACTREM and PREREM when CPEN = 0 and F = FVAL(N+1) as in lines + # 312--314 of Powell's cobylb.f code. Powell's code revises ACTREM to CVAL(N + 1) - CSTRV and PREREM + # to PREREC in this case, which is crucial for feasibility problems. + cpenmin = EPS + # FACTOR_ALPHA, FACTOR_BETA, FACTOR_GAMMA, and FACTOR_DELTA are four factors that COBYLB uses + # when managing the simplex. Note the following. + # 1. FACTOR_ALPHA < FACTOR_GAMMA < 1 < FACTOR_DELTA <= FACTOR_BETA. + # 2. FACTOR_DELTA has nothing to do with DELTA, which is the trust-region radius. + # 3. FACTOR_GAMMA has nothing to do with GAMMA1 and GAMMA2, which are the contracting/expanding + # factors for updating the trust-region radius DELTA. + factor_alpha = 0.25 # The factor alpha in the COBYLA paper + factor_beta = 2.1 # The factor beta in the COBYLA paper + factor_delta = 1.1 # The factor delta in the COBYLA paper + factor_gamma = 0.5 # The factor gamma in the COBYLA paper + num_constraints = len(constr) + num_vars = len(x) + # maxxhist = xhist.shape[1] + # maxfhist = len(fhist) + # maxconhist = conhist.shape[1] + # maxchist = len(chist) + # maxhist = max(maxxhist, maxfhist, maxconhist, maxchist) + conmat = np.zeros((num_constraints, num_vars+1)) - REALMAX + cval = np.zeros(num_vars+1) + REALMAX + fval = np.zeros(num_vars+1) + REALMAX + + subinfo = 0 + evaluated, sim, simi, nf, subinfo = initxfc(calcfc, iprint, maxfun, constr, ctol, f, ftarget, rhobeg, x, + conmat, cval, fval) + # TODO: Need to initialize history with f and x and constr + + # Initialize the filter, including xfilt, ffilt, confiolt, cfilt, and nfilt. + # N.B.: The filter is used only when selecting which iterate to return. It does not + # interfere with the iterations. COBYLA is NOT a filter method but a trust-region method + # based on an L-inifinity merit function. Powell's implementation does not use a + # filter to select the iterate, possibly returning a suboptimal iterate. + cfilt = np.zeros(min(max(maxfilt, 1), maxfun)) + confilt = np.zeros((len(constr), len(cfilt))) + ffilt = np.zeros(len(cfilt)) + xfilt = np.zeros((len(x), len(cfilt))) + nfilt = initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt) + + # TODO: Initfilt and savefilt appear to be working. It did not get tested with a rising edge in the + # keep array (i.e. a False followed by a True), but it looks like that shouldn't be a problem. + # For now we should move on to the rest of the algorithm so that we can update the PR with a milestone. + + # Check whether to return due to abnormal cases that may occur during the initialization. + if subinfo != INFO_DEFAULT: + info = subinfo + # Return the best calculated values of the variables + # N.B: Selectx and findpole choose X by different standards, one cannot replace the other + kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], cweight, ctol) + x = xfilt[:, kopt] + f = ffilt[kopt] + constr = confilt[:, kopt] + cstrv = cfilt[kopt] + # Arrange chist, conhist, fhist, and xhist so that they are in chronological order. + # TODO: Implement me + # rangehist(nf, xhist, fhist, chist, conhist) + # print a return message according to IPRINT. + # TODO: Implement me + # retmsg("COBYLA", info, iprint, nf, f, x, cstrv, constr) + + + # Set some more initial values. + # We must initialize ACTREM and PREREM. Otherwise, when SHORTD = TRUE, compilers may raise a + # run-time error that they are undefined. But their values will not be used: when SHORTD = FALSE, + # they will be overwritten; when SHORTD = TRUE, the values are used only in BAD_TRSTEP, which is + # TRUE regardless of ACTREM or PREREM. Similar for PREREC, PREREF, PREREM, RATIO, and JDROP_TR. + # No need to initialize SHORTD unless MAXTR < 1, but some compilers may complain if we do not do it. + # Our initialization of CPEN differs from Powell's in two ways. First, we use the ratio defined in + # (13) of Powell's COBYLA paper to initialize CPEN. Second, we impose CPEN >= CPENMIN > 0. Powell's + # code simply initializes CPEN to 0. + rho = rhobeg + delta = rhobeg + cpen = max(cpenmin, min(1.0E3, fcratio(conmat, fval))) # Powell's code: CPEN = ZERO + prerec = -REALMAX + preref = -REALMAX + prerem = -REALMAX + actrem = -REALMAX # TODO: Is it necessary to define this and prerem here, per the above comment? + shortd = False + trfail = False + ratio = -1 + jdrop_tr = 0 + jdrop_geo = 0 + + # If DELTA <= GAMMA3*RHO after an update, we set DELTA to RHO. GAMMA3 must be less than GAMMA2. The + # reason is as follows. Imagine a very successful step with DNORM = the un-updated DELTA = RHO. + # The TRRAD will update DELTA to GAMMA2*RHO. If GAMMA3 >= GAMMA2, then DELTA will be reset to RHO, + # which is not reasonable as D is very successful. See paragraph two of Sec 5.2.5 in + # T. M. Ragonneau's thesis: "Model-Based Derivative-Free Optimization Methods and Software." + # According to test on 20230613, for COBYLA, this Powellful updating scheme of DELTA works slightly + # better than setting directly DELTA = max(NEW_DELTA, RHO). + gamma3 = max(1, min(0.75 * gamma2, 1.5)) + + # MAXTR is the maximal number of trust region iterations. Each trust-region iteration takes 1 or 2 + # function evaluations unless the trust-region step is short or the trust-region subproblem solver + # fails but the geometry step is not invoked. Thus the following MAXTR is unlikely to be reached + # + # + # Normally each trust-region + # iteration takes 1 or 2 function evaluations except for the following cases: + # 1. The update of cpen alters the optimal vertex + # 2. The trust-region step is short or fails to reduce either the + # linearized objective or the linearized constraint violation but + # the geometry step is not invoked. + # The following maxtr is unlikely to be reached. + maxtr = max(maxfun, 2*maxfun) # max: precaution against overflow, which will make 2*MAXFUN < 0 + info = MAXTR_REACHED + + # Begin the iterative procedure + # After solving a trust-region subproblem, we use three boolean variables to control the workflow. + # SHORTD - Is the trust-region trial step too short to invoke a function evaluation? + # IMPROVE_GEO - Will we improve the model after the trust-region iteration? If yes, a geometry + # step will be taken, corresponding to the "Branch (Delta)" in the COBYLA paper. + # REDUCE_RHO - Will we reduce rho after the trust-region iteration? + # COBYLA never sets IMPROVE_GEO and REDUCE_RHO to True simultaneously. + + for tr in range(maxtr): + # Increase the penalty parameter CPEN, if needed, so that PREREM = PREREF + CPEN * PREREC > 0. + # This is the first (out of two) update of CPEN, where CPEN increases or remains the same. + # N.B.: CPEN and the merit function PHI = FVAL + CPEN*CVAL are used in three places only. + # 1. In FINDPOLE/UPDATEPOLE, deciding the optimal vertex of the current simplex. + # 2. After the trust-region trial step, calculating the reduction ratio. + # 3. In GEOSTEP, deciding the direction of the geometry step. + # They do not appear explicitly in the trust-region subproblem, though the trust-region center + # (i.e. the current optimal vertex) is defined by them. + cpen = getcpen(conmat, cpen, cval, delta, fval, rho, sim, simi) + + # Switch the best vertex of the current simplex to SIM[:, NUM_VARS]. + conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi) + # Check whether to exit due to damaging rounding in UPDATEPOLE. + if subinfo == DAMAGING_ROUNDING: + info = subinfo + break # Better action to take? Geometry step, or simply continue? + + # Does the interpolation set have acceptable geometry? It affects improve_geo and reduce_rho + adequate_geo = assess_geo(delta, factor_alpha, factor_beta, sim, simi) + + # Calculate the linear approximations to the objective and constraint functions, placing + # minus the objective function gradient after the constraint gradients in the array A. + # N.B.: TRSTLP accesses A mostly by colums, so it is more reasonable to save A instead of A.T + A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T + A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi + # Theoretically, but not numerically, the last entry of b does not affect the result of TRSTLP + # We set it to -fval[num_vars] following Powell's code. + b = np.hstack((-conmat[:, num_vars], -fval[num_vars])) + + # Calculate the trust-region trial step d. Note that d does NOT depend on cpen. + d = trstlp(A, b, delta) + dnorm = min(delta, np.linalg.norm(d)) + + # Is the trust-region trial step short? N.B.: we compare DNORM with RHO, not DELTA. + # Powell's code especially defines SHORTD by SHORTD = (DNORM < 0.5 * RHO). In our tests + # 1/10 seems to work better than 1/2 or 1/4, especially for linearly constrained + # problems. Note that LINCOA has a slightly more sophisticated way of defining SHORTD, + # taking into account whether D casues a change to the active set. Should we try the same here? + shortd = (dnorm < 0.1 * rho) + + # Predict the change to F (PREREF) and to the constraint violation (PREREC) due to D. + # We have the following in precise arithmetic. They may fail to hold due to rounding errors. + # 1. B[:NUM_CONSTRAINTS] = -CONMAT[:, NUM_VARS] and hence max(max(B[:NUM_CONSTRAINTS] - D@A[:, :NUM_CONSTRAINTS]), 0) is the + # L-infinity violation of the linearized constraints corresponding to D. When D=0, the + # violation is max(max(B[:NUM_CONSTRAINTS]), 0) = CVAL[NUM_VARS]. PREREC is the reduction of this + # violation achieved by D, which is nonnegative in theory; PREREC = 0 iff B[:NUM_CONSTRAINTS] <= 0, i.e. the + # trust-region center satisfies the linearized constraints. + # 2. PREREF may be negative or 0, but it is positive when PREREC = 0 and shortd is False + # 3. Due to 2, in theory, max(PREREC, PREREF) > 0 if shortd is False. + prerec = cval[num_vars] - max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0) + preref = np.dot(d, A[:, num_constraints]) # Can be negative + + # Evaluate PREREM, which is the predicted reduction in the merit function. + # In theory, PREREM >= 0 and it is 0 iff CPEN = 0 = PREREF. This may not be true numerically. + prerem = preref + cpen * prerec + trfail = prerem < 1e-5 * min(cpen, 1) * rho**2 or np.isnan(prerem) # PREREM is tiny/negative or NaN. + + if shortd or trfail: + # Reduce DELTA if D is short or if D fails to render PREREM > 0. The latter can only happen due to + # rounding errors. This seems quite important for performance + delta *= 0.1 + if delta <= gamma3 * rho: + delta = rho # set delta to rho when it is close to or below + else: + x = sim[:, num_vars] + d + # Evaluate the objective and constraints at X, taking care of possible inf/nan values. + f, constr, cstrv = evaluate(calcfc, x) + nf += 1 + + # Print a message about the function/constraint evaluation accoring to iprint + # TODO: Implement me + # fmsg(solver, iprint, nf, f, x, cstrv, constr) + # Save X, F, CONSTR, CSTRV into the history. + # TODO: Implement me + # savehist(nf, x, xhist, f, fhist, cstrv, chist, constr, conhist) + # Save X, F, CONSTR, CSTRV into the filter. + nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt) + + # Evaluate ACTREM, which is the actual reduction in the merit function + actrem = (fval[num_vars] + cpen * cval[num_vars]) - (f + cpen * cstrv) + + # Calculate the reduction ratio by redrat, which hands inf/nan carefully + ratio = redrat(actrem, prerem, eta1) + + # Update DELTA. After this, DELTA < DNORM may hold. + # N.B.: + # 1. Powell's code uses RHO as the trust-region radius and updates it as follows: + # Reduce RHO to GAMMA1*RHO if ADEQUATE_GEO is TRUE and either SHORTD is TRUE or RATIO < ETA1, + # and then revise RHO to RHOEND if its new value is not more than GAMMA3*RHOEND; RHO remains + # unchanged in all other cases; in particular, RHO is never increased. + # 2. Our implementation uses DELTA as the trust-region radius, while using RHO as a lower + # bound for DELTA. DELTA is updated in a way that is typical for trust-region methods, and + # it is revised to RHO if its new value is not more than GAMMA3*RHO. RHO reflects the current + # resolution of the algorithm; its update is essentially the same as the update of RHO in + # Powell's code (see the definition of REDUCE_RHO below). Our implementation aligns with + # UOBYQA/NEWUOA/BOBYQA/LINCOA and improves the performance of COBYLA. + # 3. The same as Powell's code, we do not reduce RHO unless ADEQUATE_GEO is TRUE. This is + # also how Powell updated RHO in UOBYQA/NEWUOA/BOBYQA/LINCOA. What about we also use + # ADEQUATE_GEO == TRUE as a prerequisite for reducing DELTA? The argument would be that the + # bad (small) value of RATIO may be because of a bad geometry (and hence a bad model) rather + # than an improperly large DELTA, and it might be good to try improving the geometry first + # without reducing DELTA. However, according to a test on 20230206, it does not improve the + # performance if we skip the update of DELTA when ADEQUATE_GEO is FALSE and RATIO < 0.1. + # Therefore, we choose to update DELTA without checking ADEQUATE_GEO. + + delta = trrad(delta, dnorm, eta1, eta2, gamma1, gamma2, ratio) + if delta <= gamma3*rho: + delta = rho # Set delta to rho when it is close to or below. + + # Is the newly generated X better than the current best point? + ximproved = actrem > 0 # If ACTREM is NaN, then XIMPROVED should and will be False + + # Set JDROP_TR to the index of the vertex to be replaced with X. JDROP_TR = 0 means there + # is no good point to replace, and X will not be included into the simplex; in this case, + # the geometry of the simplex likely needs improvement, which will be handled below. + jdrop_tr = setdrop_tr(ximproved, d, delta, rho, sim, simi) + + # Update SIM, SIMI, FVAL, CONMAT, and CVAL so that SIM[:, JDROP_TR] is replaced with D. + # UPDATEXFC does nothing if JDROP_TR = 0, as the algorithm decides to discard X. + sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_tr, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi) + # Check whether to break due to dmaging rounding in UPDATEXFC + if subinfo == DAMAGING_ROUNDING: + info = subinfo + break # Better action to take? Geometry step, or a RESCUE as in BOBYQA? + + # Check whether to break due to maxfun, ftarget, etc. + subinfo = checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x) + if subinfo != INFO_DEFAULT: + info = subinfo + break + # End of if SHORTD or TRFAIL. The normal trust-region calculation ends. + + # Before the next trust-region iteration, we possibly improve the geometry of the simplex or + # reduce RHO according to IMPROVE_GEO and REDUCE_RHO. Now we decide these indicators. + # N.B.: We must ensure that the algorithm does not set IMPROVE_GEO = True at infinitely many + # consecutive iterations without moving SIM[:, NUM_VARS] or reducing RHO. Otherwise, the algorithm + # will get stuck in repetitive invocations of GEOSTEP. This is ensured by the following facts: + # 1. If an iteration sets IMPROVE_GEO to True, it must also reduce DELTA or set DELTA to RHO. + # 2. If SIM[:, NUM_VARS] and RHO remain unchanged, then ADEQUATE_GEO will become True after at + # most NUM_VARS invocations of GEOSTEP. + + # BAD_TRSTEP: Is the last trust-region step bad? + bad_trstep = shortd or trfail or ratio <= 0 or jdrop_tr == None + # IMPROVE_GEO: Should we take a geometry step to improve the geometry of the interpolation set? + improve_geo = bad_trstep and not adequate_geo + # REDUCE_RHO: Should we enhance the resolution by reducing rho? + reduce_rho = bad_trstep and adequate_geo and max(delta, dnorm) <= rho + + # COBYLA never sets IMPROVE_GEO and REDUCE_RHO to True simultaneously. + # assert not (IMPROVE_GEO and REDUCE_RHO), 'IMPROVE_GEO or REDUCE_RHO are not both TRUE, COBYLA' + + # If SHORTD or TRFAIL is True, then either IMPROVE_GEO or REDUCE_RHO is True unless ADEQUATE_GEO + # is True and max(DELTA, DNORM) > RHO. + assert not (shortd or trfail) or (improve_geo or reduce_rho or (adequate_geo and max(delta, dnorm) > rho)), \ + 'If SHORTD or TRFAIL is TRUE, then either IMPROVE_GEO or REDUCE_RHO is TRUE unless ADEQUATE_GEO is TRUE and MAX(DELTA, DNORM) > RHO' + + # Comments on BAD_TRSTEP: + # 1. Powell's definition of BAD_TRSTEP is as follows. The one used above seems to work better, + # especially for linearly constrained problems due to the factor TENTH (= ETA1). + # !bad_trstep = (shortd .or. actrem <= 0 .or. actrem < TENTH * prerem .or. jdrop_tr == 0) + # Besides, Powell did not check PREREM > 0 in BAD_TRSTEP, which is reasonable to do but has + # little impact upon the performance. + # 2. NEWUOA/BOBYQA/LINCOA would define BAD_TRSTEP, IMPROVE_GEO, and REDUCE_RHO as follows. Two + # different thresholds are used in BAD_TRSTEP. It outperforms Powell's version. + # !bad_trstep = (shortd .or. trfail .or. ratio <= eta1 .or. jdrop_tr == 0) + # !improve_geo = bad_trstep .and. .not. adequate_geo + # !bad_trstep = (shortd .or. trfail .or. ratio <= 0 .or. jdrop_tr == 0) + # !reduce_rho = bad_trstep .and. adequate_geo .and. max(delta, dnorm) <= rho + # 3. Theoretically, JDROP_TR > 0 when ACTREM > 0 (guaranteed by RATIO > 0). However, in Powell's + # implementation, JDROP_TR may be 0 even RATIO > 0 due to NaN. The modernized code has rectified + # this in the function SETDROP_TR. After this rectification, we can indeed simplify the + # definition of BAD_TRSTEP by removing the condition JDROP_TR == 0. We retain it for robustness. + + # Comments on REDUCE_RHO: + # When SHORTD is TRUE, UOBYQA/NEWUOA/BOBYQA/LINCOA all set REDUCE_RHO to TRUE if the recent + # models are sufficiently accurate according to certain criteria. See the paragraph around (37) + # in the UOBYQA paper and the discussions about Box 14 in the NEWUOA paper. This strategy is + # crucial for the performance of the solvers. However, as of 20221111, we have not managed to + # make it work in COBYLA. As in NEWUOA, we recorded the errors of the recent models, and set + # REDUCE_RHO to true if they are small (e.g., ALL(ABS(MODERR_REC) <= 0.1 * MAXVAL(ABS(A))*RHO) or + # ALL(ABS(MODERR_REC) <= RHO**2)) when SHORTD is TRUE. It made little impact on the performance. + + + # Since COBYLA never sets IMPROVE_GEO and REDUCE_RHO to TRUE simultaneously, the following + # two blocks are exchangeable: IF (IMPROVE_GEO) ... END IF and IF (REDUCE_RHO) ... END IF. + + # Improve the geometry of the simplex by removing a point and adding a new one. + # If the current interpolation set has acceptable geometry, then we skip the geometry step. + # The code has a small difference from Powell's original code here: If the current geometry + # is acceptable, then we will continue with a new trust-region iteration; however, at the + # beginning of the iteration, CPEN may be updated, which may alter the pole point SIM(:, N+1) + # by UPDATEPOLE; the quality of the interpolation point depends on SIM(:, N + 1), meaning + # that the same interpolation set may have good or bad geometry with respect to different + # "poles"; if the geometry turns out bad with the new pole, the original COBYLA code will + # take a geometry step, but our code here will NOT do it but continue to take a trust-region + # step. The argument is this: even if the geometry step is not skipped in the first place, the + # geometry may turn out bad again after the pole is altered due to an update to CPEN; should + # we take another geometry step in that case? If no, why should we do it here? Indeed, this + # distinction makes no practical difference for CUTEst problems with at most 100 variables + # and 5000 constraints, while the algorithm framework is simplified. + if improve_geo and not assess_geo(delta, factor_alpha, factor_beta, sim, simi): + # Before the geometry step, updatepole has been called either implicitly by UPDATEXFC or + # explicitly after CPEN is updated, so that SIM[:, :NUM_VARS] is the optimal vertex. + + # Decide a vertex to drop from the simplex. It will be replaced with SIM[:, NUM_VARS] + D to + # improve acceptability of the simplex. See equations (15) and (16) of the COBYLA paper. + # N.B.: COBYLA never sets JDROP_GEO = num_vars. + jdrop_geo = setdrop_geo(delta, factor_alpha, factor_beta, sim, simi) + + # The following JDROP_GEO comes from UOBYQA/NEWUOA/BOBYQA/LINCOA. It performs poorly! + # jdrop_geo = np.argmax(np.sum(sim[:, :num_vars]**2, axis=0), axis=0) + + # JDROP_GEO is between 0 and NUM_VARS unless SIM and SIMI contain NaN, which should not happen + # at this point unless there is a bug. Nevertheless, for robustness, we include the + # following instruction to break when JDROP_GEO == None (if JDROP_GEO does become None, then a + # TypeError will occur if we continue, as JDROP_GEO will be used as an index of arrays.) + if jdrop_geo is None: + info = DAMAGING_ROUNDING + break + + # Calculate the geometry step D. + # In NEWUOA, GEOSTEP takes DELBAR = max(min(0.1 * sqrt(max(DISTSQ)), 0.% * DELTA), RHO) + # rather than DELTA. This should not be done here, because D should improve the geometry of + # the simplex when SIM[:, JDROP] is replaced with D; the quality of the geometry is defined + # by DELTA instead of DELBAR as in (14) of the COBYLA paper. See GEOSTEP for more detail. + d = geostep(jdrop_geo, cpen, conmat, cval, delta, fval, factor_gamma, simi) + + x = sim[:, num_vars] + d + # Evaluate the objective and constraints at X, taking care of possible inf/nan values. + f, constr, cstrv = evaluate(calcfc, x) + nf += 1 + + # Print a message about the function/constraint evaluation accoring to iprint + # TODO: Implement me + # fmsg(solver, iprint, nf, f, x, cstrv, constr) + # Save X, F, CONSTR, CSTRV into the history. + # TODO: Implement me + # savehist(nf, x, xhist, f, fhist, cstrv, chist, constr, conhist) + # Save X, F, CONSTR, CSTRV into the filter. + nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt) + + subinfo = updatexfc(jdrop_geo, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi) + if subinfo == DAMAGING_ROUNDING: + info = subinfo + break + + # Check whether to break due to maxfun, ftarget, etc. + subinfo = checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x) + if subinfo != INFO_DEFAULT: + info = subinfo + break + # end of if improve_geo. The procedure of improving the geometry ends. + + # The calculations with the current RHO are complete. Enhance the resolution of the algorithm + # by reducing RHO; update DELTA and CPEN at the same time. + if reduce_rho: + if rho <= rhoend: + info = SMALL_TR_RADIUS + break + delta = max(0.5 * rho, redrho(rho, rhoend)) + rho = redrho(rho, rhoend) + # THe second (out of two) updates of CPEN, where CPEN decreases or remains the same. + # Powell's code: cpen = min(cpen, fcratio(fval, conmat)), which may set CPEN to 0. + cpen = max(cpenmin, min(cpen, fcratio(conmat, fval))) + # Print a message about the reduction of rho according to iprint + # TODO: implement me! + #call rhomsg(solver, iprint, nf, fval(n + 1), rho, sim(:, n + 1), cval(n + 1), conmat(:, n + 1), cpen) + conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi) + # Check whether to break due to damaging rounding detected in updatepole + if subinfo == DAMAGING_ROUNDING: + info = subinfo + break # Better action to take? Geometry step, or simply continue? + # End of if reduce_rho. The procedure of reducing RHO ends. + # End of for loop. The iterative procedure ends + + # Return from the calculation, after trying the last trust-region step if it has not been tried yet. + if info == SMALL_TR_RADIUS and shortd and nf < maxfun: + # Zaikun 20230615: UPDATEXFC or UPDATEPOLE is not called since the last trust-region step. Hence + # SIM[:, NUM_VARS] remains unchanged. Otherwise SIM[:, NUM_VARS] + D would not make sense. + x = sim[:, num_vars] + d + f, constr, cstrv = evaluate(calcfc, x) + nf += 1 + # TODO: msg, savehist + nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt) + + # Return the best calculated values of the variables + # N.B.: SELECTX and FINDPOLE choose X by different standards, one cannot replace the other. + kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], max(cpen, cweight), ctol) + x = xfilt[:, kopt] + f = ffilt[kopt] + constr = confilt[:, kopt] + cstrv = cfilt[kopt] + + # Arrange CHIST, CONHIST, FHIST, and XHIST so that they are in the chronological order. + # TODO: Implement me + # call rangehist(nf, xhist, fhist, chist, conhist) + + # Print a return message according to IPRINT. + # TODO: Implement me + #call retmsg(solver, info, iprint, nf, f, x, cstrv, constr) + return x, f, nf, info + + + +def getcpen(conmat, cpen, cval, delta, fval, rho, sim, simi): + ''' + This function gets the penalty parameter CPEN so that PREREM = PREREF + CPEN * PREREC > 0. + See the discussions around equation (9) of the COBYLA paper. + ''' + + num_constraints = conmat.shape[0] + num_vars = sim.shape[0] + + A = np.zeros((num_vars, num_constraints + 1)) + + # Initialize INFO, PREREF, and PREREC, which are needed in the postconditions + info = INFO_DEFAULT + preref = 0 + prerec = 0 + + # Increase CPEN if neccessary to ensure PREREM > 0. Branch back for the next loop if this change + # alters the optimal vertex of the current simplex. Note the following: + # 1. In each loop, CPEN is changed only if PREREC > 0 > PREREF, in which case PREREM is guaranteed + # positive after the update. Note that PREREC >= 0 and max(PREREC, PREREF) > 0 in theory. If this + # holds numerically as well then CPEN is not changed only if PREREC = 0 or PREREF >= 0, in which + # case PREREM is currently positive, explaining why CPEN needs no update. + # 2. Even without an upper bound for the loop counter, the loop can occur at most NUM_VARS+1 times. This + # is because the update of CPEN does not decrease CPEN, and hence it can make vertex J (J <= NUM_VARS) become + # the new optimal vertex only if CVAL[J] is less than CVAL[NUM_VARS], which can happen at most NUM_VARS times. + # See the paragraph below (9) in the COBYLA paper. After the "correct" optimal vertex is found, + # one more loop is needed to calculate CPEN, and hence the loop can occur at most NUM_VARS+1 times. + for iter in range(num_vars + 1): + # Switch the best vertex of the current simplex to SIM[:, NUM_VARS] + conmat, cval, fval, sim, simi, info = updatepole(cpen, conmat, cval, fval, sim, simi) + # Check whether to exit due to damaging rounding in UPDATEPOLE + if info == DAMAGING_ROUNDING: + break + + # Calculate the linear approximations to the objective and constraint functions, placing minus + # the objective function gradient after the constraint gradients in the array A. + A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T + A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi + b = np.hstack((-conmat[:, num_vars], -fval[num_vars])) + + # Calculate the trust-region trial step D. Note that D does NOT depend on CPEN. + d = trstlp(A, b, delta) + + # Predict the change to F (PREREF) and to the constraint violation (PREREC) due to D. + prerec = cval[num_vars] - max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0) + preref = np.dot(d, A[:, num_constraints]) # Can be negative + + if not (prerec > 0 and preref < 0): # PREREC <= 0 or PREREF >=0 or either is NaN + break + + # Powell's code defines BARMU = -PREREF / PREREC, and CPEN is increased to 2*BARMU if and + # only if it is currently less than 1.5*BARMU, a very "Powellful" scheme. In our + # implementation, however, we seet CPEN directly to the maximum between its current value and + # 2*BARMU while handling possible overflow. The simplifies the scheme without worsening the + # performance of COBYLA. + cpen = max(cpen, min(-2 * preref / prerec, REALMAX)) + + if findpole(cpen, cval, fval) == num_vars: + break + + return cpen \ No newline at end of file diff --git a/python/src/prima/cobyla/geometry.py b/python/src/prima/cobyla/geometry.py new file mode 100644 index 0000000000..e5bff7cf0b --- /dev/null +++ b/python/src/prima/cobyla/geometry.py @@ -0,0 +1,192 @@ +import numpy as np + +def setdrop_geo(delta, factor_alpha, factor_beta, sim, simi): + ''' + This function finds (the index) of a current interpolation point to be replaced with a + geometry-improving point. See (15)-(16) of the COBYLA paper. + N.B.: COBYLA never sets jdrop to NUM_VARS + ''' + + num_vars = sim.shape[0] + + # Calculate the values of sigma and eta + # VSIG[j] for j = 0...NUM_VARS-1 is the Euclidean distance from vertex J to the opposite face of the simplex. + vsig = 1 / np.sqrt(np.sum(simi**2, axis=1)) + veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0)) + + # Decide which vertex to drop from the simplex. It will be replaced with a new point to improve the + # acceptability of the simplex. See equations (15) and (16) of the COBYLA paper. + if any(veta > factor_beta * delta): + jdrop = np.where(veta == max(veta[~np.isnan(veta)]))[0][0] + elif any(vsig < factor_alpha * delta): + jdrop = np.where(vsig == min(vsig[~np.isnan(vsig)]))[0][0] + else: + # We arrive here if vsig and veta are all nan, which can happen due to nan in sim and simi + # which should not happen unless there is a bug + jdrop = None + + # Zaikun 230202: What if we consider veta and vsig together? The following attempts do not work well. + # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)*np.sum(simi**2, axis=1)) # Condition number + # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)**2 * np.sum(simi**2, axis=1)) # Condition number times distance + return jdrop + +def geostep(jdrop, cpen, conmat, cval, delta, fval, factor_gamma, simi): + ''' + This function calculates a geometry step so that the geometry of the interpolation set is improved + when SIM[: JDROP_GEO] is replaced with SIM[:NUM_VARS] + D. See (15)--(17) of the COBYLA paper. + ''' + num_constraints = conmat.shape[0] + num_vars = simi.shape[0] + + # SIMI[JDROP, :] is a vector perpendicular to the face of the simplex to the opposite of vertex + # JDROP. Thus VSIGJ * SIMI[JDROP, :] is the unit vector in this direction + vsigj = 1 / np.sqrt(np.sum(simi[jdrop, :]**2)) + + # Set D to the vector in the above-mentioned direction and with length FACTOR_GAMMA * DELTA. Since + # FACTOR_ALPHA < FACTOR_GAMMA < FACTOR_BETA, D improves the geometry of the simplex as per (14) of + # the COBYLA paper. This also explains why this subroutine does not replace DELTA with + # DELBAR = max(min(0.1 * np.sqrt(max(DISTSQ)), 0.5 * DELTA), RHO) as in NEWUOA. + d = factor_gamma * delta * (vsigj * simi[jdrop, :]) + + # Calculate the coefficients of the linear approximations to the objective and constraint functions, + # placing minus the objective function gradient after the constraint gradients in the array A + A = np.zeros((num_vars, num_constraints + 1)) + A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T + A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi + cvmaxp = max(0, max(-d@A[:, :num_constraints] - conmat[:, num_vars])) + cvmaxn = max(0, max(d@A[:, :num_constraints] - conmat[:, num_vars])) + if 2 * np.dot(d, A[:, num_constraints]) < cpen * (cvmaxp - cvmaxn): + d *= -1 + return d + +def setdrop_tr(ximproved, d, delta, rho, sim, simi): + ''' + This function finds (the index) of a current interpolation point to be replaced with the + trust-region trial point. See (19)-(22) of the COBYLA paper. + N.B.: + 1. If XIMPROVED == True, then JDROP >= 0 so that D is included into XPT. Otherwise, it is a bug. + 2. COBYLA never sets JDROP = NUM_VARS + TODO: Check whether it improves the performance if JDROP = NUM_VARS is allowed when XIMPROVED is True. + Note that UPDATEXFC should be revised accordingly. + ''' + + num_vars = sim.shape[0] + + # -------------------------------------------------------------------------------------------------- # + # The following code is Powell's scheme for defining JDROP. + # -------------------------------------------------------------------------------------------------- # + # ! JDROP = 0 by default. It cannot be removed, as JDROP may not be set below in some cases (e.g., + # ! when XIMPROVED == FALSE, MAXVAL(ABS(SIMID)) <= 1, and MAXVAL(VETA) <= EDGMAX). + # jdrop = 0 + # + # ! SIMID(J) is the value of the J-th Lagrange function at D. It is the counterpart of VLAG in UOBYQA + # ! and DEN in NEWUOA/BOBYQA/LINCOA, but it excludes the value of the (N+1)-th Lagrange function. + # simid = matprod(simi, d) + # if (any(abs(simid) > 1) .or. (ximproved .and. any(.not. is_nan(simid)))) then + # jdrop = int(maxloc(abs(simid), mask=(.not. is_nan(simid)), dim=1), kind(jdrop)) + # !!MATLAB: [~, jdrop] = max(simid, [], 'omitnan'); + # end if + # + # ! VETA(J) is the distance from the J-th vertex of the simplex to the best vertex, taking the trial + # ! point SIM(:, N+1) + D into account. + # if (ximproved) then + # veta = sqrt(sum((sim(:, 1:n) - spread(d, dim=2, ncopies=n))**2, dim=1)) + # !!MATLAB: veta = sqrt(sum((sim(:, 1:n) - d).^2)); % d should be a column! Implicit expansion + # else + # veta = sqrt(sum(sim(:, 1:n)**2, dim=1)) + # end if + # + # ! VSIG(J) (J=1, .., N) is the Euclidean distance from vertex J to the opposite face of the simplex. + # vsig = ONE / sqrt(sum(simi**2, dim=2)) + # sigbar = abs(simid) * vsig + # + # ! The following JDROP will overwrite the previous one if its premise holds. + # mask = (veta > factor_delta * delta .and. (sigbar >= factor_alpha * delta .or. sigbar >= vsig)) + # if (any(mask)) then + # jdrop = int(maxloc(veta, mask=mask, dim=1), kind(jdrop)) + # !!MATLAB: etamax = max(veta(mask)); jdrop = find(mask & ~(veta < etamax), 1, 'first'); + # end if + # + # ! Powell's code does not include the following instructions. With Powell's code, if SIMID consists + # ! of only NaN, then JDROP can be 0 even when XIMPROVED == TRUE (i.e., D reduces the merit function). + # ! With the following code, JDROP cannot be 0 when XIMPROVED == TRUE, unless VETA is all NaN, which + # ! should not happen if X0 does not contain NaN, the trust-region/geometry steps never contain NaN, + # ! and we exit once encountering an iterate containing Inf (due to overflow). + # if (ximproved .and. jdrop <= 0) then ! Write JDROP <= 0 instead of JDROP == 0 for robustness. + # jdrop = int(maxloc(veta, mask=(.not. is_nan(veta)), dim=1), kind(jdrop)) + # !!MATLAB: [~, jdrop] = max(veta, [], 'omitnan'); + # end if + # -------------------------------------------------------------------------------------------------- # + # Powell's scheme ends here. + # -------------------------------------------------------------------------------------------------- # + + # The following definition of JDROP is inspired by SETDROP_TR in UOBYQA/NEWUOA/BOBYQA/LINCOA. + # It is simpler and works better than Powell's scheme. Note that we allow JDROP to be NUM_VARS if + # XIMPROVED is True, whereas Powell's code does not. + # See also (4.1) of Scheinberg-Toint-2010: Self-Correcting Geometry in Model-Based Algorithms for + # Derivative-Free Unconstrained Optimization, which refers to the strategy here as the "combined + # distance/poisedness criteria". + + # DISTSQ[j] is the square of the distance from the jth vertex of the simplex to get "best" point so + # far, taking the trial point SIM[:, NUM_VARS] + D into account. + distsq = np.zeros(sim.shape[1]) + if ximproved: + distsq[:num_vars] = np.sum((sim[:, :num_vars] - np.tile(d, (num_vars, 1)).T)**2, axis=0) + distsq[num_vars] = sum(d**2) + else: + distsq[:num_vars] = np.sum(sim[:, :num_vars]**2, axis=0) + distsq[num_vars] = 0 + + weight = np.maximum(np.ones(len(distsq)), distsq / max(rho, delta/10)**2) # Similar to Powell's NEWUOA code. + + # Other possible definitions of weight. They work almost the same as the one above. + # weight = distsq # Similar to Powell's LINCOA code, but WRONG. See comments in LINCOA/geometry.f90. + # weight = max(1, max(25 * distsq / delta**2)) # Similar to Powell's BOBYQA code, works well. + # weight = max(1, max(10 * distsq / delta**2)) + # weight = max(1, max(1e2 * distsq / delta**2)) + # weight = max(1, max(distsq / max(rho, delta/10)**2)) + # weight = max(1, max(distsq / rho**2)) ! Similar to Powell's UOBYQA + + # If 0 <= j < NUM_VARS, SIMID[j] is the value of the jth Lagrange function at D; the value of the + # (NUM_VARS+1)th Lagrange function is 1 - sum(SIMID). [SIMID, 1 - sum(SIMID)] is the counterpart of + # VLAG in UOBYQA and DEN in NEWUOA/BOBYQA/LINCOA. + simid = simi@d + score = weight * abs(np.array([*simid, 1 - sum(simid)])) + + # If XIMPORVED = False (D does not render a better X), set SCORE[NUM_VARS] = -1 to avoid JDROP = NUM_VARS. + if not ximproved: + score[num_vars] = -1 + + # The following if statement works a bit better than `if any(score > 1) or (any(score > 0) and ximproved)` + # from Powell's UOBYQA and NEWUOA code. + if any(score > 0): # Powell's BOBYQA and LINCOA code. + jdrop = np.where(score == max(score[~np.isnan(score)]))[0][0] + elif ximproved: + jdrop = np.argmax(distsq) + else: + jdrop = None # We arrive here when XIMPROVED = False and no entry of score is positive. + + return jdrop + +def assess_geo(delta, factor_alpha, factor_beta, sim, simi): + ''' + This function checks if an interpolation set has acceptable geometry as (14) of the COBYLA paper + ''' + + num_vars = sim.shape[0] + + # Calculate the values of sigma and eta + # veta[j] (0 <= j < num_vars) is the distance between the vertices j and 0 (the best vertex) + # of the simplex. + # vsig[j] (0 <= j < num_vars) is the distance from vertex j to its opposite face of the simplex. + # Thus vsig <= veta. + # N.B.: What about the distance from vertex N+1 to its opposite face? Consider the simplex + # {V_{N+1}, V_{N+1} + L*e_1,... v_{N+1} + L*e_N}, where V_{N+1} is vertex N+1, + # namely the current "best" point, [e_1, ..., e_n] is an orthogonal matrix, and L is a + # constant in the order of delta. This simplex is optimal in the sense that the interpolation + # system has the minimal condition number, i.e. 1. For this simplex, the distance from + # V_{N+1} to its opposite face is L/sqrt(n). + vsig = 1/np.sqrt(np.sum(simi**2, axis=1)) # TODO: Check axis + veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0)) + adequate_geo = all(vsig >= factor_alpha * delta) and all(veta <= factor_beta * delta) + return adequate_geo \ No newline at end of file diff --git a/python/src/prima/cobyla/trustregion.py b/python/src/prima/cobyla/trustregion.py new file mode 100644 index 0000000000..ebccb25c8f --- /dev/null +++ b/python/src/prima/cobyla/trustregion.py @@ -0,0 +1,491 @@ +''' +This module provides subrouties concerning the trust-region calculations of COBYLA. + +Coded by Zaikun ZHANG (www.zhangzk.net) based on Powell's code and the COBYLA paper. + +Dedicated to late Professor M. J. D. Powell FRS (1936--2015). + +Python implementation by Nickolai Belakovski +''' + +import numpy as np +import numpy.typing as npt +from prima.common.consts import REALMIN, REALMAX, EPS +from prima.common.powalg import qradd_Rdiag, qrexc_Rdiag +from prima.common.linalg import isminor + +def trrad(delta_in, dnorm, eta1, eta2, gamma1, gamma2, ratio): + ''' + This function updates the trust region radius according to RATIO and DNORM. + ''' + + if ratio <= eta1: + delta = gamma1 * dnorm # Powell's UOBYQA/NEWOUA + # delta = gamma1 * delta_in # Powell's COBYLA/LINCOA + # delta = min(gamma1 * delta_in, dnorm) # Powell's BOBYQA + elif ratio <= eta2: + delta = max(gamma1 * delta_in, dnorm) # Powell's UOBYQA/NEWOUA/BOBYQA/LINCOA + else: + delta = max(gamma1 * delta_in, gamma2 * dnorm) # Powell's NEWUOA/BOBYQA + # delta = max(delta_in, gamma2 * dnorm) # Modified version. Works well for UOBYQA + # For noise-free CUTEst problems of <= 100 variables, Powell's version works slightly better + # than the modified one. + # delta = max(delta_in, 1.25*dnorm, dnorm + rho) # Powell's UOBYQA + # delta = min(max(gamma1 * delta_in, gamma2 * dnorm), gamma3 * delta_in) # Powell's LINCOA, gamma3 = np.sqrt(2) + + # For noisy problems, the following may work better. + # if ratio <= eta1: + # delta = gamma1 * dnorm + # elseif ratio <= eta2: # Ensure DELTA >= DELTA_IN + # delta = delta_in + # else: # Ensure DELTA > DELTA_IN with a constant factor + # delta = max(delta_in * (1 + gamma2) / 2, gamma2 * dnorm) + return delta + +def redrat(ared, pred, rshrink): + # This function evaluates the reduction ratio of a trust-region step, handling inf/nan properly. + if np.isnan(ared): + # This should not happen in unconstrained problems due to the moderated extreme barrier. + ratio = -REALMAX + elif np.isnan(pred) or pred <= 0: + # The trust-region subproblem solver fails in this rare case. Instead of terminating as Powell's + # original code does, we set ratio as follows so that the solver may continue to progress. + if ared > 0: + # The trial point will be accepted, but the trust-region radius will be shrunk if rshrink>0 + ratio = rshrink/2 + else: + # Set the ration to a large negative number to signify a bad trust-region step, so that the + # solver will check whether to take a geometry step or reduce rho. + ratio = -REALMAX + elif np.isposinf(pred) and np.isposinf(ared): + ratio = 1 # ared/pred = NaN if calculated directly + elif np.isposinf(pred) and np.isneginf(ared): + ratio = -REALMAX # ared/pred = NaN if calculated directly + else: + ratio = ared/pred + return ratio + + +def trstlp(A_in, b_in, delta): + ''' + This function calculated an n-component vector d by the following two stages. In the first + stage, d is set to the shortest vector that minimizes the greatest violation of the constraints + np.dot(A[:n, k], d) >= b(k), k = 0, 1, 2, ..., m-1 + subject to the Euclidean length of d being at most delta. If its length is strictly less than + delta, then the second stage uses the resultant freedom in d to minimize the objective function + np.dot(-A[:n, m], d) + subject to no increase in any greatest constraint violation. This notation allows the gradient of + the objective function to be regarded as the gradient of a constraint. Therefore the two stages + are distinguished by mcon == m and mcon > m respectively. + + It is possible but rare that a degeneracy may prevent d from attaining the target length delta. + + cviol is the largest constraint violation of the current d: max(b[:m] - A[:,:m].T@d, 0). TODO: Not sure if it should be an @, or what max(vector, scalar) means + icon is the index of a most violated constraint if cviol is positive. + + nact is the number of constraints in the active set and iact[0], ..., iact[nact-1] are their indices, + while the remainder of the iact contains a permutation of the remaining constraint indicies. + N.B.: nact <= min(num_constraints, num_vars). Obviously nact <= num_constraints. In addition, the constraints + in iact[0, ..., nact-1] have linearly independent gradients (see the comments above the instruction + that delete a constraint from the active set to make room for the new active constraint with index iact[icon]); + it can also be seen from the update of nact: starting from 0, nact is incremented only if nact < n. + + Further, Z is an orthogonal matrix whose first nact columns can be regarded as the result of + Gram-Schmidt applied to the active constraint gradients. For j = 0, 1, ..., nact-1, the number + zdota[j] is the scalar product of the jth column of Z with the gradient of the jth active + constraint. d is the current vector of variables and here the residuals of the active constraints + should be zero. Further, the active constraints have nonnegative Lagrange multipliers that are + held at the beginning of vmultc. The remaineder of this vector holds the residuals of the inactive + constraints at d, the ordering of the componenets of vmultc being in agreement with the permutation + of the indices of the constraints that is in iact. All these residuals are nonnegative, which is + achieved by the shift cviol that makes the least residual zero. + + N.B.: + 1. The algorithm was NOT documented in the COBYLA paper. A note should be written to introduce it! + 2. As a major part of the algorithm (see trstlp_sub), the code maintains and updates the QR + factorization of A[iact[:nact]], i.e. the gradients of all the active (linear) constraints. The + matrix Z is indeed Q, and the vector zdota is the diagonal of R. The factorization is updated by + Givens rotations when an index is added in or removed from iact. + 3. There are probably better algorithms available for this trust-region linear programming problem. + ''' + + A = A_in.copy() + b = b_in.copy() + + # Sizes + num_constraints = A.shape[1] - 1 + + # Preconditions + assert num_constraints >= 0 + assert A.shape[0] >= 1 and A.shape[1] >= 1 + assert b.shape[0] == A.shape[1] + assert delta > 0 + + + vmultc = np.zeros(len(b)) + iact = np.zeros(len(b), dtype=int) + nact = 0 + d = np.zeros(A_in.shape[0]) + z = np.zeros((len(d), len(d))) + + + + # Scale the problem if A contains large values. Otherwise floating point exceptions may occur. + # Note that the trust-region step is scale invariant. + for i in range(num_constraints+1): # Note that A.shape[1] == len(b) == num_constraints+1 != num_constraints + if maxval:=max(abs(A_in[:, i]) > 1e12): + modscal = max(2*REALMIN, 1/maxval) + A[:, i] *= modscal + b[i] *= modscal + + # Stage 1: minimize the 1+infinity constraint violation of the linnearized constraints. + iact[:num_constraints], nact, d, vmultc[:num_constraints], z = trstlp_sub(iact[:num_constraints], nact, 1, A[:, :num_constraints], b[:num_constraints], delta, d, vmultc[:num_constraints], z) + + # Stage 2: minimize the linearized objective without increasing the 1_infinity constraint violation. + iact, nact, d, vmultc, z = trstlp_sub(iact, nact, 2, A, b, delta, d, vmultc, z) + + # ================ + # Calculation ends + # ================ + + # Postconditions + assert d.shape[0] == A.shape[0] + assert all(np.isfinite(d)) + # Due to rounding, it may happen that ||D|| > DELTA, but ||D|| > 2*DELTA is highly improbable. + assert np.linalg.norm(d) <= 2 * delta + + return d + +def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): + ''' + This subroutine does the real calculations for trstlp, both stage 1 and stage 2. + Major differences between stage 1 and stage 2: + 1. Initializiation. Stage 2 inherits the values of some variables from stage 1, so they are + initialized in stage 1 but not in stage 2. + 2. cviol. cviol is updated after at iteration in stage 1, while it remains a constant in stage2. + 3. sdirn. See the definition of sdirn in the code for details. + 4. optnew. The two stages have different objectives, so optnew is updated differently. + 5. step. step <= cviol in stage 1. + ''' + # zdasav = np.zeros(z.shape[1]) + vmultd = np.zeros(len(vmultc)) + zdota = np.zeros(z.shape[1]) + + # Sizes + num_vars = A.shape[0] + mcon = A.shape[1] + + # Precondictions + assert num_vars >= 1 + assert stage == 1 or stage == 2 + assert (mcon >= 0 and stage == 1) or (mcon >= 1 and stage == 2) + assert b.shape[0] == mcon + assert iact.shape[0] == mcon + assert vmultc.shape[0] == mcon + assert d.shape[0] == num_vars + assert z.shape[0] == num_vars and z.shape[1] == num_vars + assert delta > 0 + if stage == 2: + assert all(np.isfinite(d)) and np.linalg.norm(d) <= 2 * delta + assert nact >= 0 and nact <= min(mcon, num_vars) + assert all(vmultc[:mcon]) >= 0 + # N.B.: Stage 1 defines only VMULTC(1:M); VMULTC(M+1) is undefined! + + + # Initialize according to stage + if stage == 1: + iact = np.linspace(0, mcon-1, mcon, dtype=int) + nact = 0 + d = np.zeros(num_vars) + cviol = max(*b, 0) + vmultc = cviol - b + z = np.eye(num_vars) + if mcon == 0 or cviol <= 0: + # Check whether a quick return is possible. Make sure the in-outputs have been initialized. + return iact, nact, d, vmultc, z + + if all(np.isnan(b)): + return iact, nact, d, vmultc, z + else: + icon = np.where(b == max(b[~np.isnan(b)]))[0][0] + num_constraints = mcon + sdirn = np.zeros(len(d)) + else: + if np.dot(d, d) >= delta**2: + # Check whether a quick return is possible. + return iact, nact, d, vmultc, z + + iact[mcon-1] = mcon-1 + vmultc[mcon-1] = 0 + num_constraints = mcon - 1 + icon = mcon - 1 + + # In Powell's code, stage 2 uses the zdota and cviol calculated by stage1. Here we recalculate + # them so that they need not be passed from stage 1 to 2, and hence the coupling is reduced. + cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0) + zdota[:nact] = [np.dot(z[:, k], A[:, iact[k]]) for k in range(nact)] + + # More initialization + optold = REALMAX + nactold = nact + nfail = 0 + + # Zaikun 20211011: vmultd is computed from scratch at each iteration, but vmultc is inherited + + # Powell's code can encounter infinite cycling, which did happen when testing the following CUTEst + # problems: DANWOODLS, GAUSS1LS, GAUSS2LS, GAUSS3LS, KOEBHELB, TAX13322, TAXR13322. Indeed, in all + # these cases, Inf/NaN appear in d due to extremely large values in A (up to 10^219). To resolve + # this, we set the maximal number of iterations to maxiter, and terminate if Inf/NaN occurs in d. + maxiter = min(10000, 100*max(num_constraints, num_vars)) + for iter in range(maxiter): + if stage == 1: + optnew = cviol + else: + optnew = -np.dot(d, A[:, mcon-1]) + + # End the current stage of the calculation if 3 consecutive iterations have either failed to + # reduce the best calculated value of the objective function or to increase the number of active + # constraints since the best value was calculated. This strategy prevents cycling, but there is + # a remote possibility that it will cause premature termination. + if optnew < optold or nact > nactold: + nactold = nact + nfail = 0 + else: + nfail += 1 + optold = min(optold, optnew) + if nfail == 3: + break + + # TODO: nact needs to be changed to nact-1 in a lot of places, essentially everywhere it acts as an index + # If icon exceeds nact, then we add the constraint with index iact[icon] to the active set. + # Apply Givens rotations so that the last num_vars-nact-1 columns of Z are orthogonal to the gradient + # of the new constraint, a scalar product being set to 0 if its nonzero value could be due to + # computer rounding errors, which is tested by ISMINOR. + if icon >= nact: # In Python this needs to be >= since Python is 0-indexed (in Fortran we have 1 > 0, in Python we need 0 >= 0) + # zdasav[:nact] = zdota[:nact] + nactsav = nact + z, zdota, nact = qradd_Rdiag(A[:, iact[icon]], z, zdota, nact) # May update nact to nact+1 + # Indeed it suffices to pass zdota[:min(num_vars, nact+1)] to qradd as follows: + # qradd(A[:, iact[icon]], z, zdota[:min(num_vars, nact+1)], nact) + + if nact == nactsav + 1: + # N.B.: It is possible to index arrays using [nact, icon] when nact == icon. + # Zaikun 20211012: Why should vmultc[nact] = 0? + if nact != (icon + 1): # Need to add 1 to Python for 0 indexing + vmultc[[icon, nact-1]] = vmultc[nact-1], 0 + iact[[icon, nact-1]] = iact[[nact-1, icon]] + else: + vmultc[nact-1] = 0 + else: + # Zaikun 20211011: + # 1. VMULTD is calculated from scratch for the first time (out of 2) in one iteration. + # 2. Note that IACT has not been updated to replace IACT[NACT] with IACT[ICON]. Thus + # A[:, IACT[:NACT]] is the UNUPDATED version before QRADD (note Z[:, :NACT] remains the + # same before and after QRADD). Therefore if we supply ZDOTA to LSQR (as Rdiag) as + # Powell did, we should use the UNUPDATED version, namely ZDASAV. + # vmultd[:nact] = lsqr(A[:, iact[:nact]], A[:, iact[icon]], z[:, :nact], zdasav[:nact]) + vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], A[:, iact[icon]])[0] + if not any(np.logical_and(vmultd[:nact] > 0, iact[:nact] <= num_constraints)): + # N.B.: This can be triggered by NACT == 0 (among other possibilities)! This is + # important, because NACT will be used as an index in the sequel. + break + # vmultd[NACT+1:mcon] is not used, but we have to initialize it in Fortran, or compilers + # complain about the where construct below (another solution: restrict where to 1:NACT). + # TODO: How does this apply to Python? + vmultd[nact:mcon] = -1 # len(vmultd) == mcon + + # Revise the Lagrange multipliers. The revision is not applicable to vmultc[nact:num_constraints]. + fracmult = [vmultc[i]/vmultd[i] if vmultd[i] > 0 and iact[i] <= num_constraints else REALMAX for i in range(nact)] + # Only the places with vmultd > 0 and iact <= m is relevant below, if any. + frac = min(fracmult[:nact]) # fracmult[nact:mcon] may contain garbage + vmultc[:nact] = np.maximum(np.zeros(len(vmultc[:nact])), vmultc[:nact] - frac*vmultd[:nact]) + + # Zaikun 20210811: Powell's code includes the following, which is IMPOSSIBLE TO REACH + # !if (icon < nact) then + # ! do k = icon, nact-1 + # ! hypt = sqrt(zdota(k+1)**2+inprod(z(:, k), A(:, iact(k+1)))**2) + # ! grot = planerot([zdota(k+1), inprod(z(:, k), A(:, iact(k+1)))]) + # ! z(:, [k, k+1]) = matprod(z(:, [k+1, k]), transpose(grot)) + # ! zdota([k, k+1]) = [hypt, (zdota(k+1) / hypt) * zdota(k)] + # ! end do + # ! iact(icon:nact) = [iact(icon+1:nact), iact(icon)] + # ! vmultc(icon:nact) = [vmultc(icon+1:nact), vmultc(icon)] + # !end if + + # Reorder the active constraints so that the one to be replaced is at the end of the list. + # Exit if the new value of zdota[nact] is not acceptable. Powell's condition for the + # following If: non abs(zdota[nact]) > 0. Note that it is different from + # 'abs(zdota[nact]) <=0)' as zdota[nact] can be NaN. TODO: Does that make sense with Python? + # It seems Python returns false in both cases, whereas Fortran returns true for abs(NaN) > 0 + # and false for the other one. + # N.B.: We cannot arrive here with nact == 0, which should have triggered a break above + if np.isnan(zdota[nact - 1]) or abs(zdota[nact - 1]) <= EPS**2: + break + vmultc[[icon, nact - 1]] = 0, frac # vmultc[[icon, nact]] is valid as icon > nact + iact[[icon, nact - 1]] = iact[[nact - 1, icon]] + # end if nact == nactsav + 1 + + # In stage 2, ensure that the objective continues to be treated as the last active constraint. + # Zaikun 20211011, 20211111: Is it guaranteed for stage 2 that iact[nact-1] = mcon when + # iact[nact] != mcon??? If not, then how does the following procedure ensure that mcon is + # the last of iact[:nact]? + if stage == 2 and iact[nact - 1] != (mcon - 1): + assert nact > 0, "nact > 0 is required" + z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], nact - 2) # We pass nact-2 in Python instead of nact-1 + # Indeed, it suffices to pass Z[:, :nact] to qrexc as follows: + # z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], nact - 1) + iact[[nact-2, nact-1]] = iact[[nact-1, nact-2]] + vmultc[[nact-2, nact-1]] = vmultc[[nact-1, nact-2]] + # Zaikun 20211117: It turns out that the last few lines do not guarantee iact[nact] == num_vars in + # stage 2; the following test cannot be passed. IS THIS A BUG?! + # assert iact[nact] == mcon or stage == 1, 'iact[nact] must == mcon in stage 2' + + # Powell's code does not have the following. It avoids subsequent floating points exceptions. + if np.isnan(zdota[nact-1]) or abs(zdota[nact-1]) <= EPS**2: + break + + # Set sdirn to the direction of the next change to the current vector of variables + # Usually during stage 1 the vector sdirn gives a search direction that reduces all the + # active constraint violations by one simultaneously. + if stage == 1: + sdirn -= ((np.dot(sdirn, A[:, iact[nact-1]]) - 1)/zdota[nact-1])*z[:, nact-1] + else: + sdirn = 1/zdota[nact-1]*z[:, nact-1] + else: # icon < nact + # Delete the constraint with the index iact[icon] from the active set, which is done by + # reordering iact[icont:nact] into [iact[icon+1:nact], iact[icon]] and then reduce nact to + # nact - 1. In theory, icon > 0. + # assert icon > 0, "icon > 0 is required" # For Python I think this is irrelevant + z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], icon) # qrexc does nothing if icon == nact + # Indeed, it suffices to pass Z[:, :nact] to qrexc as follows: + # z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], icon) + iact[icon:nact] = [*iact[icon+1:nact], iact[icon]] + vmultc[icon:nact] = [*vmultc[icon+1:nact], vmultc[icon]] + nact -= 1 + + # Powell's code does not have the following. It avoids subsequent exceptions. + # Zaikun 20221212: In theory, nact > 0 in stage 2, as the objective function should always + # be considered as an "active constraint" --- more precisely, iact[nact] = mcon. However, + # looking at the copde, I cannot see why in stage 2 nact must be positive after the reduction + # above. It did happen in stage 1 that nact became 0 after the reduction --- this is + # extremely rare, and it was never observed until 20221212, after almost one year of + # random tests. Maybe nact is theoretically positive even in stage 1? + assert stage == 1 or nact > 0, "nact > 0 is required in stage 2" + if stage == 2 and nact < 0: + break # If this case ever occurs, we have to break, as nact is used as an index below. + if nact > 0: + if np.isnan(zdota[nact-1]) or abs(zdota[nact-1]) <= EPS**2: + break + + # Set sdirn to the direction of the next change to the current vector of variables. + if stage == 1: + sdirn -= np.dot(sdirn, z[:, nact]) * z[:, nact] + # sdirn is orthogonal to z[:, nact+1] + else: + sdirn = 1/zdota[nact-1] * z[:, nact-1] + # end if icon > nact + + # Calculate the step to the trust region boundary or take the step that reduces cviol to 0. + # ----------------------------------------------------------------------------------------- # + # The following calculation of step is adopted from NEWUOA/BOBYQA/LINCOA. It seems to improve + # the performance of COBYLA. We also found that removing the precaution about underflows is + # beneficial to the overall performance of COBYLA --- the underflows are harmless anyway. + dd = delta**2 - np.dot(d, d) + ss = np.dot(sdirn, sdirn) + sd = np.dot(sdirn, d) + if dd <= 0 or ss <= EPS * delta**2 or np.isnan(sd): + break + # sqrtd: square root of a discriminant. The max avoids sqrtd < abs(sd) due to underflow + sqrtd = max(np.sqrt(ss*dd + sd**2), abs(sd), np.sqrt(ss * dd)) + if sd > 0: + step = dd / (sqrtd + sd) + else: + step = (sqrtd - sd) / ss + # step < 0 should not happen. Step can be 0 or NaN when, e.g., sd or ss becomes inf + if step <= 0 or not np.isfinite(step): + break + + # Powell's approach and comments are as follows. + # -------------------------------------------------- # + # The two statements below that include the factor eps prevent + # some harmless underflows that occurred in a test calculation + # (Zaikun: here, eps is the machine epsilon; Powell's original + # code used 1.0e-6, and Powell's code was written in single + # precision). Further, we skip the step if it could be 0 within + # a reasonable tolerance for computer rounding errors. + + # !dd = delta**2 - sum(d**2, mask=(abs(d) >= EPS * delta)) + # !ss = inprod(sdirn, sdirn) + # !if (dd <= 0) then + # ! exit + # !end if + # !sd = inprod(sdirn, d) + # !if (abs(sd) >= EPS * sqrt(ss * dd)) then + # ! step = dd / (sqrt(ss * dd + sd**2) + sd) + # !else + # ! step = dd / (sqrt(ss * dd) + sd) + # !end if + # -------------------------------------------------- # + + if stage == 1: + if isminor(cviol, step): + break + step = min(step, cviol) + + # Set dnew to the new variables if step is the steplength, and reduce cviol to the corresponding + # maximum residual if thage 1 is being done + dnew = d + step * sdirn + if stage == 1: + cviol = max(max(b[iact[:nact]] - dnew@A[:, iact[:nact]]), 0) + # N.B.: cviol will be used when calculating vmultd[nact+1:mcon]. + + # Zaikun 20211011: + # 1. vmultd is computed from scratch for the second (out of 2) time in one iteration. + # 2. vmultd[:nact] and vmultd[nact:mcon] are calculated separately with no coupling. + # 3. vmultd will be calculated from scratch again in the next iteration. + # Set vmultd to the vmultc vector that would occur if d became dnew. A device is included to + # force multd[k] = 0 if deviations from this value can be attributed to computer rounding + # errors. First calculate the new Lagrange multipliers. + # vmultd[:nact] = lsqr(A[:, iact[:nact]], dnew, z[:, :nact], zdota[:nact]) + vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], dnew)[0] + if stage == 2: + vmultd[nact-1] = max(0, vmultd[nact-1]) # This seems never activated. + # Complete vmultd by finding the new constraint residuals. (Powell wrote "Complete vmultc ...") + cvshift = dnew@A[:, iact] - b[iact] + cviol # Only cvshift[nact+1:mcon] is needed + # TODO: The fortran code has a matprod for two vectors, not sure what that's supposed to return? cvshift is supposed to be a vector + cvsabs = abs(dnew)@abs(A[:, iact]) + abs(b[iact]) + cviol + cvshift[isminor(cvshift, cvsabs)] = 0 + vmultd[nact:mcon] = cvshift[nact:mcon] + + # Calculate the fraction of the step from d to dnew that will be taken + fracmult = [vmultc[i]/(vmultc[i] - vmultd[i]) if vmultd[i] < 0 else REALMAX for i in range(len(vmultd))] + # Only the places with vmultd < 0 are relevant below, if any. + icon = np.where((temp_arr := np.array([1, *fracmult])) == min(temp_arr))[0][0] - 1 + # TODO: The above code is a little strange. It seems like icon could be -1 which in our context is invalid + frac = min(temp_arr) + + # Update d, vmultc, and cviol + dold = d + d = (1 - frac)*d + frac * dnew + # Break in the case of inf/nan in d. + if not np.isfinite(d).all(): + d = dold # Should we restore also iact, nact, vmultc, and z? + break + + vmultc = np.maximum(np.zeros(len(vmultc)), (1 - frac)*vmultc + frac*vmultd) + if stage == 1: + # cviol = (1 - frac) * cvold + frac * cviol # Powell's version + # In theory, cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0), yet the + # cviol updated as above can be quite different from this value if A has huge entries (e.g., > 1e20) + cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0) + + if icon < 0 or icon >= mcon: + # In Powell's code, the condition is icon == 0. Indeed, icon < 0 cannot hold unless + # fracmult contains only nan, which should not happen; icon >= mcon should never occur. + break + return iact, nact, d, vmultc, z + + + + + diff --git a/python/src/prima/cobyla/update.py b/python/src/prima/cobyla/update.py new file mode 100644 index 0000000000..fda45db27e --- /dev/null +++ b/python/src/prima/cobyla/update.py @@ -0,0 +1,157 @@ +from prima.common.consts import DAMAGING_ROUNDING, INFO_DEFAULT +import numpy as np + + +def updatexfc(jdrop, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi): + ''' + This function revises the simplex by updating the elements of SIM, SIMI, FVAL, CONMAT, and CVAL + ''' + + num_constraints = len(constr) + num_vars = sim.shape[0] + + # Do nothing when JDROP is None. This can only happen after a trust-region step. + if jdrop is None: # JDROP is None is impossible if the input is correct. + return conmat, cval, fval, sim, simi, INFO_DEFAULT + + sim_old = sim + simi_old = simi + if jdrop < num_vars: + sim[:, jdrop] = d + simi_jdrop = simi[jdrop, :] / np.dot(simi[jdrop, :], d) + simi -= np.outer(simi@d, simi_jdrop) + simi[jdrop, :] = simi_jdrop + else: # jdrop == num_vars + sim[:, num_vars] += d + sim[:, :num_vars] -= np.tile(d, (num_vars, 1)).T + simid = simi@d + sum_simi = np.sum(simi, axis=0) + simi += np.outer(simid, sum_simi / (1 - sum(simid))) + + # Check whether SIMI is a poor approximation to the inverse of SIM[:, :NUM_VARS] + # Calculate SIMI from scratch if the current one is damaged by rounding errors. + itol = 1 + erri = np.max(abs(simi@sim[:, :num_vars] - np.eye(num_vars))) # np.max returns NaN if any input is NaN + if erri > 0.1 * itol or np.isnan(erri): + simi_test = np.linalg.inv(sim[:, :num_vars]) + erri_test = np.max(abs(simi_test@sim[:, :num_vars] - np.eye(num_vars))) + if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)): + simi = simi_test + erri = erri_test + + # If SIMI is satisfactory, then update FVAL, CONMAT, CVAL, and the pole position. Otherwise restore + # SIM and SIMI, and return with INFO = DAMAGING_ROUNDING. + if erri <= itol: + fval[jdrop] = f + conmat[:, jdrop] = constr + cval[jdrop] = cstrv + # Switch the best vertex to the pole position SIM[:, NUM_VARS] if it is not there already + conmat, cval, fval, sim, simi, info = updatepole(cpen, conmat, cval, fval, sim, simi) + else: + info = DAMAGING_ROUNDING + sim = sim_old + simi = simi_old + + + return sim, simi, fval, conmat, cval, info + +def findpole(cpen, cval, fval): + # This subroutine identifies the best vertex of the current simplex with respect to the merit + # function PHI = F + CPEN * CSTRV. + n = len(fval) - 1 # used for debugging which I have not implemented yet + + # Identify the optimal vertex of the current simplex + jopt = len(fval) - 1 + phi = fval + cpen * cval + phimin = min(phi) + joptcandidate = np.argmin(phi) + if phi[joptcandidate] < phi[jopt]: + jopt = joptcandidate + if cpen <= 0 and any(np.logical_and(cval < cval[jopt], phi <= phimin)): + # jopt is the index of the minimum value of cval + # on the set of cval values where phi <= phimin + jopt = np.where(cval == min(cval[phi <= phimin]))[0][0] + return jopt + + +def updatepole(cpen, conmat, cval, fval, sim, simi): + #--------------------------------------------------------------------------------------------------! + # This subroutine identifies the best vertex of the current simplex with respect to the merit + # function PHI = F + CPEN * CSTRV, and then switch this vertex to SIM[:, NUM_VARS], which Powell called + # the "pole position" in his comments. CONMAT, CVAL, FVAL, and SIMI are updated accordingly. + # + # N.B. 1: In precise arithmetic, the following two procedures produce the same results: + # 1) apply UPDATEPOLE to SIM twice, first with CPEN = CPEN1 and then with CPEN = CPEN2; + # 2) apply UPDATEPOLE to SIM with CPEN = CPEN2. + # In finite-precision arithmetic, however, they may produce different results unless CPEN1 = CPEN2. + # + # N.B. 2: When JOPT == N+1, the best vertex is already at the pole position, so there is nothing to + # switch. However, as in Powell's code, the code below will check whether SIMI is good enough to + # work as the inverse of SIM(:, 1:N) or not. If not, Powell's code would invoke an error return of + # COBYLB; our implementation, however, will try calculating SIMI from scratch; if the recalculated + # SIMI is still of poor quality, then UPDATEPOLE will return with INFO = DAMAGING_ROUNDING, + # informing COBYLB that SIMI is poor due to damaging rounding errors. + # + # N.B. 3: UPDATEPOLE should be called when and only when FINDPOLE can potentially returns a value + # other than N+1. The value of FINDPOLE is determined by CPEN, CVAL, and FVAL, the latter two being + # decided by SIM. Thus UPDATEPOLE should be called after CPEN or SIM changes. COBYLA updates CPEN at + # only two places: the beginning of each trust-region iteration, and when REDRHO is called; + # SIM is updated only by UPDATEXFC, which itself calls UPDATEPOLE internally. Therefore, we only + # need to call UPDATEPOLE after updating CPEN at the beginning of each trust-region iteration and + # after each invocation of REDRHO. + + num_constraints = conmat.shape[0] + num_vars = sim.shape[0] + info = INFO_DEFAULT + + # Identify the optimal vertex of the current simplex. + jopt = findpole(cpen, cval, fval) + + # Switch the best vertex to the pole position SIM[:, NUM_VARS] if it is not there already and update + # SIMI. Before the update, save a copy of SIM and SIMI. If the update is unsuccessful due to + # damaging rounding errors, we restore them and return with INFO = DAMAGING_ROUNDING. + sim_old = sim.copy() + simi_old = simi.copy() + if 0 <= jopt < num_vars: + # Unless there is a bug in FINDPOLE it is guaranteed that JOPT >= 0 + # When JOPT == NUM_VARS, there is nothing to switch; in addition SIMI[JOPT, :] will be illegal. + # fval[[jopt, -1]] = fval[[-1, jopt]] + # conmat[:, [jopt, -1]] = conmat[:, [-1, jopt]] # Exchange CONMAT[:, JOPT] AND CONMAT[:, -1] + # cval[[jopt, -1]] = cval[[-1, jopt]] + sim[:, num_vars] += sim[:, jopt] + sim_jopt = sim[:, jopt].copy() + sim[:, jopt] = 0 # np.zeros(num_constraints)? + sim[:, :num_vars] -= np.tile(sim_jopt, (num_vars, 1)).T + # The above update is equivalent to multiplying SIM[:, :NUM_VARS] from the right side by a matrix whose + # JOPT-th row is [-1, -1, ..., -1], while all the other rows are the same as those of the + # identity matrix. It is easy to check that the inverse of this matrix is itself. Therefore, + # SIMI should be updated by a multiplication with this matrix (i.e. its inverse) from the left + # side, as is done in the following line. The JOPT-th row of the updated SIMI is minus the sum + # of all rows of the original SIMI, whereas all the other rows remain unchanged. + simi[jopt, :] = -np.sum(simi, axis=0) + + # Check whether SIMI is a poor approximation to the inverse of SIM[:, :NUM_VARS] + # Calculate SIMI from scratch if the current one is damaged by rounding errors. + erri = np.max(abs(simi@sim[:, :num_vars] - np.eye(num_vars))) # np.max returns NaN if any input is NaN + itol = 1 + if erri > 0.1 * itol or np.isnan(erri): + simi_test = np.linalg.inv(sim[:, :num_vars]) + erri_test = np.max(abs(simi_test@sim[:, :num_vars] - np.eye(num_vars))) + if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)): + simi = simi_test + erri = erri_test + + + # If SIMI is satisfactory, then update FVAL, CONMAT, and CVAL. Otherwiae restore SIM and SIMI, and + # return with INFO = DAMAGING_ROUNDING. + if erri <= itol: + if 0 <= jopt < num_vars: + fval[[jopt, num_vars]] = fval[[num_vars, jopt]] + conmat[:, [jopt, num_vars]] = conmat[:, [num_vars, jopt]] + cval[[jopt, num_vars]] = cval[[num_vars, jopt]] + else: # erri > itol or erri is NaN + info = DAMAGING_ROUNDING + sim = sim_old + simi = simi_old + + return conmat, cval, fval, sim, simi, info \ No newline at end of file diff --git a/python/src/prima/common/__init__.py b/python/src/prima/common/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/src/prima/common/consts.py b/python/src/prima/common/consts.py new file mode 100644 index 0000000000..5b41d77a70 --- /dev/null +++ b/python/src/prima/common/consts.py @@ -0,0 +1,22 @@ +import numpy as np + +REALMIN = np.finfo(float).tiny +REALMAX = np.finfo(float).max +RHOBEG_DEFAULT = 1 +RHOEND_DEFAULT = 1e-6 +FUNCMAX = 2.0**100 +CONSTRMAX = FUNCMAX +EPS = np.finfo(float).eps +CTOL_DEFAULT = np.sqrt(EPS) +CWEIGHT_DEFAULT = 1e8 +FTARGET_DEFAULT = -REALMAX +IPRINT_DEFAULT = False +MAXFUN_DIM_DEFAULT = 500 +INFO_DEFAULT = 0 +SMALL_TR_RADIUS = 0 # Why is this same as default? +MAXTR_REACHED = 20 +DAMAGING_ROUNDING = 7 +NAN_INF_X = -1 +NAN_INF_F = -2 +FTARGET_ACHIEVED = 1 +MAXFUN_REACHED = 3 \ No newline at end of file diff --git a/python/src/prima/common/evaluate.py b/python/src/prima/common/evaluate.py new file mode 100644 index 0000000000..d16cabf419 --- /dev/null +++ b/python/src/prima/common/evaluate.py @@ -0,0 +1,36 @@ +import numpy as np +from prima.common.consts import FUNCMAX, CONSTRMAX + +# This is a module evaluating the objective/constraint function with +# Nan/Inf handling. + + +def moderatex(x): + np.nan_to_num(x, copy=False, nan=FUNCMAX) + x = np.clip(x, -np.finfo(float).max, np.finfo(float).max) + return x + +def moderatef(f): + f = FUNCMAX if np.isnan(f) else f + return min(FUNCMAX, f) + +def moderatec(c): + np.nan_to_num(c, copy=False, nan=-CONSTRMAX) + c = np.clip(c, -CONSTRMAX, CONSTRMAX) + return c + + +def evaluate(calfun, x): + """ + This function evaluates calfun at x, setting to f the objective function value. + Nan/Inf are handled by a moderated extreme barrier + """ + + assert not any(np.isnan(x)), "x contains NaN" + + f, constr = calfun(moderatex(x)) + f = moderatef(f) + constr = moderatec(constr) + cstrv = max([*-constr, 0]) + + return f, constr, cstrv \ No newline at end of file diff --git a/python/src/prima/common/linalg.py b/python/src/prima/common/linalg.py new file mode 100644 index 0000000000..a9b8810588 --- /dev/null +++ b/python/src/prima/common/linalg.py @@ -0,0 +1,112 @@ +import numpy as np +from prima.common.consts import EPS, REALMAX, REALMIN + +def planerot(x): + ''' + As in MATLAB, planerot(x) returns a 2x2 Givens matrix G for x in R2 so that Y=G@x has Y[1] = 0. + Roughly speaking, G = np.array([[x[0]/R, x[1]/R], [-x[1]/R, x[0]/R]]), where R = np.linalg.norm(x). + 0. We need to take care of the possibilities of R=0, Inf, NaN, and over/underflow. + 1. The G defined above is continuous with respect to X except at 0. Following this definition, + G = np.array([[np.sign(x[0]), 0], [0, np.sign(x[0])]]) if x[1] == 0, + G = np.array([[0, np.sign(x[1])], [np.sign(x[1]), 0]]) if x[0] == 0 + Yet some implementations ignore the signs, leading to discontinuity and numberical instability. + 2. Difference from MATLAB: if x contains NaN of consists of only Inf, MATLAB returns a NaN matrix, + but we return an identity matrix or a matrix of +/-np.sqrt(2). We intend to keep G always orthogonal. + ''' + + # Preconditions + assert len(x) == 2, "x must be a 2-vector" + + # ================== + # Calculation starts + # ================== + + # Define C = X(1) / R and S = X(2) / R with R = HYPOT(X(1), X(2)). Handle Inf/NaN, over/underflow. + if (any(np.isnan(x))): + # In this case, MATLAB sets G to NaN(2, 2). We refrain from doing so to keep G orthogonal. + c = 1 + s = 0 + elif (all(np.isinf(x))): + # In this case, MATLAB sets G to NaN(2, 2). We refrain from doing so to keep G orthogonal. + c = 1 / np.sqrt(2) * np.sign(x[0]) + s = 1 / np.sqrt(2) * np.sign(x[1]) + elif (abs(x[0]) <= 0 and abs(x[1]) <= 0): # X(1) == 0 == X(2). + c = 1 + s = 0 + elif (abs(x[1]) <= EPS * abs(x[0])): + # N.B.: + # 0. With <= instead of <, this case covers X(1) == 0 == X(2), which is treated above separately + # to avoid the confusing SIGN(., 0) (see 1). + # 1. SIGN(A, 0) = ABS(A) in Fortran but sign(0) = 0 in MATLAB, Python, Julia, and R# + # 2. Taking SIGN(X(1)) into account ensures the continuity of G with respect to X except at 0. + c = np.sign(x[0]) + s = 0 + elif (abs(x[0]) <= EPS * abs(x[1])): + # N.B.: SIGN(A, X) = ABS(A) * sign of X /= A * sign of X # Therefore, it is WRONG to define G + # as SIGN(RESHAPE([ZERO, -ONE, ONE, ZERO], [2, 2]), X(2)). This mistake was committed on + # 20211206 and took a whole day to debug# NEVER use SIGN on arrays unless you are really sure. + c = 0 + s = np.sign(x[1]) + else: + # Here is the normal case. It implements the Givens rotation in a stable & continuous way as in: + # Bindel, D., Demmel, J., Kahan, W., and Marques, O. (2002). On computing Givens rotations + # reliably and efficiently. ACM Transactions on Mathematical Software (TOMS), 28(2), 206-238. + # N.B.: 1. Modern compilers compute SQRT(REALMIN) and SQRT(REALMAX/2.1) at compilation time. + # 2. The direct calculation without involving T and U seems to work better; use it if possible. + if (all(np.logical_and(np.sqrt(REALMIN) < np.abs(x), np.abs(x) < np.sqrt(REALMAX / 2.1)))): + # Do NOT use HYPOTENUSE here; the best implementation for one may be suboptimal for the other + r = np.linalg.norm(x) + c = x[0] / r + s = x[1] / r + elif (abs(x[0]) > abs(x[1])): + t = x[1] / x[0] + u = max(1, abs(t), np.sqrt(1 + t**2)) # MAXVAL: precaution against rounding error. + u *= np.sign(x[0]) ##MATLAB: u = sign(x(1))*sqrt(1 + t**2) + c = 1 / u + s = t / u + else: + t = x[0] / x[1] + u = max([1, abs(t), np.sqrt(1 + t**2)]) # MAXVAL: precaution against rounding error. + u *= np.sign(x[1]) ##MATLAB: u = sign(x(2))*sqrt(1 + t**2) + c = t / u + s = 1 / u + + G = np.array([[c, s], [-s, c]]) # MATLAB: G = [c, s; -s, c] + + #====================# + # Calculation ends # + #====================# + + # Postconditions + assert G.shape == (2,2) + assert np.all(np.isfinite(G)) + assert abs(G[0, 0] - G[1, 1]) + abs(G[0, 1] + G[1, 0]) <= 0 + tol = max(1.0E-10, min(1.0E-1, 1.0E6 * EPS)) + # assert isorth(G, tol) # TODO: Implement isorth + if all(np.logical_and(np.isfinite(x), np.abs(x) < np.sqrt(REALMAX / 2.1))): + r = np.linalg.norm(x) + assert max(abs(G@x - [r, 0])) <= max(tol, tol * r), 'G @ X = [||X||, 0]' + + return G + + +def isminor(x, ref): + ''' + This function tests whether x is minor compared to ref. It is used by Powell, e.g., in COBYLA. + In precise arithmetic, isminor(x, ref) is true if and only if x == 0; in floating point + arithmetic, isminor(x, ref) is true if x is 0 or its nonzero value can be attributed to + computer rounding errrors according to ref. + Larger sensitivity means the function is more strict/precise, the value 0.1 being due to Powell. + + For example: + isminor(1e-20, 1e300) -> True, because in floating point arithmetic 1e-20 cannot be added to + 1e300 without being rounded to 1e300. + isminor(1e300, 1e-20) -> False, because in floating point arithmetic adding 1e300 to 1e-20 + dominates the latter number. + isminor(3, 4) -> False, because 3 can be added to 4 without being rounded off + ''' + + sensitivity = 0.1 + refa = abs(ref) + sensitivity * abs(x) + refb = abs(ref) + 2 * sensitivity * abs(x) + return np.logical_or(abs(ref) >= refa, refa >= refb) diff --git a/python/src/prima/common/output.py b/python/src/prima/common/output.py new file mode 100644 index 0000000000..151b150eb9 --- /dev/null +++ b/python/src/prima/common/output.py @@ -0,0 +1,20 @@ +def fmsg(solver, iprint, nf, f, x, cstrv=None, constr=None): + if abs(iprint) < 3: + return + elif iprint > 0: + output = "stdout" + else: + output = "file" + filename = solver + "_output.txt" + + # Decide whether the problem is truly constrained. + if constr is None: + is_constrained = cstrv is not None + else: + is_constrained = bool(len(constr)) + + # Decide the constraint violation + + # Will finish this later, not important for now + + diff --git a/python/src/prima/common/powalg.py b/python/src/prima/common/powalg.py new file mode 100644 index 0000000000..a44b37bc6e --- /dev/null +++ b/python/src/prima/common/powalg.py @@ -0,0 +1,118 @@ +import numpy as np +from prima.common.linalg import isminor, planerot +from prima.common.consts import EPS + + +def qradd_Rdiag(c, Q, Rdiag, n): + ''' + This function updates the QR factorization of an MxN matrix A of full column rank, attempting to + add a new column C to this matrix as the LAST column while maintaining the full-rankness. + Case 1. If C is not in range(A) (theoretically, it implies N < M), then the new matrix is np.hstack([A, C]) + Case 2. If C is in range(A), then the new matrix is np.hstack([A[:, :n-1], C]) + N.B.: + 0. Instead of R, this subroutine updates Rdiag, which is np.diag(R), with a size at most M and at + least min(m, n+1). The number is min(m, n+1) rather than min(m, n) as n may be augmented by 1 in + the function. + 1. With the two cases specified as above, this function does not need A as an input. + 2. The function changes only Q[:, nsave:m] (nsave is the original value of n) and + R[:, n-1] (n takes the updated value) + 3. Indeed, when C is in range(A), Powell wrote in comments that "set iOUT to the index of the + constraint (here, coloumn of A --- Zaikun) to be deleted, but branch if no suitable index can be + found". The idea is to replace a column of A by C so that the new matrix still has full rank + (such a column must exist unless C = 0). But his code essentially sets iout=n always. Maybe he + found this worked well enough in practice. Meanwhile, Powell's code includes a snippet that can + never be reached, which was probably intended to deal with the case that IOUT != n + ''' + m = Q.shape[1] + nsave = n # Needed for debugging (only) + + # As in Powell's COBYLA, CQ is set to 0 at the positions with CQ being negligible as per ISMINOR. + # This may not be the best choice if the subroutine is used in other contexts, e.g. LINCOA. + cq = c @ Q + cqa = abs(c) @ abs(Q) + # The line below basically makes an element of cq 0 if adding it to the corresponding element of + # cqa does not change the latter. + cq = np.array([0 if isminor(cqi, cqai) else cqi for cqi, cqai in zip(cq, cqa)]) + + # Update Q so that the columns of Q[:, n+1:m] are orthogonal to C. This is done by applying a 2D + # Givens rotation to Q[:, [k, k+1]] from the right to zero C' @ Q[:, k+1] out for K=n+1, ... m-1. + # Nothing will be done if n >= m-1 + for k in range(m-2, n-1, -1): + if abs(cq[k+1]) > 0: + # Powell wrote cq[k+1] != 0 instead of abs. The two differ if cq[k+1] is NaN. + # If we apply the rotation below when cq[k+1] = 0, then cq[k] will get updated to |cq[k]|. + G = planerot(cq[k:k+2]) + Q[:, [k, k+1]] = Q[:, [k, k+1]] @ G.T + cq[k] = np.linalg.norm(cq[k:k+2]) + + # Augment n by 1 if C is not in range(A) + # The two ifs cannot be merged as Fortran may evaluate cq[n+1] even if n>=m, leading to a segfault. + # TODO: Reword the above as appropriate for Python. RangeError as opposed to segafult perhaps + if n < m: + # Powell's condition for the following if: cq[n+1] != 0 + if abs(cq[n]) > EPS*2 and not isminor(cq[n], cqa[n]): + n += 1 + + # Update Rdiag so that Rdiag[n] = cq[n] = npdot(c, q[:, n]). Note that N may be been augmented. + if n - 1 >= 0 and n - 1 < m: # n >= m should not happen unless the input is wrong + Rdiag[n - 1] = cq[n - 1] + return Q, Rdiag, n + + +def qrexc_Rdiag(A, Q, Rdiag, i): # Used in COBYLA + ''' + This function updates the QR factorization for an MxN matrix A=Q@R so that the updated Q and + R form a QR factorization of [A_0, ..., A_{I-1}, A_{I+1}, ..., A_{N-1}, A_I] which is the matrix + obtained by rearranging columns [I, I+1, ... N-1] of A to [I+1, ..., N-1, I]. Here A is ASSUMED TO + BE OF FULL COLUMN RANK, Q is a matrix whose columns are orthogonal, and R, which is not present, + is an upper triangular matrix whose diagonal entries are nonzero. Q and R need not be square. + N.B.: + 0. Instead of R, this function updates Rdiag, which is np.diag(R), the size being n. + 1. With L = Q.shape[1] = R.shape[0], we have M >= L >= N. Most often L = M or N. + 2. This function changes only Q[:, i:] and Rdiag[i:] + 3. (NDB 20230919) In Python, i is either icon or nact - 2, whereas in FORTRAN it is either icon or nact - 1. + ''' + + # Sizes + m, n = A.shape + + # Preconditions + assert n >= 1 and n <= m + assert i >= 0 and i < n + assert len(Rdiag) == n + assert Q.shape[0] == m and Q.shape[1] >= n and Q.shape[1] <= m + # tol = max(1.0E-8, min(1.0E-1, 1.0E8 * EPS * m + 1)) + # assert isorth(Q, tol) # Costly! + + + if i < 0 or i >= n: + return Q, Rdiag + + # Let R be the upper triangular matrix in the QR factorization, namely R = Q.T@A. + # For each k, find the Givens rotation G with G@(R[k:k+2, :]) = [hypt, 0], and update Q[:, k:k+2] + # to Q[:, k:k+2]@(G.T). Then R = Q.T@A is an upper triangular matrix as long as A[:, [k, k+1]] is + # updated to A[:, [k+1, k]]. Indeed, this new upper triangular matrix can be obtained by first + # updating R[[k, k+1], :] to G@(R[[k, k+1], :]) and then exchanging its columns K and K+1; at the same + # time, entries k and k+1 of R's diagonal Rdiag become [hypt, -(Rdiag[k+1]/hypt)*RDiag[k]]. + # After this is done for each k = 0, ..., n-2, we obtain the QR factorization of the matrix that + # rearranges columns [i, i+1, ... n-1] of A as [i+1, ..., n-1, i]. + # Powell's code, however, is slightly different: before everything, he first exchanged columns k and + # k+1 of Q (as well as rows k and k+1 of R). This makes sure that the entries of the update Rdiag + # are all positive if it is the case for the original Rdiag. + for k in range(i, n-1): + G = planerot([Rdiag[k+1], np.dot(Q[:, k], A[:, k+1])]) + Q[:, [k, k+1]] = Q[:, [k+1, k]]@(G.T) + # Powell's code updates Rdiag in the following way: + # hypt = np.sqrt(Rdiag[k+1]**2 + np.dot(Q[:, k], A[:, k+1])**2) + # Rdiag[[k, k+1]] = [hypt, (Rdiag[k+1]/hypt)*Rdiag[k]] + # Note that Rdiag[n-1] inherits all rounding in Rdiag[i:n-1] and Q[:, i:n-1] and hence contains + # significant errors. Thus we may modify Powell's code to set only Rdiag[k] = hypt here and then + # calculate Rdiag[n] by an inner product after the loop. Nevertheless, we simple calculate RDiag + # from scratch below. + + # Calculate Rdiag(i:n) from scratch + Rdiag[i:n-1] = [np.dot(Q[:, k], A[:, k+1]) for k in range(i, n-1)] + Rdiag[n-1] = np.dot(Q[:, n-1], A[:, i]) + # Rdiag = np.array(Rdiag) + + return Q, Rdiag \ No newline at end of file diff --git a/python/src/prima/common/selectx.py b/python/src/prima/common/selectx.py new file mode 100644 index 0000000000..22586cab1f --- /dev/null +++ b/python/src/prima/common/selectx.py @@ -0,0 +1,189 @@ +''' +This module provides subroutines that ensure the returned X is optimal among all the calculted +points in the sense that no other point achieves both lower function value and lower constraint +violation at the same time. This module is needed only in the constrained case. + +Coded by Zaikun ZHANG (www.zhangzk.net). + +Python implementation by Nickolai Belakovski +''' + +import numpy as np +import numpy.typing as npt +from prima.common.consts import EPS, CONSTRMAX, REALMAX, FUNCMAX + +def isbetter(f1: float, c1: float, f2: float, c2: float, ctol: float) -> bool: + ''' + This function compares whether FC1 = (F1, C1) is (strictly) better than FC2 = (F2, C2), which + basically means that (F1 < F2 and C1 <= C2) or (F1 <= F2 and C1 < C2). + It takes care of the cases where some of these values are NaN or Inf, even though some cases + should never happen due to the moderated extreme barrier. + At return, BETTER = TRUE if and only if (F1, C1) is better than (F2, C2). + Here, C means constraint violation, which is a nonnegative number. + ''' + isbetter = False + # Even though NaN/+Inf should not occur in FC1 or FC2 due to the moderated extreme barrier, for + # security and robustness, the code below does not make this assumption. + isbetter = isbetter or (any(np.isnan([f1, c1])) and not any(np.isnan([f2, c2]))) + + isbetter = isbetter or (f1 < f2 and c1 <= c2) + isbetter = isbetter or (f1 <= f2 and c1 < c2) + + # If C1 <= CTOL and C2 is significantly larger/worse than CTOL, i.e., C2 > MAX(CTOL,CREF), + # then FC1 is better than FC2 as long as F1 < REALMAX. Normally CREF >= CTOL so MAX(CTOL, CREF) + # is indeed CREF. However, this may not be true if CTOL > 1E-1*CONSTRMAX. + cref = 10 * max(EPS, min(ctol, 1.0E-2 * CONSTRMAX)) # The MIN avoids overflow. + isbetter = isbetter or (f1 < REALMAX and c1 <= ctol and (c2 > max(ctol, cref) or np.isnan(c2))) + return isbetter + + +def savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr=None, confilt=None): + ''' + This subroutine saves X, F, and CSTRV in XFILT, FFILT, and CFILT (and CONSTR in CONFILT + if they are present), unless a vector in XFILT[:, :NFILT] is better than X. + If X is better than some vectors in XFLIT[:, :NFILT] then these vectors will be + removed. If X is not better than any of XFILT[:, :NFILT], but NFILT == MAXFILT, + then we remove a column from XFILT according to the merit function + PHI = FFILT + CWEIGHT * max(CFILT - CTOL, 0) + N.B.: + 1. Only XFILT[:, :NFILT] and FFILT[:, :NFILT] etc contains valid information, + while XFILT[:, NFILT+1:MAXFILT] and FFILT[:, NFILT+1:MAXFILT] etc are not + initialized yet. + 2. We decide whether and X is better than another by the ISBETTER function + ''' + + # Sizes + if constr is not None: + num_constraints = len(constr) + else: + num_constraints = 0 + + num_vars = len(x) + maxfilt = len(ffilt) + + # Preconditions TODO + + # Return immeditely if any column of XFILT is better than X. TODO: Lazy evaluation + if any([isbetter(ffilt_i, cfilt_i, f, cstrv, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])]): + return nfilt, cfilt, ffilt, xfilt, confilt + + # Decide which columns of XFILT to keep. + keep = np.logical_not([isbetter(f, cstrv, ffilt_i, cfilt_i, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])]) + + # If NFILT == MAXFILT and X is not better than any column of XFILT, then we remove the worst column + # of XFILT according to the merit function PHI = FFILT + CWEIGHT * MAX(CFILT - CTOL, ZERO). + if sum(keep) == maxfilt: # In this case, NFILT = SIZE(KEEP) = COUNT(KEEP) = MAXFILT > 0. + # TODO: This code path has not been vetted + cfilt_shifted = np.maximum(cfilt - ctol, 0) + if cweight <= 0: + phi = ffilt + elif np.isposinf(cweight): + phi = cfilt_shifted + # We should not use CFILT here; if MAX(CFILT_SHIFTED) is attained at multiple indices, then + # we will check FFILT to exhaust the remaining degree of freedom. + else: + phi = np.maximum(ffilt, -REALMAX) + phi = np.nan_to_num(phi, nan=-REALMAX) # Replace NaN with -REALMAX and +/- inf with large numbers + phi += cweight * cfilt_shifted + # We select X to maximize PHI. In case there are multiple maximizers, we take the one with the + # largest CSTRV_SHIFTED; if there are more than one choices, we take the one with the largest F; + # if there are several candidates, we take the one with the largest CSTRV; if the last comparison + # still leads to more than one possibilities, then they are equally bad and we choose the first. + # N.B.: + # 1. This process is the opposite of selecting KOPT in SELECTX. + # 2. In finite-precision arithmetic, PHI_1 == PHI_2 and CSTRV_SHIFTED_1 == CSTRV_SHIFTED_2 do + # not ensure that F_1 == F_2! + phimax = max(phi) + cref = max(cfilt_shifted[phi >= phimax]) + fref = max(ffilt[cfilt_shifted >= cref]) + kworst = np.where(cfilt == max(cfilt[ffilt <= fref]))[0][0] + if kworst < 0 or kworst >= len(keep): # For security. Should not happen. + kworst = 0 + keep[kworst] = False + + # Keep the good xfilt values and remove all the ones that are strictly worse than the new x. + nfilt = sum(keep) + index_to_keep = np.where(keep)[0] + xfilt[:, :nfilt] = xfilt[:, index_to_keep] + ffilt[:nfilt] = ffilt[index_to_keep] + cfilt[:nfilt] = cfilt[index_to_keep] + if confilt is not None and constr is not None: + confilt[:, :nfilt] = confilt[:, index_to_keep] + + # Once we have removed all the vectors that are strictly worse than x, + # we add x to the filter. + xfilt[:, nfilt] = x + ffilt[nfilt] = f + cfilt[nfilt] = cstrv + if confilt is not None and constr is not None: + confilt[:, nfilt] = constr + nfilt += 1 # In Python we need to increment the index afterwards + + return nfilt, cfilt, ffilt, xfilt, confilt + + +def selectx(fhist: npt.NDArray, chist: npt.NDArray, cweight: float, ctol: float): + ''' + This subroutine selects X according to FHIST and CHIST, which represents (a part of) history + of F and CSTRV. Normally, FHIST and CHIST are not the full history but only a filter, e.g. ffilt + and CFILT generated by SAVEFILT. However, we name them as FHIST and CHIST because the [F, CSTRV] + in a filter should not dominate each other, but this subroutine does NOT assume such a property. + N.B.: CTOL is the tolerance of the constraint violation (CSTRV). A point is considered feasible if + its constraint violation is at most CTOL. Not that CTOL is absolute, not relative. + ''' + + nhist = len(fhist) + + # We select X among the points with F < FREF and CSTRV < CREF. + # Do NOT use F <= FREF, because F == FREF (FUNCMAX or REALMAX) may mean F == INF in practice! + if any(np.logical_and(fhist < FUNCMAX, chist < CONSTRMAX)): + fref = FUNCMAX + cref = CONSTRMAX + elif any(np.logical_and(fhist < REALMAX, chist < CONSTRMAX)): + fref = REALMAX + cref = CONSTRMAX + elif any(np.logical_and(fhist < FUNCMAX, chist < REALMAX)): + fref = FUNCMAX + cref = REALMAX + else: + fref = REALMAX + cref = REALMAX + + if not any(np.logical_and(fhist < fref, chist < cref)): + kopt = nhist - 1 + else: + # Shift the constraint violations by ctol, so that cstrv <= ctol is regarded as no violation. + chist_shifted = np.maximum(chist - ctol, 0) + # cmin is the minimal shift constraint violation attained in the history. + cmin = min(chist_shifted[fhist < fref]) + # We consider only the points whose shifted constraint violations are at most the cref below. + # N.B.: Without taking max(EPS, .), cref would be 0 if cmin = 0. In that case, asking for + # cstrv_shift < cref would be WRONG! + cref = max(EPS, 2*cmin) + # We use the following phi as our merit function to select X. + if cweight <= 0: + phi = fhist + elif np.isposinf(cweight): + phi = chist_shifted + # We should not use chist here; if min(chist_shifted) is attained at multiple indices, then + # we will check fhist to exhaust the remaining degree of freedom. + else: + phi = np.maximum(fhist, -REALMAX) + cweight * chist_shifted + # max(fhist, -REALMAX) makes sure that phi will not contain NaN (unless there is a bug). + + # We select X to minimize phi subject to f < fref and cstrv_shift <= cref (see the comments + # above for the reason of taking "<" and "<=" in these two constraints). In case there are + # multiple minimizers, we take the one with the least cstrv_shift; if there is more than one + # choice, we take the one with the least f; if there are several candidates, we take the one + # with the least cstrv; if the last comparison still leads to more than one possibility, then + # they are equally good and we choose the first. + # N.B.: + # 1. This process is the opposite of selecting kworst in savefilt + # 2. In finite-precision arithmetic, phi_2 == phi_2 and cstrv_shift_1 == cstrv_shifted_2 do + # not ensure thatn f_1 == f_2! + phimin = min(phi[np.logical_and(fhist < fref, chist_shifted <= cref)]) + cref = min(chist_shifted[np.logical_and(fhist < fref, phi <= phimin)]) + fref = min(fhist[chist_shifted <= cref]) + kopt = np.where(chist == min(chist[fhist <= fref]))[0][0] + + return kopt \ No newline at end of file diff --git a/python/src/prima/examples/cobyla/cobyla_example.py b/python/src/prima/examples/cobyla/cobyla_example.py new file mode 100644 index 0000000000..9be2f566e3 --- /dev/null +++ b/python/src/prima/examples/cobyla/cobyla_example.py @@ -0,0 +1,325 @@ +import numpy as np +from prima.common.evaluate import evaluate +from prima.common.consts import EPS, RHOBEG_DEFAULT, RHOEND_DEFAULT, \ + CTOL_DEFAULT, CWEIGHT_DEFAULT, FTARGET_DEFAULT, IPRINT_DEFAULT, \ + MAXFUN_DIM_DEFAULT +from prima.cobyla.cobylb import cobylb + +debugging = True + +def test_chebyquad(): + f, _ = calcfc_chebyquad([1, 2]) + assert np.isclose(f, 91+1/9, atol=1e-6) + + +def calcfc_chebyquad(x): + x = np.array(x) + n = len(x) # or shape? + y = np.zeros((n + 1, n + 1)) + y[0:n, 0] = 1 + y[0:n, 1] = 2*x - 1 + for i in range(1, n): + y[0:n, i+1] = 2*y[0:n, 1] * y[0:n, i] - y[0:n, i - 1] + + f = 0 + for i in range(n+1): + tmp = sum(y[0:n, i]) / n + if i % 2 == 0: + tmp += 1/((i)**2 - 1) + f += tmp**2 + constr = np.zeros(1) + return f, constr + +def calcfc_hexagon(x): + # Test problem 10 in Powell's original algorithm + + assert len(x) == 9 + + f = -0.5 * (x[0] * x[3] - x[1] * x[2] + x[2] * x[8] - x[4] * x[8] + x[4] * x[7] - x[5] * x[6]) + constr = np.zeros(14) + constr[0] = 1 - x[2]**2 - x[3]**2 + constr[1] = 1 - x[8]**2 + constr[2] = 1 - x[4]**2 - x[5]**2 + constr[3] = 1 - x[1-1]**2 - (x[1] - x[8])**2 + constr[4] = 1 - (x[1-1] - x[4])**2 - (x[1] - x[5])**2 + constr[5] = 1 - (x[1-1] - x[6])**2 - (x[1] - x[7])**2 + constr[6] = 1 - (x[2] - x[4])**2 - (x[3] - x[5])**2 + constr[7] = 1 - (x[2] - x[6])**2 - (x[3] - x[7])**2 + constr[8] = 1 - x[6]**2 - (x[7] - x[8])**2 + constr[9] = x[1-1] * x[3] - x[1] * x[2] + constr[10] = x[2] * x[8] + constr[11] = -x[4] * x[8] + constr[12] = x[4] * x[7] - x[5] * x[6] + constr[13] = x[8] + + return f, constr + +def cobyla(calcfc, m, x, f, cstrv=0, constr=0, f0=None, constr0=None, nf=None, rhobeg=None, + rhoend=None, ftarget=FTARGET_DEFAULT, ctol=CTOL_DEFAULT, cweight=CWEIGHT_DEFAULT, + maxfun=None, iprint=IPRINT_DEFAULT, eta1=None, eta2=None, gamma1=0.5, gamma2=2, + xhist=False, fhist=False, chist=False, conhist=False, maxhist=0, maxfilt=2000): + """ + Among all the arguments, only CALCFC, M, X, and F are obligatory. The others are OPTIONAL and you + can neglect them unless you are familiar with the algorithm. Any unspecified optional input will + take the default value detailed below. For instance, we may invoke the solver as follows. + + ! First define CALCFC, M, and X, and then do the following. + call cobyla(calcfc, m, x, f) + + or + + ! First define CALCFC, M, and X, and then do the following. + call cobyla(calcfc, m, x, f, rhobeg = 0.5D0, rhoend = 1.0D-3, maxfun = 100) + + ! IMPORTANT NOTICE: The user must set M correctly to the number of constraints, namely the size of + ! CONSTR introduced below. Set M to 0 if there is no constraint. + + See examples/cobyla_exmp.py for a concrete example. + + A detailed introduction to the arguments is as follows. + N.B.: RP and IK are defined in the module CONSTS_MOD. See consts.F90 under the directory named + "common". By default, RP = kind(0.0D0) and IK = kind(0), with REAL(RP) being the double-precision + real, and INTEGER(IK) being the default integer. For ADVANCED USERS, RP and IK can be defined by + setting PRIMA_REAL_PRECISION and PRIMA_INTEGER_KIND in common/ppf.h. Use the default if unsure. + + CALCFC + Input, subroutine. + CALCFC(X, F, CONSTR) should evaluate the objective function and constraints at the given + REAL(RP) vector X; it should set the objective function value to the REAL(RP) scalar F and the + constraint value to the REAL(RP) vector CONSTR. It must be provided by the user, and its + definition must conform to the following interface: + !-------------------------------------------------------------------------! + subroutine calcfc(x, f, constr) + real(RP), intent(in) :: x(:) + real(RP), intent(out) :: f + real(RP), intent(out) :: constr(:) + end subroutine calcfc + !-------------------------------------------------------------------------! + Besides, the subroutine should NOT access CONSTR beyond CONSTR(1:M), where M is the second + compulsory argument (see below), signifying the number of constraints. + + M + Input, INTEGER(IK) scalar. + M must be set to the number of constraints, namely the size (length) of CONSTR(X). + N.B.: + 1. M must be specified correctly, or the program will crash!!! + 2. Why don't we define M as optional and default it to 0 when it is absent? This is because + we need to allocate memory for CONSTR_LOC according to M. To ensure that the size of CONSTR_LOC + is correct, we require the user to specify M explicitly. + + X + Input and output, REAL(RP) vector. + As an input, X should be an N-dimensional vector that contains the starting point, N being the + dimension of the problem. As an output, X will be set to an approximate minimizer. + + F + Output, REAL(RP) scalar. + F will be set to the objective function value of X at exit. + + CSTRV + Output, REAL(RP) scalar. + CSTRV will be set to the constraint violation of X at exit, i.e., MAXVAL([-CONSTR(X), 0]). + + CONSTR + Output, ALLOCATABLE REAL(RP) vector. + CONSTR will be set to the constraint value of X at exit. + + F0 + Input, REAL(RP) scalar. + F0, if present, should be set to the objective function value of the starting X. + + CONSTR0 + Input, REAL(RP) vector. + CONSTR0, if present, should be set to the constraint value of the starting X; in addition, + SIZE(CONSTR0) must be M, or the solver will abort. + + NF + Output, INTEGER(IK) scalar. + NF will be set to the number of calls of CALCFC at exit. + + RHOBEG, RHOEND + Inputs, REAL(RP) scalars, default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be set to + the initial and final values of a trust-region radius, both being positive and RHOEND <= RHOBEG. + Typically RHOBEG should be about one tenth of the greatest expected change to a variable, and + RHOEND should indicate the accuracy that is required in the final values of the variables. + + FTARGET + Input, REAL(RP) scalar, default: -Inf. + FTARGET is the target function value. The algorithm will terminate when a feasible point with a + function value <= FTARGET is found. + + CTOL + Input, REAL(RP) scalar, default: machine epsilon. + CTOL is the tolerance of constraint violation. Any X with MAXVAL(-CONSTR(X)) <= CTOL is + considered feasible. + N.B.: 1. CTOL is absolute, not relative. 2. CTOL is used only when selecting the returned X. + It does not affect the iterations of the algorithm. + + CWEIGHT + Input, REAL(RP) scalar, default: CWEIGHT_DFT defined in the module CONSTS_MOD in common/consts.F90. + CWEIGHT is the weight that the constraint violation takes in the selection of the returned X. + + MAXFUN + Input, INTEGER(IK) scalar, default: MAXFUN_DIM_DFT*N with MAXFUN_DIM_DFT defined in the module + CONSTS_MOD (see common/consts.F90). MAXFUN is the maximal number of calls of CALCFC. + + IPRINT + Input, INTEGER(IK) scalar, default: 0. + The value of IPRINT should be set to 0, 1, -1, 2, -2, 3, or -3, which controls how much + information will be printed during the computation: + 0: there will be no printing; + 1: a message will be printed to the screen at the return, showing the best vector of variables + found and its objective function value; + 2: in addition to 1, each new value of RHO is printed to the screen, with the best vector of + variables so far and its objective function value; each new value of CPEN is also printed; + 3: in addition to 2, each function evaluation with its variables will be printed to the screen; + -1, -2, -3: the same information as 1, 2, 3 will be printed, not to the screen but to a file + named COBYLA_output.txt; the file will be created if it does not exist; the new output will + be appended to the end of this file if it already exists. + Note that IPRINT = +/-3 can be costly in terms of time and/or space. + + ETA1, ETA2, GAMMA1, GAMMA2 + Input, REAL(RP) scalars, default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2. + ETA1, ETA2, GAMMA1, and GAMMA2 are parameters in the updating scheme of the trust-region radius + detailed in the subroutine TRRAD in trustregion.f90. Roughly speaking, the trust-region radius + is contracted by a factor of GAMMA1 when the reduction ratio is below ETA1, and enlarged by a + factor of GAMMA2 when the reduction ratio is above ETA2. It is required that 0 < ETA1 <= ETA2 + < 1 and 0 < GAMMA1 < 1 < GAMMA2. Normally, ETA1 <= 0.25. It is NOT advised to set ETA1 >= 0.5. + + XHIST, FHIST, CHIST, CONHIST, MAXHIST + XHIST: Output, ALLOCATABLE rank 2 REAL(RP) array; + FHIST: Output, ALLOCATABLE rank 1 REAL(RP) array; + CHIST: Output, ALLOCATABLE rank 1 REAL(RP) array; + CONHIST: Output, ALLOCATABLE rank 2 REAL(RP) array; + MAXHIST: Input, INTEGER(IK) scalar, default: MAXFUN + XHIST, if present, will output the history of iterates; FHIST, if present, will output the + history function values; CHIST, if present, will output the history of constraint violations; + CONHIST, if present, will output the history of constraint values; MAXHIST should be a + nonnegative integer, and XHIST/FHIST/CHIST/CONHIST will output only the history of the last + MAXHIST iterations. Therefore, MAXHIST= 0 means XHIST/FHIST/CONHIST/CHIST will output nothing, + while setting MAXHIST = MAXFUN requests XHIST/FHIST/CHIST/CONHIST to output all the history. + If XHIST is present, its size at exit will be (N, min(NF, MAXHIST)); if FHIST is present, its + size at exit will be min(NF, MAXHIST); if CHIST is present, its size at exit will be + min(NF, MAXHIST); if CONHIST is present, its size at exit will be (M, min(NF, MAXHIST)). + + IMPORTANT NOTICE: + Setting MAXHIST to a large value can be costly in terms of memory for large problems. + MAXHIST will be reset to a smaller value if the memory needed exceeds MAXHISTMEM defined in + CONSTS_MOD (see consts.F90 under the directory named "common"). + Use *HIST with caution!!! (N.B.: the algorithm is NOT designed for large problems). + + MAXFILT + Input, INTEGER(IK) scalar. + MAXFILT is a nonnegative integer indicating the maximal length of the filter used for selecting + the returned solution; default: MAXFILT_DFT (a value lower than MIN_MAXFILT is not recommended); + see common/consts.F90 for the definitions of MAXFILT_DFT and MIN_MAXFILT. + + INFO + Output, INTEGER(IK) scalar. + INFO is the exit flag. It will be set to one of the following values defined in the module + INFOS_MOD (see common/infos.f90): + SMALL_TR_RADIUS: the lower bound for the trust region radius is reached; + FTARGET_ACHIEVED: the target function value is reached; + MAXFUN_REACHED: the objective function has been evaluated MAXFUN times; + MAXTR_REACHED: the trust region iteration has been performed MAXTR times (MAXTR = 2*MAXFUN); + NAN_INF_X: NaN or Inf occurs in X; + DAMAGING_ROUNDING: rounding errors are becoming damaging. + !--------------------------------------------------------------------------! + The following case(s) should NEVER occur unless there is a bug. + NAN_INF_F: the objective function returns NaN or +Inf; + NAN_INF_MODEL: NaN or Inf occurs in the model; + TRSUBP_FAILED: a trust region step failed to reduce the model + !--------------------------------------------------------------------------! + """ + if debugging: + # TODO: need to check how to figure out if python variables are present + # I guess the way to go is to set the default to None + assert (f0 is None) == (constr0 is None), "f0 and constr0 must be both present or both absent" + + n = len(x) + + assert len(constr0) == m if constr0 is not None else True, "len(constr0) != m" + + constr_loc = np.zeros(m) + if f0 is not None and constr0 is not None and all(np.isfinite(x)): + f = f0 + constr_loc = constr0 + else: + # Replace all NaNs with 0s and clip all -inf/inf to -floatmax/floatmax + np.nan_to_num(x, copy=False, neginf=-np.finfo(float).max, posinf=np.finfo(float).max) + f, constr_loc, cstrv_loc = evaluate(calcfc, x) + + if rhobeg is not None: + rhobeg_loc = rhobeg + elif rhoend is not None and np.isfinite(rhoend) and rhoend > 0: + rhobeg_loc = max(10*rhoend, RHOBEG_DEFAULT) + else: + rhobeg_loc = RHOBEG_DEFAULT + + if rhoend is not None: + rhoend_loc = rhoend + elif rhobeg_loc > 0: + rhoend_loc = max(EPS, min(0.1*rhobeg_loc, RHOEND_DEFAULT)) + else: + rhoend_loc = RHOEND_DEFAULT + + maxfun = maxfun if maxfun is not None else MAXFUN_DIM_DEFAULT*n + + if eta1 is not None: + eta1_loc = eta1 + elif eta2 is not None and 0 < eta2 < 1: + eta1_loc = max(EPS, eta2/7) + else: + eta1_loc = 0.1 + + if eta2 is not None: + eta2_loc = eta2 + else: + if 0 < eta1_loc < 1: + eta2_loc = (eta1_loc + 2)/3 + else: + eta2_loc = 0.7 + + if maxhist is not None: + maxhist_loc = maxhist + else: + maxhist_loc = max(maxfun, n+2, MAXFUN_DIM_DEFAULT*n) + + # preprocess + + ''' + ! Further revise MAXHIST_LOC according to MAXHISTMEM, and allocate memory for the history. + ! In MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize + ! CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN) + ! if they are requested; replace MAXFUN with 0 for the history that is not requested. + call prehist(maxhist_loc, n, present(xhist), xhist_loc, present(fhist), fhist_loc, & + & present(chist), chist_loc, m, present(conhist), conhist_loc) + ''' + + # call coblyb, which performs the real calculations + x, f, nf, info = cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1_loc, eta2_loc, + ftarget, gamma1, gamma2, rhobeg_loc, rhoend_loc, constr_loc, f, x, + cstrv_loc) + + # Write the outputs. + # TODO: Pick up from here, to finish writing the cobyla top level routine, and then we can start testing + # We'll be able to test up until the trust region calls, until I hear back from Zaikun about how to handle them + return x, f + + +if __name__ == "__main__": + n_chebyquad = 6 + x_chebyquad = np.array([i/n_chebyquad for i in range(1, n_chebyquad+1)]) + m = 0 + f = 0 + cstrv = 0 + x, f = cobyla(calcfc_chebyquad, m, x_chebyquad, f, cstrv) + print(x, f) + x_hexagon = np.zeros(9) + 2 + m_hexagon = 14 + f = 0 + cstrv = 0 + initial_f, initial_constr = calcfc_hexagon(x_hexagon) + print(initial_f, initial_constr) + x, f = cobyla(calcfc_hexagon, m_hexagon, x_hexagon, f, cstrv) + print(x, f) \ No newline at end of file From 40f7da921b4147224bc0c056962dd66382253a7b Mon Sep 17 00:00:00 2001 From: Nickolai Belakovski Date: Fri, 22 Sep 2023 14:26:09 -0700 Subject: [PATCH 2/6] rearranging some stuff --- python/src/prima/cobyla/cobyla.py | 260 +++++++++++++++++ python/src/prima/cobyla/cobylb.py | 102 ++++--- python/src/prima/cobyla/geometry.py | 4 +- python/src/prima/cobyla/trustregion.py | 10 +- python/src/prima/common/checkbreak.py | 83 ++++++ .../prima/examples/cobyla/cobyla_example.py | 273 +----------------- 6 files changed, 420 insertions(+), 312 deletions(-) create mode 100644 python/src/prima/cobyla/cobyla.py create mode 100644 python/src/prima/common/checkbreak.py diff --git a/python/src/prima/cobyla/cobyla.py b/python/src/prima/cobyla/cobyla.py new file mode 100644 index 0000000000..ffd0ec598e --- /dev/null +++ b/python/src/prima/cobyla/cobyla.py @@ -0,0 +1,260 @@ +from prima.common.evaluate import evaluate +from prima.common.consts import EPS, RHOBEG_DEFAULT, RHOEND_DEFAULT, \ + CTOL_DEFAULT, CWEIGHT_DEFAULT, FTARGET_DEFAULT, IPRINT_DEFAULT, \ + MAXFUN_DIM_DEFAULT +from prima.cobyla.cobylb import cobylb +import numpy as np + + +debugging = True + + +def cobyla(calcfc, m, x, f, cstrv=0, constr=0, f0=None, constr0=None, nf=None, rhobeg=None, + rhoend=None, ftarget=FTARGET_DEFAULT, ctol=CTOL_DEFAULT, cweight=CWEIGHT_DEFAULT, + maxfun=None, iprint=IPRINT_DEFAULT, eta1=None, eta2=None, gamma1=0.5, gamma2=2, + xhist=False, fhist=False, chist=False, conhist=False, maxhist=0, maxfilt=2000): + """ + Among all the arguments, only CALCFC, M, X, and F are obligatory. The others are OPTIONAL and you + can neglect them unless you are familiar with the algorithm. Any unspecified optional input will + take the default value detailed below. For instance, we may invoke the solver as follows. + + ! First define CALCFC, M, and X, and then do the following. + call cobyla(calcfc, m, x, f) + + or + + ! First define CALCFC, M, and X, and then do the following. + call cobyla(calcfc, m, x, f, rhobeg = 0.5D0, rhoend = 1.0D-3, maxfun = 100) + + ! IMPORTANT NOTICE: The user must set M correctly to the number of constraints, namely the size of + ! CONSTR introduced below. Set M to 0 if there is no constraint. + + See examples/cobyla_exmp.py for a concrete example. + + A detailed introduction to the arguments is as follows. + N.B.: RP and IK are defined in the module CONSTS_MOD. See consts.F90 under the directory named + "common". By default, RP = kind(0.0D0) and IK = kind(0), with REAL(RP) being the double-precision + real, and INTEGER(IK) being the default integer. For ADVANCED USERS, RP and IK can be defined by + setting PRIMA_REAL_PRECISION and PRIMA_INTEGER_KIND in common/ppf.h. Use the default if unsure. + + CALCFC + Input, subroutine. + CALCFC(X, F, CONSTR) should evaluate the objective function and constraints at the given + REAL(RP) vector X; it should set the objective function value to the REAL(RP) scalar F and the + constraint value to the REAL(RP) vector CONSTR. It must be provided by the user, and its + definition must conform to the following interface: + !-------------------------------------------------------------------------! + subroutine calcfc(x, f, constr) + real(RP), intent(in) :: x(:) + real(RP), intent(out) :: f + real(RP), intent(out) :: constr(:) + end subroutine calcfc + !-------------------------------------------------------------------------! + Besides, the subroutine should NOT access CONSTR beyond CONSTR(1:M), where M is the second + compulsory argument (see below), signifying the number of constraints. + + M + Input, INTEGER(IK) scalar. + M must be set to the number of constraints, namely the size (length) of CONSTR(X). + N.B.: + 1. M must be specified correctly, or the program will crash!!! + 2. Why don't we define M as optional and default it to 0 when it is absent? This is because + we need to allocate memory for CONSTR_LOC according to M. To ensure that the size of CONSTR_LOC + is correct, we require the user to specify M explicitly. + + X + Input and output, REAL(RP) vector. + As an input, X should be an N-dimensional vector that contains the starting point, N being the + dimension of the problem. As an output, X will be set to an approximate minimizer. + + F + Output, REAL(RP) scalar. + F will be set to the objective function value of X at exit. + + CSTRV + Output, REAL(RP) scalar. + CSTRV will be set to the constraint violation of X at exit, i.e., MAXVAL([-CONSTR(X), 0]). + + CONSTR + Output, ALLOCATABLE REAL(RP) vector. + CONSTR will be set to the constraint value of X at exit. + + F0 + Input, REAL(RP) scalar. + F0, if present, should be set to the objective function value of the starting X. + + CONSTR0 + Input, REAL(RP) vector. + CONSTR0, if present, should be set to the constraint value of the starting X; in addition, + SIZE(CONSTR0) must be M, or the solver will abort. + + NF + Output, INTEGER(IK) scalar. + NF will be set to the number of calls of CALCFC at exit. + + RHOBEG, RHOEND + Inputs, REAL(RP) scalars, default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be set to + the initial and final values of a trust-region radius, both being positive and RHOEND <= RHOBEG. + Typically RHOBEG should be about one tenth of the greatest expected change to a variable, and + RHOEND should indicate the accuracy that is required in the final values of the variables. + + FTARGET + Input, REAL(RP) scalar, default: -Inf. + FTARGET is the target function value. The algorithm will terminate when a feasible point with a + function value <= FTARGET is found. + + CTOL + Input, REAL(RP) scalar, default: machine epsilon. + CTOL is the tolerance of constraint violation. Any X with MAXVAL(-CONSTR(X)) <= CTOL is + considered feasible. + N.B.: 1. CTOL is absolute, not relative. 2. CTOL is used only when selecting the returned X. + It does not affect the iterations of the algorithm. + + CWEIGHT + Input, REAL(RP) scalar, default: CWEIGHT_DFT defined in the module CONSTS_MOD in common/consts.F90. + CWEIGHT is the weight that the constraint violation takes in the selection of the returned X. + + MAXFUN + Input, INTEGER(IK) scalar, default: MAXFUN_DIM_DFT*N with MAXFUN_DIM_DFT defined in the module + CONSTS_MOD (see common/consts.F90). MAXFUN is the maximal number of calls of CALCFC. + + IPRINT + Input, INTEGER(IK) scalar, default: 0. + The value of IPRINT should be set to 0, 1, -1, 2, -2, 3, or -3, which controls how much + information will be printed during the computation: + 0: there will be no printing; + 1: a message will be printed to the screen at the return, showing the best vector of variables + found and its objective function value; + 2: in addition to 1, each new value of RHO is printed to the screen, with the best vector of + variables so far and its objective function value; each new value of CPEN is also printed; + 3: in addition to 2, each function evaluation with its variables will be printed to the screen; + -1, -2, -3: the same information as 1, 2, 3 will be printed, not to the screen but to a file + named COBYLA_output.txt; the file will be created if it does not exist; the new output will + be appended to the end of this file if it already exists. + Note that IPRINT = +/-3 can be costly in terms of time and/or space. + + ETA1, ETA2, GAMMA1, GAMMA2 + Input, REAL(RP) scalars, default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2. + ETA1, ETA2, GAMMA1, and GAMMA2 are parameters in the updating scheme of the trust-region radius + detailed in the subroutine TRRAD in trustregion.f90. Roughly speaking, the trust-region radius + is contracted by a factor of GAMMA1 when the reduction ratio is below ETA1, and enlarged by a + factor of GAMMA2 when the reduction ratio is above ETA2. It is required that 0 < ETA1 <= ETA2 + < 1 and 0 < GAMMA1 < 1 < GAMMA2. Normally, ETA1 <= 0.25. It is NOT advised to set ETA1 >= 0.5. + + XHIST, FHIST, CHIST, CONHIST, MAXHIST + XHIST: Output, ALLOCATABLE rank 2 REAL(RP) array; + FHIST: Output, ALLOCATABLE rank 1 REAL(RP) array; + CHIST: Output, ALLOCATABLE rank 1 REAL(RP) array; + CONHIST: Output, ALLOCATABLE rank 2 REAL(RP) array; + MAXHIST: Input, INTEGER(IK) scalar, default: MAXFUN + XHIST, if present, will output the history of iterates; FHIST, if present, will output the + history function values; CHIST, if present, will output the history of constraint violations; + CONHIST, if present, will output the history of constraint values; MAXHIST should be a + nonnegative integer, and XHIST/FHIST/CHIST/CONHIST will output only the history of the last + MAXHIST iterations. Therefore, MAXHIST= 0 means XHIST/FHIST/CONHIST/CHIST will output nothing, + while setting MAXHIST = MAXFUN requests XHIST/FHIST/CHIST/CONHIST to output all the history. + If XHIST is present, its size at exit will be (N, min(NF, MAXHIST)); if FHIST is present, its + size at exit will be min(NF, MAXHIST); if CHIST is present, its size at exit will be + min(NF, MAXHIST); if CONHIST is present, its size at exit will be (M, min(NF, MAXHIST)). + + IMPORTANT NOTICE: + Setting MAXHIST to a large value can be costly in terms of memory for large problems. + MAXHIST will be reset to a smaller value if the memory needed exceeds MAXHISTMEM defined in + CONSTS_MOD (see consts.F90 under the directory named "common"). + Use *HIST with caution!!! (N.B.: the algorithm is NOT designed for large problems). + + MAXFILT + Input, INTEGER(IK) scalar. + MAXFILT is a nonnegative integer indicating the maximal length of the filter used for selecting + the returned solution; default: MAXFILT_DFT (a value lower than MIN_MAXFILT is not recommended); + see common/consts.F90 for the definitions of MAXFILT_DFT and MIN_MAXFILT. + + INFO + Output, INTEGER(IK) scalar. + INFO is the exit flag. It will be set to one of the following values defined in the module + INFOS_MOD (see common/infos.f90): + SMALL_TR_RADIUS: the lower bound for the trust region radius is reached; + FTARGET_ACHIEVED: the target function value is reached; + MAXFUN_REACHED: the objective function has been evaluated MAXFUN times; + MAXTR_REACHED: the trust region iteration has been performed MAXTR times (MAXTR = 2*MAXFUN); + NAN_INF_X: NaN or Inf occurs in X; + DAMAGING_ROUNDING: rounding errors are becoming damaging. + !--------------------------------------------------------------------------! + The following case(s) should NEVER occur unless there is a bug. + NAN_INF_F: the objective function returns NaN or +Inf; + NAN_INF_MODEL: NaN or Inf occurs in the model; + TRSUBP_FAILED: a trust region step failed to reduce the model + !--------------------------------------------------------------------------! + """ + if debugging: + # TODO: need to check how to figure out if python variables are present + # I guess the way to go is to set the default to None + assert (f0 is None) == (constr0 is None), "f0 and constr0 must be both present or both absent" + + n = len(x) + + assert len(constr0) == m if constr0 is not None else True, "len(constr0) != m" + + constr_loc = np.zeros(m) + if f0 is not None and constr0 is not None and all(np.isfinite(x)): + f = f0 + constr_loc = constr0 + else: + # Replace all NaNs with 0s and clip all -inf/inf to -floatmax/floatmax + np.nan_to_num(x, copy=False, neginf=-np.finfo(float).max, posinf=np.finfo(float).max) + f, constr_loc, cstrv_loc = evaluate(calcfc, x) + + if rhobeg is not None: + rhobeg_loc = rhobeg + elif rhoend is not None and np.isfinite(rhoend) and rhoend > 0: + rhobeg_loc = max(10*rhoend, RHOBEG_DEFAULT) + else: + rhobeg_loc = RHOBEG_DEFAULT + + if rhoend is not None: + rhoend_loc = rhoend + elif rhobeg_loc > 0: + rhoend_loc = max(EPS, min(0.1*rhobeg_loc, RHOEND_DEFAULT)) + else: + rhoend_loc = RHOEND_DEFAULT + + maxfun = maxfun if maxfun is not None else MAXFUN_DIM_DEFAULT*n + + if eta1 is not None: + eta1_loc = eta1 + elif eta2 is not None and 0 < eta2 < 1: + eta1_loc = max(EPS, eta2/7) + else: + eta1_loc = 0.1 + + if eta2 is not None: + eta2_loc = eta2 + else: + if 0 < eta1_loc < 1: + eta2_loc = (eta1_loc + 2)/3 + else: + eta2_loc = 0.7 + + if maxhist is not None: + maxhist_loc = maxhist + else: + maxhist_loc = max(maxfun, n+2, MAXFUN_DIM_DEFAULT*n) + + # preprocess + + ''' + ! Further revise MAXHIST_LOC according to MAXHISTMEM, and allocate memory for the history. + ! In MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize + ! CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN) + ! if they are requested; replace MAXFUN with 0 for the history that is not requested. + call prehist(maxhist_loc, n, present(xhist), xhist_loc, present(fhist), fhist_loc, & + & present(chist), chist_loc, m, present(conhist), conhist_loc) + ''' + + # call coblyb, which performs the real calculations + x, f, nf, info = cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1_loc, eta2_loc, + ftarget, gamma1, gamma2, rhobeg_loc, rhoend_loc, constr_loc, f, x, + cstrv_loc) + + # Write the outputs. + return x, f \ No newline at end of file diff --git a/python/src/prima/cobyla/cobylb.py b/python/src/prima/cobyla/cobylb.py index dee51f6c80..29109171b2 100644 --- a/python/src/prima/cobyla/cobylb.py +++ b/python/src/prima/cobyla/cobylb.py @@ -3,6 +3,7 @@ MAXTR_REACHED, DAMAGING_ROUNDING, SMALL_TR_RADIUS, \ NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED from prima.common.evaluate import evaluate, moderatec, moderatef +from prima.common.checkbreak import checkbreak_con from prima.cobyla.update import updatepole, findpole, updatexfc from prima.cobyla.geometry import assess_geo, setdrop_tr, geostep, setdrop_geo from prima.common.selectx import savefilt , selectx @@ -47,50 +48,47 @@ def redrho(rho, rhoend): return rho -def checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x): - ''' - This function checks whether to break the solver in constrained cases. - ''' - info = INFO_DEFAULT - - # Although X should not contain NaN unless there is a bug, we include the following for security. - # X can be inf, as finite + finite can be inf numerically. - if any(np.isnan(x)) or any(np.isinf(x)): - info = NAN_INF_X - - # Although NAN_INF_F should not happen unless there is a bug, we include the following for security. - if np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv): - info = NAN_INF_F - - if cstrv <= ctol and f <= ftarget: - info = FTARGET_ACHIEVED - - if nf >= maxfun: - info = MAXFUN_REACHED - return info def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, - conmat, cval, fval): + conmat, cval, srname): ''' This subroutine does the initialization concerning X, function values, and constraints. ''' + # Sizes num_constraints = conmat.shape[0] num_vars = x0.size + # Preconditions + assert num_constraints >= 0, f'M >= 0 {srname}' + assert num_vars >= 1, f'N >= 1 {srname}' + assert abs(iprint) <= 3, f'IPRINT is 0, 1, -1, 2, -2, 3, or -3 {srname}' + assert conmat.shape == (num_constraints , num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' + assert cval.size == num_vars + 1, f'CVAL.size == N+1 {srname}' + # assert maxchist * (maxchist - maxhist) == 0, f'CHIST.shape == 0 or MAXHIST {srname}' + # assert conhist.shape[0] == num_constraints and maxconhist * (maxconhist - maxhist) == 0, 'CONHIST.shape[0] == num_constraints, SIZE(CONHIST, 2) == 0 or MAXHIST {srname)}' + # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' + # assert xhist.shape[0] == num_vars and maxxhist * (maxxhist - maxhist) == 0, 'XHIST.shape[0] == N, SIZE(XHIST, 2) == 0 or MAXHIST {srname)}' + assert all(np.isfinite(x0)), f'X0 is finite {srname}' + assert rhobeg > 0, f'RHOBEG > 0 {srname}' + + #====================# + # Calculation starts # + #====================# + # Initialize info to the default value. At return, a value different from this value will # indicate an abnormal return info = INFO_DEFAULT # Initialize the simplex. It will be revised during the initialization. sim = np.eye(num_vars, num_vars+1) * rhobeg - sim[:, -1] = x0 + sim[:, num_vars] = x0 # evaluated[j] = True iff the function/constraint of SIM[:, j] has been evaluated. evaluated = np.zeros(num_vars+1, dtype=bool) - # Here the FORTRAN code initializes some history values, but apparently only for the benefit of compilers, - # so we skip it. + # Initialize fval + fval = np.zeros(num_vars+1) + REALMAX for k in range(num_vars + 1): x = sim[:, num_vars].copy() @@ -99,7 +97,7 @@ def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, j = num_vars f = moderatef(f0) constr = moderatec(constr0) - cstrv = max(max(-constr), 0) + cstrv = np.max(np.append(-constr, 0)) else: j = k - 1 x[j] += rhobeg @@ -120,10 +118,10 @@ def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, cval[j] = cstrv # Check whether to exit. - # subinfo = checkexit(maxfun, k, cstrv, ctol, f, ftarget, x) - # if subinfo != INFO_DEFAULT: - # info = subinfo - # break + subinfo = checkbreak_con(maxfun, k, cstrv, ctol, f, ftarget, x) + if subinfo != INFO_DEFAULT: + info = subinfo + break # Exchange the new vertex of the initial simplex with the optimal vertex if necessary. # This is the ONLY part that is essentially non-parallel. @@ -132,15 +130,38 @@ def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, cval[j], cval[num_vars] = cval[num_vars], cval[j] conmat[:, [j, num_vars]] = conmat[:, [num_vars, j]] sim[:, num_vars] = x - sim[j, :j+1] = -rhobeg + sim[j, :j+1] = -rhobeg # SIM[:, :j+1] is lower triangular nf = np.count_nonzero(evaluated) if evaluated.all(): # Initialize SIMI to the inverse of SIM[:, :num_vars] simi = np.linalg.inv(sim[:, :num_vars]) + + #==================# + # Calculation ends # + #==================# + + # Postconditions + assert nf <= maxfun, f'NF <= MAXFUN {srname}' + assert evaluated.size == num_vars + 1, f'EVALUATED.size == Num_vars + 1 {srname}' + # assert chist.size == maxchist, f'CHIST.size == MAXCHIST {srname}' + # assert conhist.shape== (num_constraints, maxconhist), f'CONHIST.shape == [M, MAXCONHIST] {srname}' + assert conmat.shape == (num_constraints, num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' + assert not (np.isnan(conmat).any() or np.isneginf(conmat).any()), f'CONMAT does not contain NaN/-Inf {srname}' + assert cval.size == num_vars + 1 and not (any(cval < 0) or any(np.isnan(cval)) or any(np.isposinf(cval))), f'CVAL.shape == Num_vars+1 and CVAL does not contain negative values or NaN/+Inf {srname}' + # assert fhist.shape == maxfhist, f'FHIST.shape == MAXFHIST {srname}' + # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' + assert fval.size == num_vars + 1 and not (any(np.isnan(fval)) or any(np.isposinf(fval))), f'FVAL.shape == Num_vars+1 and FVAL is not NaN/+Inf {srname}' + # assert xhist.shape == (num_vars, maxxhist), f'XHIST.shape == [N, MAXXHIST] {srname}' + assert sim.shape == (num_vars, num_vars + 1), f'SIM.shape == [N, N+1] {srname}' + assert np.isfinite(sim).all(), f'SIM is finite {srname}' + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0), f'SIM(:, 1:N) has no zero column {srname}' + assert simi.shape == (num_vars, num_vars), f'SIMI.shape == [N, N] {srname}' + assert np.isfinite(simi).all(), f'SIMI is finite {srname}' + assert np.allclose(sim[:, :num_vars] @ simi, np.eye(num_vars), rtol=0.1, atol=0.1) or not all(evaluated), f'SIMI = SIM(:, 1:N)^{-1} {srname}' - return evaluated, sim, simi, nf, info + return evaluated, sim, simi, fval, nf, info def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt): @@ -225,11 +246,10 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # maxhist = max(maxxhist, maxfhist, maxconhist, maxchist) conmat = np.zeros((num_constraints, num_vars+1)) - REALMAX cval = np.zeros(num_vars+1) + REALMAX - fval = np.zeros(num_vars+1) + REALMAX subinfo = 0 - evaluated, sim, simi, nf, subinfo = initxfc(calcfc, iprint, maxfun, constr, ctol, f, ftarget, rhobeg, x, - conmat, cval, fval) + evaluated, sim, simi, fval, nf, subinfo = initxfc(calcfc, iprint, maxfun, constr, ctol, f, ftarget, rhobeg, x, + conmat, cval, srname="COBYLA") # TODO: Need to initialize history with f and x and constr # Initialize the filter, including xfilt, ffilt, confiolt, cfilt, and nfilt. @@ -362,14 +382,14 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # Predict the change to F (PREREF) and to the constraint violation (PREREC) due to D. # We have the following in precise arithmetic. They may fail to hold due to rounding errors. - # 1. B[:NUM_CONSTRAINTS] = -CONMAT[:, NUM_VARS] and hence max(max(B[:NUM_CONSTRAINTS] - D@A[:, :NUM_CONSTRAINTS]), 0) is the + # 1. B[:NUM_CONSTRAINTS] = -CONMAT[:, NUM_VARS] and hence np.max(np.append(B[:NUM_CONSTRAINTS] - D@A[:, :NUM_CONSTRAINTS], 0)) is the # L-infinity violation of the linearized constraints corresponding to D. When D=0, the - # violation is max(max(B[:NUM_CONSTRAINTS]), 0) = CVAL[NUM_VARS]. PREREC is the reduction of this + # violation is np.max(np.append(B[:NUM_CONSTRAINTS], 0)) = CVAL[NUM_VARS]. PREREC is the reduction of this # violation achieved by D, which is nonnegative in theory; PREREC = 0 iff B[:NUM_CONSTRAINTS] <= 0, i.e. the # trust-region center satisfies the linearized constraints. # 2. PREREF may be negative or 0, but it is positive when PREREC = 0 and shortd is False # 3. Due to 2, in theory, max(PREREC, PREREF) > 0 if shortd is False. - prerec = cval[num_vars] - max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0) + prerec = cval[num_vars] - np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0)) preref = np.dot(d, A[:, num_constraints]) # Can be negative # Evaluate PREREM, which is the predicted reduction in the merit function. @@ -446,7 +466,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, break # Better action to take? Geometry step, or a RESCUE as in BOBYQA? # Check whether to break due to maxfun, ftarget, etc. - subinfo = checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x) + subinfo = checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x) if subinfo != INFO_DEFAULT: info = subinfo break @@ -567,7 +587,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, break # Check whether to break due to maxfun, ftarget, etc. - subinfo = checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x) + subinfo = checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x) if subinfo != INFO_DEFAULT: info = subinfo break @@ -668,7 +688,7 @@ def getcpen(conmat, cpen, cval, delta, fval, rho, sim, simi): d = trstlp(A, b, delta) # Predict the change to F (PREREF) and to the constraint violation (PREREC) due to D. - prerec = cval[num_vars] - max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0) + prerec = cval[num_vars] - np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0)) preref = np.dot(d, A[:, num_constraints]) # Can be negative if not (prerec > 0 and preref < 0): # PREREC <= 0 or PREREF >=0 or either is NaN diff --git a/python/src/prima/cobyla/geometry.py b/python/src/prima/cobyla/geometry.py index e5bff7cf0b..9a6e0487d2 100644 --- a/python/src/prima/cobyla/geometry.py +++ b/python/src/prima/cobyla/geometry.py @@ -53,8 +53,8 @@ def geostep(jdrop, cpen, conmat, cval, delta, fval, factor_gamma, simi): A = np.zeros((num_vars, num_constraints + 1)) A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi - cvmaxp = max(0, max(-d@A[:, :num_constraints] - conmat[:, num_vars])) - cvmaxn = max(0, max(d@A[:, :num_constraints] - conmat[:, num_vars])) + cvmaxp = np.max(np.append(0, -d@A[:, :num_constraints] - conmat[:, num_vars])) + cvmaxn = np.max(np.append(0, d@A[:, :num_constraints] - conmat[:, num_vars])) if 2 * np.dot(d, A[:, num_constraints]) < cpen * (cvmaxp - cvmaxn): d *= -1 return d diff --git a/python/src/prima/cobyla/trustregion.py b/python/src/prima/cobyla/trustregion.py index ebccb25c8f..a971c478b6 100644 --- a/python/src/prima/cobyla/trustregion.py +++ b/python/src/prima/cobyla/trustregion.py @@ -197,7 +197,7 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): iact = np.linspace(0, mcon-1, mcon, dtype=int) nact = 0 d = np.zeros(num_vars) - cviol = max(*b, 0) + cviol = np.max(np.append(b, 0)) vmultc = cviol - b z = np.eye(num_vars) if mcon == 0 or cviol <= 0: @@ -222,7 +222,7 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): # In Powell's code, stage 2 uses the zdota and cviol calculated by stage1. Here we recalculate # them so that they need not be passed from stage 1 to 2, and hence the coupling is reduced. - cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0) + cviol = np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0)) zdota[:nact] = [np.dot(z[:, k], A[:, iact[k]]) for k in range(nact)] # More initialization @@ -436,7 +436,7 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): # maximum residual if thage 1 is being done dnew = d + step * sdirn if stage == 1: - cviol = max(max(b[iact[:nact]] - dnew@A[:, iact[:nact]]), 0) + cviol = np.max(np.append(b[iact[:nact]] - dnew@A[:, iact[:nact]], 0)) # N.B.: cviol will be used when calculating vmultd[nact+1:mcon]. # Zaikun 20211011: @@ -475,9 +475,9 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): vmultc = np.maximum(np.zeros(len(vmultc)), (1 - frac)*vmultc + frac*vmultd) if stage == 1: # cviol = (1 - frac) * cvold + frac * cviol # Powell's version - # In theory, cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0), yet the + # In theory, cviol = np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0)), yet the # cviol updated as above can be quite different from this value if A has huge entries (e.g., > 1e20) - cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0) + cviol = np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0)) if icon < 0 or icon >= mcon: # In Powell's code, the condition is icon == 0. Indeed, icon < 0 cannot hold unless diff --git a/python/src/prima/common/checkbreak.py b/python/src/prima/common/checkbreak.py new file mode 100644 index 0000000000..b74d476ab5 --- /dev/null +++ b/python/src/prima/common/checkbreak.py @@ -0,0 +1,83 @@ +from prima.common.consts import INFO_DEFAULT, NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED + +import numpy as np + +def checkbreak_unc(maxfun, nf, f, ftarget, x): + ''' + This module checks whether to break out of the solver loop in the unconstrained case. + ''' + + # Outputs + info = INFO_DEFAULT + + # Local variables + srname = "CHECKbreak_UNC" + + # Preconditions + assert INFO_DEFAULT not in [NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED], f'NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, and MAXFUN_REACHED differ from INFO_DFT {srname}' + # X does not contain NaN if the initial X does not contain NaN and the subroutines generating + # trust-region/geometry steps work properly so that they never produce a step containing NaN/Inf. + assert not any(np.isnan(x)), f'X does not contain NaN {srname}' + # With the moderated extreme barrier, F cannot be NaN/+Inf. + assert not (any(np.isnan(f)) or any(np.isposinf(f))), f'F is not NaN/+Inf {srname}' + + #====================# + # Calculation starts # + #====================# + + # Although X should not contain NaN unless there is a bug, we include the following for security. + # X can be Inf, as finite + finite can be Inf numerically. + if any(np.isnan(x)) or any(np.isinf(x)): + info = NAN_INF_X + + # Although NAN_INF_F should not happen unless there is a bug, we include the following for security. + if any(np.isnan(f)) or any(np.isposinf(f)): + info = NAN_INF_F + + if f <= ftarget: + info = FTARGET_ACHIEVED + + if nf >= maxfun: + info = MAXFUN_REACHED + + return info + +def checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x): + ''' + This module checks whether to break out of the solver loop in the constrained case. + ''' + + # Outputs + info = INFO_DEFAULT + + # Local variables + srname = "CHECKbreak_CON" + + # Preconditions + assert INFO_DEFAULT not in [NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED], f'NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, and MAXFUN_REACHED differ from INFO_DFT {srname}' + # X does not contain NaN if the initial X does not contain NaN and the subroutines generating + # trust-region/geometry steps work properly so that they never produce a step containing NaN/Inf. + assert not any(np.isnan(x)), f'X does not contain NaN {srname}' + # With the moderated extreme barrier, F or CSTRV cannot be NaN/+Inf. + assert not (np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv)), f'F or CSTRV is not NaN/+Inf {srname}' + + #====================# + # Calculation starts # + #====================# + + # Although X should not contain NaN unless there is a bug, we include the following for security. + # X can be Inf, as finite + finite can be Inf numerically. + if any(np.isnan(x)) or any(np.isinf(x)): + info = NAN_INF_X + + # Although NAN_INF_F should not happen unless there is a bug, we include the following for security. + if np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv): + info = NAN_INF_F + + if cstrv <= ctol and f <= ftarget: + info = FTARGET_ACHIEVED + + if nf >= maxfun: + info = MAXFUN_REACHED + + return info \ No newline at end of file diff --git a/python/src/prima/examples/cobyla/cobyla_example.py b/python/src/prima/examples/cobyla/cobyla_example.py index 9be2f566e3..fba02303a9 100644 --- a/python/src/prima/examples/cobyla/cobyla_example.py +++ b/python/src/prima/examples/cobyla/cobyla_example.py @@ -1,11 +1,6 @@ import numpy as np -from prima.common.evaluate import evaluate -from prima.common.consts import EPS, RHOBEG_DEFAULT, RHOEND_DEFAULT, \ - CTOL_DEFAULT, CWEIGHT_DEFAULT, FTARGET_DEFAULT, IPRINT_DEFAULT, \ - MAXFUN_DIM_DEFAULT -from prima.cobyla.cobylb import cobylb +from prima.cobyla.cobyla import cobyla -debugging = True def test_chebyquad(): f, _ = calcfc_chebyquad([1, 2]) @@ -16,10 +11,10 @@ def calcfc_chebyquad(x): x = np.array(x) n = len(x) # or shape? y = np.zeros((n + 1, n + 1)) - y[0:n, 0] = 1 - y[0:n, 1] = 2*x - 1 + y[:n, 0] = 1 + y[:n, 1] = 2*x - 1 for i in range(1, n): - y[0:n, i+1] = 2*y[0:n, 1] * y[0:n, i] - y[0:n, i - 1] + y[:n, i+1] = 2*y[:n, 1] * y[:n, i] - y[:n, i - 1] f = 0 for i in range(n+1): @@ -27,7 +22,7 @@ def calcfc_chebyquad(x): if i % 2 == 0: tmp += 1/((i)**2 - 1) f += tmp**2 - constr = np.zeros(1) + constr = np.zeros(0) return f, constr def calcfc_hexagon(x): @@ -54,267 +49,17 @@ def calcfc_hexagon(x): return f, constr -def cobyla(calcfc, m, x, f, cstrv=0, constr=0, f0=None, constr0=None, nf=None, rhobeg=None, - rhoend=None, ftarget=FTARGET_DEFAULT, ctol=CTOL_DEFAULT, cweight=CWEIGHT_DEFAULT, - maxfun=None, iprint=IPRINT_DEFAULT, eta1=None, eta2=None, gamma1=0.5, gamma2=2, - xhist=False, fhist=False, chist=False, conhist=False, maxhist=0, maxfilt=2000): - """ - Among all the arguments, only CALCFC, M, X, and F are obligatory. The others are OPTIONAL and you - can neglect them unless you are familiar with the algorithm. Any unspecified optional input will - take the default value detailed below. For instance, we may invoke the solver as follows. - - ! First define CALCFC, M, and X, and then do the following. - call cobyla(calcfc, m, x, f) - - or - - ! First define CALCFC, M, and X, and then do the following. - call cobyla(calcfc, m, x, f, rhobeg = 0.5D0, rhoend = 1.0D-3, maxfun = 100) - - ! IMPORTANT NOTICE: The user must set M correctly to the number of constraints, namely the size of - ! CONSTR introduced below. Set M to 0 if there is no constraint. - - See examples/cobyla_exmp.py for a concrete example. - - A detailed introduction to the arguments is as follows. - N.B.: RP and IK are defined in the module CONSTS_MOD. See consts.F90 under the directory named - "common". By default, RP = kind(0.0D0) and IK = kind(0), with REAL(RP) being the double-precision - real, and INTEGER(IK) being the default integer. For ADVANCED USERS, RP and IK can be defined by - setting PRIMA_REAL_PRECISION and PRIMA_INTEGER_KIND in common/ppf.h. Use the default if unsure. - - CALCFC - Input, subroutine. - CALCFC(X, F, CONSTR) should evaluate the objective function and constraints at the given - REAL(RP) vector X; it should set the objective function value to the REAL(RP) scalar F and the - constraint value to the REAL(RP) vector CONSTR. It must be provided by the user, and its - definition must conform to the following interface: - !-------------------------------------------------------------------------! - subroutine calcfc(x, f, constr) - real(RP), intent(in) :: x(:) - real(RP), intent(out) :: f - real(RP), intent(out) :: constr(:) - end subroutine calcfc - !-------------------------------------------------------------------------! - Besides, the subroutine should NOT access CONSTR beyond CONSTR(1:M), where M is the second - compulsory argument (see below), signifying the number of constraints. - - M - Input, INTEGER(IK) scalar. - M must be set to the number of constraints, namely the size (length) of CONSTR(X). - N.B.: - 1. M must be specified correctly, or the program will crash!!! - 2. Why don't we define M as optional and default it to 0 when it is absent? This is because - we need to allocate memory for CONSTR_LOC according to M. To ensure that the size of CONSTR_LOC - is correct, we require the user to specify M explicitly. - - X - Input and output, REAL(RP) vector. - As an input, X should be an N-dimensional vector that contains the starting point, N being the - dimension of the problem. As an output, X will be set to an approximate minimizer. - - F - Output, REAL(RP) scalar. - F will be set to the objective function value of X at exit. - - CSTRV - Output, REAL(RP) scalar. - CSTRV will be set to the constraint violation of X at exit, i.e., MAXVAL([-CONSTR(X), 0]). - - CONSTR - Output, ALLOCATABLE REAL(RP) vector. - CONSTR will be set to the constraint value of X at exit. - - F0 - Input, REAL(RP) scalar. - F0, if present, should be set to the objective function value of the starting X. - - CONSTR0 - Input, REAL(RP) vector. - CONSTR0, if present, should be set to the constraint value of the starting X; in addition, - SIZE(CONSTR0) must be M, or the solver will abort. - - NF - Output, INTEGER(IK) scalar. - NF will be set to the number of calls of CALCFC at exit. - - RHOBEG, RHOEND - Inputs, REAL(RP) scalars, default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be set to - the initial and final values of a trust-region radius, both being positive and RHOEND <= RHOBEG. - Typically RHOBEG should be about one tenth of the greatest expected change to a variable, and - RHOEND should indicate the accuracy that is required in the final values of the variables. - - FTARGET - Input, REAL(RP) scalar, default: -Inf. - FTARGET is the target function value. The algorithm will terminate when a feasible point with a - function value <= FTARGET is found. - - CTOL - Input, REAL(RP) scalar, default: machine epsilon. - CTOL is the tolerance of constraint violation. Any X with MAXVAL(-CONSTR(X)) <= CTOL is - considered feasible. - N.B.: 1. CTOL is absolute, not relative. 2. CTOL is used only when selecting the returned X. - It does not affect the iterations of the algorithm. - - CWEIGHT - Input, REAL(RP) scalar, default: CWEIGHT_DFT defined in the module CONSTS_MOD in common/consts.F90. - CWEIGHT is the weight that the constraint violation takes in the selection of the returned X. - - MAXFUN - Input, INTEGER(IK) scalar, default: MAXFUN_DIM_DFT*N with MAXFUN_DIM_DFT defined in the module - CONSTS_MOD (see common/consts.F90). MAXFUN is the maximal number of calls of CALCFC. - - IPRINT - Input, INTEGER(IK) scalar, default: 0. - The value of IPRINT should be set to 0, 1, -1, 2, -2, 3, or -3, which controls how much - information will be printed during the computation: - 0: there will be no printing; - 1: a message will be printed to the screen at the return, showing the best vector of variables - found and its objective function value; - 2: in addition to 1, each new value of RHO is printed to the screen, with the best vector of - variables so far and its objective function value; each new value of CPEN is also printed; - 3: in addition to 2, each function evaluation with its variables will be printed to the screen; - -1, -2, -3: the same information as 1, 2, 3 will be printed, not to the screen but to a file - named COBYLA_output.txt; the file will be created if it does not exist; the new output will - be appended to the end of this file if it already exists. - Note that IPRINT = +/-3 can be costly in terms of time and/or space. - - ETA1, ETA2, GAMMA1, GAMMA2 - Input, REAL(RP) scalars, default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2. - ETA1, ETA2, GAMMA1, and GAMMA2 are parameters in the updating scheme of the trust-region radius - detailed in the subroutine TRRAD in trustregion.f90. Roughly speaking, the trust-region radius - is contracted by a factor of GAMMA1 when the reduction ratio is below ETA1, and enlarged by a - factor of GAMMA2 when the reduction ratio is above ETA2. It is required that 0 < ETA1 <= ETA2 - < 1 and 0 < GAMMA1 < 1 < GAMMA2. Normally, ETA1 <= 0.25. It is NOT advised to set ETA1 >= 0.5. - - XHIST, FHIST, CHIST, CONHIST, MAXHIST - XHIST: Output, ALLOCATABLE rank 2 REAL(RP) array; - FHIST: Output, ALLOCATABLE rank 1 REAL(RP) array; - CHIST: Output, ALLOCATABLE rank 1 REAL(RP) array; - CONHIST: Output, ALLOCATABLE rank 2 REAL(RP) array; - MAXHIST: Input, INTEGER(IK) scalar, default: MAXFUN - XHIST, if present, will output the history of iterates; FHIST, if present, will output the - history function values; CHIST, if present, will output the history of constraint violations; - CONHIST, if present, will output the history of constraint values; MAXHIST should be a - nonnegative integer, and XHIST/FHIST/CHIST/CONHIST will output only the history of the last - MAXHIST iterations. Therefore, MAXHIST= 0 means XHIST/FHIST/CONHIST/CHIST will output nothing, - while setting MAXHIST = MAXFUN requests XHIST/FHIST/CHIST/CONHIST to output all the history. - If XHIST is present, its size at exit will be (N, min(NF, MAXHIST)); if FHIST is present, its - size at exit will be min(NF, MAXHIST); if CHIST is present, its size at exit will be - min(NF, MAXHIST); if CONHIST is present, its size at exit will be (M, min(NF, MAXHIST)). - - IMPORTANT NOTICE: - Setting MAXHIST to a large value can be costly in terms of memory for large problems. - MAXHIST will be reset to a smaller value if the memory needed exceeds MAXHISTMEM defined in - CONSTS_MOD (see consts.F90 under the directory named "common"). - Use *HIST with caution!!! (N.B.: the algorithm is NOT designed for large problems). - - MAXFILT - Input, INTEGER(IK) scalar. - MAXFILT is a nonnegative integer indicating the maximal length of the filter used for selecting - the returned solution; default: MAXFILT_DFT (a value lower than MIN_MAXFILT is not recommended); - see common/consts.F90 for the definitions of MAXFILT_DFT and MIN_MAXFILT. - - INFO - Output, INTEGER(IK) scalar. - INFO is the exit flag. It will be set to one of the following values defined in the module - INFOS_MOD (see common/infos.f90): - SMALL_TR_RADIUS: the lower bound for the trust region radius is reached; - FTARGET_ACHIEVED: the target function value is reached; - MAXFUN_REACHED: the objective function has been evaluated MAXFUN times; - MAXTR_REACHED: the trust region iteration has been performed MAXTR times (MAXTR = 2*MAXFUN); - NAN_INF_X: NaN or Inf occurs in X; - DAMAGING_ROUNDING: rounding errors are becoming damaging. - !--------------------------------------------------------------------------! - The following case(s) should NEVER occur unless there is a bug. - NAN_INF_F: the objective function returns NaN or +Inf; - NAN_INF_MODEL: NaN or Inf occurs in the model; - TRSUBP_FAILED: a trust region step failed to reduce the model - !--------------------------------------------------------------------------! - """ - if debugging: - # TODO: need to check how to figure out if python variables are present - # I guess the way to go is to set the default to None - assert (f0 is None) == (constr0 is None), "f0 and constr0 must be both present or both absent" - - n = len(x) - - assert len(constr0) == m if constr0 is not None else True, "len(constr0) != m" - - constr_loc = np.zeros(m) - if f0 is not None and constr0 is not None and all(np.isfinite(x)): - f = f0 - constr_loc = constr0 - else: - # Replace all NaNs with 0s and clip all -inf/inf to -floatmax/floatmax - np.nan_to_num(x, copy=False, neginf=-np.finfo(float).max, posinf=np.finfo(float).max) - f, constr_loc, cstrv_loc = evaluate(calcfc, x) - - if rhobeg is not None: - rhobeg_loc = rhobeg - elif rhoend is not None and np.isfinite(rhoend) and rhoend > 0: - rhobeg_loc = max(10*rhoend, RHOBEG_DEFAULT) - else: - rhobeg_loc = RHOBEG_DEFAULT - - if rhoend is not None: - rhoend_loc = rhoend - elif rhobeg_loc > 0: - rhoend_loc = max(EPS, min(0.1*rhobeg_loc, RHOEND_DEFAULT)) - else: - rhoend_loc = RHOEND_DEFAULT - - maxfun = maxfun if maxfun is not None else MAXFUN_DIM_DEFAULT*n - - if eta1 is not None: - eta1_loc = eta1 - elif eta2 is not None and 0 < eta2 < 1: - eta1_loc = max(EPS, eta2/7) - else: - eta1_loc = 0.1 - - if eta2 is not None: - eta2_loc = eta2 - else: - if 0 < eta1_loc < 1: - eta2_loc = (eta1_loc + 2)/3 - else: - eta2_loc = 0.7 - - if maxhist is not None: - maxhist_loc = maxhist - else: - maxhist_loc = max(maxfun, n+2, MAXFUN_DIM_DEFAULT*n) - - # preprocess - - ''' - ! Further revise MAXHIST_LOC according to MAXHISTMEM, and allocate memory for the history. - ! In MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize - ! CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN) - ! if they are requested; replace MAXFUN with 0 for the history that is not requested. - call prehist(maxhist_loc, n, present(xhist), xhist_loc, present(fhist), fhist_loc, & - & present(chist), chist_loc, m, present(conhist), conhist_loc) - ''' - - # call coblyb, which performs the real calculations - x, f, nf, info = cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1_loc, eta2_loc, - ftarget, gamma1, gamma2, rhobeg_loc, rhoend_loc, constr_loc, f, x, - cstrv_loc) - - # Write the outputs. - # TODO: Pick up from here, to finish writing the cobyla top level routine, and then we can start testing - # We'll be able to test up until the trust region calls, until I hear back from Zaikun about how to handle them - return x, f - if __name__ == "__main__": n_chebyquad = 6 - x_chebyquad = np.array([i/n_chebyquad for i in range(1, n_chebyquad+1)]) + x_chebyquad = np.array([i/(n_chebyquad+1) for i in range(1, n_chebyquad+1)]) m = 0 f = 0 - cstrv = 0 + cstrv = np.inf x, f = cobyla(calcfc_chebyquad, m, x_chebyquad, f, cstrv) print(x, f) + # evaluate + f, constr = calcfc_chebyquad(x_chebyquad) x_hexagon = np.zeros(9) + 2 m_hexagon = 14 f = 0 From b233dfad1271f13638983d08d9e8df5b5be6b56c Mon Sep 17 00:00:00 2001 From: Nickolai Belakovski Date: Mon, 25 Sep 2023 17:43:23 -0700 Subject: [PATCH 3/6] Added a readme with some style guidelines. Implemented savehist and modified the return values to be more usable. I've gone through cobyla.py and it seems pretty close to the original. Remaining work is to implement retmsg and to add in all pre/post conditions and retrofit existing pre/post conditions to match the style guide. Then we're ready for a deeper conversation on whether any of this is useful XD --- python/README.md | 69 +++++ python/src/prima/cobyla/cobyla.py | 257 ++++++++-------- python/src/prima/cobyla/cobylb.py | 282 +++++------------- python/src/prima/cobyla/initialize.py | 163 ++++++++++ python/src/prima/common/consts.py | 38 ++- python/src/prima/common/evaluate.py | 59 +++- python/src/prima/common/history.py | 17 ++ python/src/prima/common/preproc.py | 267 +++++++++++++++++ python/src/prima/common/present.py | 5 + python/src/prima/common/redrho.py | 34 +++ .../prima/examples/cobyla/cobyla_example.py | 60 +++- 11 files changed, 884 insertions(+), 367 deletions(-) create mode 100644 python/src/prima/cobyla/initialize.py create mode 100644 python/src/prima/common/history.py create mode 100644 python/src/prima/common/preproc.py create mode 100644 python/src/prima/common/present.py create mode 100644 python/src/prima/common/redrho.py diff --git a/python/README.md b/python/README.md index d325b1cc99..0d60cfadc9 100644 --- a/python/README.md +++ b/python/README.md @@ -3,3 +3,72 @@ To develop, `cd` into the `src` directory and run ```pip install --editable .``` This will install prima locally in an editable fashion. From there you can run the examples/cobyla/cobyla_example.py (from any directory) and go from there. + +## Style notes + +- In cases where there are Python or numpy functions that can achieve the same aim (i.e. sum vs np.sum) we prefer the numpy functions. + - Rationale: + - In some cases we must use the numpy function because it has functionality the Python function doesn't, i.e. np.sum has an axis argument, whereas sum does not. + - In light of this, for the sake of consistency we prefer to use the numpy functions everywhere. +- Most of the comments are copied from Fortran verbatim, except in cases where they need to modified due to specifics of the Python language. In these cases a note will be made of the difference between Fortran and Python + - Rationale: + - The main purpose of this is to keep the Python and Fortran codebases as similar as possible. +- For determining the dimensions of an array, we exclusively use `np.size` instead of `np.shape` or `some_array.shape` or `len` + - Rationale: + - Fortran uses `SIZE` so this helps us to be as consistent with the Fortran code as possible. + +## A note on Fortran's `max` and `maxval` and their Python equivalents + +| Fortran | Python | +|---------|--------| +| `max` | `np.maximum` | +| `maxval` | `np.max` or `max` | + +Fortran's `max` and numpy's `maximum` accept two arguments, either of which can be a scalar or an array, +and returns an elementwise maximum of the two arguments. In the case of a scalar and an array argument it +returns an elementwise maximum of the scalar and each element of the array. + +Fortran's `maxval` takes a single argument and returns the largest item in the list, similar to numpy's `max`. +Python's built-in `max` can either take a single argument and return the largest element, or it can take a +variable number of arguments and return the largest of all of them. + +Per the style notes, prefer `np.max` over the built-in `max`. + +For the larger of two numbers Fortran uses `max`, and we prefer `np.maximum` over the Python `max` for consistenchy in the translation. + +This note applies to `min` and `minval` as well. + + +## A note on indices + +Consider the following Fortran code + +``` +do i=0:5 + print *, * +end do +``` + +It can be easily and automatically translated to Python as + +``` +for i in range(0, 6): + print(i) +``` + +Now consider the following similar loop + +``` +do i=1:5 + print *, some_array(i) +end do +``` + +This can be translated to Python as + +``` +for i in range(1, 6): + print(some_array[i-1]) +``` + +This leads to awkward Python code, since the more pythonic code would range from 0 to 5, and the indexing would be `some_array[i]`. In order to make the Python code more usable, we will attempt to write more "pythonic" code, even though that makes the translation a little bit more difficult. \ No newline at end of file diff --git a/python/src/prima/cobyla/cobyla.py b/python/src/prima/cobyla/cobyla.py index ffd0ec598e..497c8a6ed2 100644 --- a/python/src/prima/cobyla/cobyla.py +++ b/python/src/prima/cobyla/cobyla.py @@ -1,125 +1,113 @@ -from prima.common.evaluate import evaluate +from prima.common.evaluate import evaluate, moderatex from prima.common.consts import EPS, RHOBEG_DEFAULT, RHOEND_DEFAULT, \ CTOL_DEFAULT, CWEIGHT_DEFAULT, FTARGET_DEFAULT, IPRINT_DEFAULT, \ - MAXFUN_DIM_DEFAULT + MAXFUN_DIM_DEFAULT, DEBUGGING +from prima.common.preproc import preproc +from prima.common.present import present from prima.cobyla.cobylb import cobylb import numpy as np +from dataclasses import dataclass -debugging = True +# Return type for COBYLA +@dataclass +class COBYLAResult: + x: np.ndarray + f: float + constr: np.ndarray + cstrv: float + nf: int + xhist: np.ndarray | None + fhist: np.ndarray | None + chist: np.ndarray | None + conhist: np.ndarray | None + info: int -def cobyla(calcfc, m, x, f, cstrv=0, constr=0, f0=None, constr0=None, nf=None, rhobeg=None, +def cobyla(calcfc, num_constraints, x, f0=None, constr0=None, rhobeg=None, rhoend=None, ftarget=FTARGET_DEFAULT, ctol=CTOL_DEFAULT, cweight=CWEIGHT_DEFAULT, maxfun=None, iprint=IPRINT_DEFAULT, eta1=None, eta2=None, gamma1=0.5, gamma2=2, - xhist=False, fhist=False, chist=False, conhist=False, maxhist=0, maxfilt=2000): + maxhist=None, maxfilt=2000): """ - Among all the arguments, only CALCFC, M, X, and F are obligatory. The others are OPTIONAL and you + Among all the arguments, only CALCFC, M, and X are obligatory. The others are OPTIONAL and you can neglect them unless you are familiar with the algorithm. Any unspecified optional input will take the default value detailed below. For instance, we may invoke the solver as follows. - ! First define CALCFC, M, and X, and then do the following. - call cobyla(calcfc, m, x, f) + First define CALCFC, NUM_CONSTRAINTS, and X, and then do the following. + result = cobyla(calcfc, num_constraints, x) or - ! First define CALCFC, M, and X, and then do the following. - call cobyla(calcfc, m, x, f, rhobeg = 0.5D0, rhoend = 1.0D-3, maxfun = 100) + First define CALCFC, NUM_CONSTRAINTS, and X, and then do the following. + result = cobyla(calcfc, num_constraints, x, rhobeg=0.5, rhoend=1E-3, maxfun=100) - ! IMPORTANT NOTICE: The user must set M correctly to the number of constraints, namely the size of - ! CONSTR introduced below. Set M to 0 if there is no constraint. + IMPORTANT NOTICE: The user must set NUM_CONSTRAINTS correctly to the number of constraints, namely + the size of CONSTR introduced below. Set NUM_CONSTRAINTS to 0 if there is no constraint. - See examples/cobyla_exmp.py for a concrete example. + See examples/cobyla_example.py for a concrete example. - A detailed introduction to the arguments is as follows. - N.B.: RP and IK are defined in the module CONSTS_MOD. See consts.F90 under the directory named - "common". By default, RP = kind(0.0D0) and IK = kind(0), with REAL(RP) being the double-precision - real, and INTEGER(IK) being the default integer. For ADVANCED USERS, RP and IK can be defined by - setting PRIMA_REAL_PRECISION and PRIMA_INTEGER_KIND in common/ppf.h. Use the default if unsure. + A detailed introduction to the arguments is as follows. The section below is largely similar to the + comments in the Fortran version but has been slightly modified to reflect the differences in + some of the arguments. CALCFC - Input, subroutine. - CALCFC(X, F, CONSTR) should evaluate the objective function and constraints at the given - REAL(RP) vector X; it should set the objective function value to the REAL(RP) scalar F and the - constraint value to the REAL(RP) vector CONSTR. It must be provided by the user, and its - definition must conform to the following interface: + F, CONSTR = CALCFC(X) should evaluate the objective function and constraints at the given + vector X; it should return the objective function value and the constraint value. It must be + provided by the user, and its definition must conform to the following interface: !-------------------------------------------------------------------------! - subroutine calcfc(x, f, constr) - real(RP), intent(in) :: x(:) - real(RP), intent(out) :: f - real(RP), intent(out) :: constr(:) - end subroutine calcfc + def calcfc(x): + # ... calculations ... + return f, constr !-------------------------------------------------------------------------! - Besides, the subroutine should NOT access CONSTR beyond CONSTR(1:M), where M is the second - compulsory argument (see below), signifying the number of constraints. + X, F, and CONSTR are numpy arrays. If there are no constraints, CONSTR is expected to be an + empty numpy array. - M - Input, INTEGER(IK) scalar. - M must be set to the number of constraints, namely the size (length) of CONSTR(X). + NUM_CONSTRAINTS + NUM_CONSTRAINTS must be set to the number of constraints N.B.: - 1. M must be specified correctly, or the program will crash!!! - 2. Why don't we define M as optional and default it to 0 when it is absent? This is because - we need to allocate memory for CONSTR_LOC according to M. To ensure that the size of CONSTR_LOC - is correct, we require the user to specify M explicitly. + 1. Why don't we define NUM_CONSTRAINTS as optional and determine it from running CALCFC? + Because we need to know it ahead of time to setup up various matrices and vectors. Since CALCFC + might be expensive, we prefer to avoid running it unnecessarily. X - Input and output, REAL(RP) vector. - As an input, X should be an N-dimensional vector that contains the starting point, N being the - dimension of the problem. As an output, X will be set to an approximate minimizer. - - F - Output, REAL(RP) scalar. - F will be set to the objective function value of X at exit. - - CSTRV - Output, REAL(RP) scalar. - CSTRV will be set to the constraint violation of X at exit, i.e., MAXVAL([-CONSTR(X), 0]). - - CONSTR - Output, ALLOCATABLE REAL(RP) vector. - CONSTR will be set to the constraint value of X at exit. + X should be an N-dimensional vector that contains the starting point, N being the + dimension of the problem. F0 - Input, REAL(RP) scalar. F0, if present, should be set to the objective function value of the starting X. CONSTR0 - Input, REAL(RP) vector. CONSTR0, if present, should be set to the constraint value of the starting X; in addition, - SIZE(CONSTR0) must be M, or the solver will abort. - - NF - Output, INTEGER(IK) scalar. - NF will be set to the number of calls of CALCFC at exit. + SIZE(CONSTR0) must be NUM_CONSTRAINTS, or the solver will abort. RHOBEG, RHOEND - Inputs, REAL(RP) scalars, default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be set to + Default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be set to the initial and final values of a trust-region radius, both being positive and RHOEND <= RHOBEG. Typically RHOBEG should be about one tenth of the greatest expected change to a variable, and RHOEND should indicate the accuracy that is required in the final values of the variables. FTARGET - Input, REAL(RP) scalar, default: -Inf. + Default: -Inf. FTARGET is the target function value. The algorithm will terminate when a feasible point with a function value <= FTARGET is found. CTOL - Input, REAL(RP) scalar, default: machine epsilon. + Default: machine epsilon. CTOL is the tolerance of constraint violation. Any X with MAXVAL(-CONSTR(X)) <= CTOL is considered feasible. N.B.: 1. CTOL is absolute, not relative. 2. CTOL is used only when selecting the returned X. It does not affect the iterations of the algorithm. CWEIGHT - Input, REAL(RP) scalar, default: CWEIGHT_DFT defined in the module CONSTS_MOD in common/consts.F90. + Default: CWEIGHT_DFT defined in common/consts.py. CWEIGHT is the weight that the constraint violation takes in the selection of the returned X. MAXFUN - Input, INTEGER(IK) scalar, default: MAXFUN_DIM_DFT*N with MAXFUN_DIM_DFT defined in the module - CONSTS_MOD (see common/consts.F90). MAXFUN is the maximal number of calls of CALCFC. + Default: MAXFUN_DIM_DFT*N with MAXFUN_DIM_DFT defined in common/consts.py. + MAXFUN is the maximal number of calls of CALCFC. IPRINT - Input, INTEGER(IK) scalar, default: 0. + Default: 0. The value of IPRINT should be set to 0, 1, -1, 2, -2, 3, or -3, which controls how much information will be printed during the computation: 0: there will be no printing; @@ -134,18 +122,14 @@ def cobyla(calcfc, m, x, f, cstrv=0, constr=0, f0=None, constr0=None, nf=None, r Note that IPRINT = +/-3 can be costly in terms of time and/or space. ETA1, ETA2, GAMMA1, GAMMA2 - Input, REAL(RP) scalars, default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2. + Default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2. ETA1, ETA2, GAMMA1, and GAMMA2 are parameters in the updating scheme of the trust-region radius - detailed in the subroutine TRRAD in trustregion.f90. Roughly speaking, the trust-region radius + detailed in the subroutine TRRAD in trustregion.py. Roughly speaking, the trust-region radius is contracted by a factor of GAMMA1 when the reduction ratio is below ETA1, and enlarged by a factor of GAMMA2 when the reduction ratio is above ETA2. It is required that 0 < ETA1 <= ETA2 < 1 and 0 < GAMMA1 < 1 < GAMMA2. Normally, ETA1 <= 0.25. It is NOT advised to set ETA1 >= 0.5. XHIST, FHIST, CHIST, CONHIST, MAXHIST - XHIST: Output, ALLOCATABLE rank 2 REAL(RP) array; - FHIST: Output, ALLOCATABLE rank 1 REAL(RP) array; - CHIST: Output, ALLOCATABLE rank 1 REAL(RP) array; - CONHIST: Output, ALLOCATABLE rank 2 REAL(RP) array; MAXHIST: Input, INTEGER(IK) scalar, default: MAXFUN XHIST, if present, will output the history of iterates; FHIST, if present, will output the history function values; CHIST, if present, will output the history of constraint violations; @@ -169,8 +153,25 @@ def cobyla(calcfc, m, x, f, cstrv=0, constr=0, f0=None, constr0=None, nf=None, r the returned solution; default: MAXFILT_DFT (a value lower than MIN_MAXFILT is not recommended); see common/consts.F90 for the definitions of MAXFILT_DFT and MIN_MAXFILT. + Output: A Python dataclass with the following fields + X + X will be set to the best vector of variables found. + + F + F will be set to the objective function value at X. + + CSTRV + Optional, only appears if CSTRV is True. CSTRV will be set to the constraint violation of X at + exit, i.e., MAXVAL([-CONSTR(X), 0]). + + CONSTR + Optional, only appears if CONSTR is True. CONSTR will be set to the constraint value of X at + exit. + + NF + NF will be set to the number of calls of CALCFC at exit. + INFO - Output, INTEGER(IK) scalar. INFO is the exit flag. It will be set to one of the following values defined in the module INFOS_MOD (see common/infos.f90): SMALL_TR_RADIUS: the lower bound for the trust region radius is reached; @@ -186,75 +187,81 @@ def cobyla(calcfc, m, x, f, cstrv=0, constr=0, f0=None, constr0=None, nf=None, r TRSUBP_FAILED: a trust region step failed to reduce the model !--------------------------------------------------------------------------! """ - if debugging: - # TODO: need to check how to figure out if python variables are present - # I guess the way to go is to set the default to None + + # Local variables + solver = 'COBYLA' + + # Preconditions + if DEBUGGING: assert (f0 is None) == (constr0 is None), "f0 and constr0 must be both present or both absent" - n = len(x) + # Sizes + num_vars = len(x) - assert len(constr0) == m if constr0 is not None else True, "len(constr0) != m" + # Exit if constr0 is not consistent with num_constraints + if present(constr0): + assert np.size(constr0) == num_constraints - constr_loc = np.zeros(m) - if f0 is not None and constr0 is not None and all(np.isfinite(x)): - f = f0 - constr_loc = constr0 - else: - # Replace all NaNs with 0s and clip all -inf/inf to -floatmax/floatmax - np.nan_to_num(x, copy=False, neginf=-np.finfo(float).max, posinf=np.finfo(float).max) - f, constr_loc, cstrv_loc = evaluate(calcfc, x) + + # If the user provides the function & constraint value at X0, then use them. + # Otherwise, set [F0, CONSTR0] = [F(X0), CONSTR(X0)], so that COBYLB only needs a single interface. + if (f0 is None and constr0 is None) or not all(np.isfinite(x)): + # Replace any NaN in X by ZERO and Inf/-Inf in X by REALMAX/-REALMAX. + x = moderatex(x) + f0, constr0, cstrv0 = evaluate(calcfc, x) - if rhobeg is not None: - rhobeg_loc = rhobeg - elif rhoend is not None and np.isfinite(rhoend) and rhoend > 0: - rhobeg_loc = max(10*rhoend, RHOBEG_DEFAULT) + if present(rhobeg): + rhobeg = rhobeg + elif present(rhoend) and np.isfinite(rhoend) and rhoend > 0: + rhobeg = np.maximum(10*rhoend, RHOBEG_DEFAULT) else: - rhobeg_loc = RHOBEG_DEFAULT + rhobeg = RHOBEG_DEFAULT - if rhoend is not None: - rhoend_loc = rhoend - elif rhobeg_loc > 0: - rhoend_loc = max(EPS, min(0.1*rhobeg_loc, RHOEND_DEFAULT)) + if present(rhoend): + rhoend = rhoend + elif rhobeg > 0: + rhoend = np.maximum(EPS, np.minimum(0.1*rhobeg, RHOEND_DEFAULT)) else: - rhoend_loc = RHOEND_DEFAULT + rhoend = RHOEND_DEFAULT - maxfun = maxfun if maxfun is not None else MAXFUN_DIM_DEFAULT*n + maxfun = maxfun if present(maxfun) else MAXFUN_DIM_DEFAULT*num_vars - if eta1 is not None: - eta1_loc = eta1 - elif eta2 is not None and 0 < eta2 < 1: - eta1_loc = max(EPS, eta2/7) + if present(eta1): + eta1 = eta1 + elif present(eta2) and 0 < eta2 < 1: + eta1 = np.maximum(EPS, eta2/7) else: - eta1_loc = 0.1 + eta1 = 0.1 - if eta2 is not None: - eta2_loc = eta2 + if present(eta2): + eta2 = eta2 else: - if 0 < eta1_loc < 1: - eta2_loc = (eta1_loc + 2)/3 + if 0 < eta1 < 1: + eta2 = (eta1 + 2)/3 else: - eta2_loc = 0.7 + eta2 = 0.7 - if maxhist is not None: - maxhist_loc = maxhist - else: - maxhist_loc = max(maxfun, n+2, MAXFUN_DIM_DEFAULT*n) + maxhist = maxhist if present(maxhist) else np.max([maxfun, num_vars+2, MAXFUN_DIM_DEFAULT*num_vars]) - # preprocess + # Preprocess the inputs in case some of them are invalid. It does nothing if all inputs are valid. + # Python: The underscores refer to `npt` and `x0`, neither of which are relevant here. + iprint, maxfun, maxhist, ftarget, rhobeg, rhoend, _, maxfilt, ctol, cweight, \ + eta1, eta2, gamma1, gamma2, _ = preproc(solver, num_vars, iprint, maxfun, maxhist, \ + ftarget, rhobeg, rhoend, num_constraints=num_constraints, ctol=ctol, cweight=cweight, \ + eta1=eta1, eta2=eta2, gamma1=gamma1, gamma2=gamma2, maxfilt=maxfilt) - ''' - ! Further revise MAXHIST_LOC according to MAXHISTMEM, and allocate memory for the history. - ! In MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize - ! CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN) - ! if they are requested; replace MAXFUN with 0 for the history that is not requested. - call prehist(maxhist_loc, n, present(xhist), xhist_loc, present(fhist), fhist_loc, & - & present(chist), chist_loc, m, present(conhist), conhist_loc) - ''' + + # Further revise MAXHIST according to MAXHISTMEM, and allocate memory for the history. + # In MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize + # CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN) + # if they are requested; replace MAXFUN with 0 for the history that is not requested. + # prehist(maxhist, num_vars, present(xhist), xhist_loc, present(fhist), fhist_loc, & + # & present(chist), chist_loc, m, present(conhist), conhist_loc) + # call coblyb, which performs the real calculations - x, f, nf, info = cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1_loc, eta2_loc, - ftarget, gamma1, gamma2, rhobeg_loc, rhoend_loc, constr_loc, f, x, - cstrv_loc) + x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info = cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, + ftarget, gamma1, gamma2, rhobeg, rhoend, constr0, f0, x, + cstrv0, maxhist) - # Write the outputs. - return x, f \ No newline at end of file + return COBYLAResult(x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info) diff --git a/python/src/prima/cobyla/cobylb.py b/python/src/prima/cobyla/cobylb.py index 29109171b2..d37d028ee4 100644 --- a/python/src/prima/cobyla/cobylb.py +++ b/python/src/prima/cobyla/cobylb.py @@ -1,19 +1,30 @@ import numpy as np -from prima.common.consts import INFO_DEFAULT, REALMAX, EPS, CONSTRMAX, \ - MAXTR_REACHED, DAMAGING_ROUNDING, SMALL_TR_RADIUS, \ - NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED -from prima.common.evaluate import evaluate, moderatec, moderatef from prima.common.checkbreak import checkbreak_con +from prima.common.consts import INFO_DEFAULT, REALMAX, EPS, MAXTR_REACHED, \ + DAMAGING_ROUNDING, SMALL_TR_RADIUS, DEBUGGING, MIN_MAXFILT +from prima.common.evaluate import evaluate +from prima.common.history import savehist +from prima.common.redrho import redrho +from prima.common.selectx import savefilt, selectx from prima.cobyla.update import updatepole, findpole, updatexfc from prima.cobyla.geometry import assess_geo, setdrop_tr, geostep, setdrop_geo -from prima.common.selectx import savefilt , selectx from prima.cobyla.trustregion import trstlp, redrat, trrad +from prima.cobyla.initialize import initxfc, initfilt def fcratio(conmat, fval): ''' This function calculates the ratio between the "typical changre" of F and that of CONSTR. See equations (12)-(13) in Section 3 of the COBYLA paper for the definition of the ratio. ''' + + # Preconditions + assert np.size(fval) >= 1 + assert np.size(conmat, 1) == np.size(fval) + + #====================# + # Calculation starts # + #====================# + cmin = np.min(conmat, axis=1) cmax = np.max(conmat, axis=1) fmin = min(fval) @@ -27,195 +38,30 @@ def fcratio(conmat, fval): else: r = 0 - return r - -def redrho(rho, rhoend): - ''' - This function calculates RHO when it needs to be reduced. - The sceme is shared by UOBYQA, NEWUOA, BOBYQA, LINCOA. For COBYLA, Powell's code reduces RHO by - 'RHO *= 0.5; if RHO <= 1.5 * RHOEND: RHO = RHOEND' as specified in (11) of the COBYLA - paper. However, this scheme seems to work better, especially after we introduce DELTA. - ''' - - rho_ratio = rho / rhoend - - if rho_ratio > 250: - rho *= 0.1 - elif rho_ratio <= 16: - rho = rhoend - else: - rho = np.sqrt(rho_ratio) * rhoend # rho = np.sqrt(rho * rhoend) - - return rho - - -def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, - conmat, cval, srname): - ''' - This subroutine does the initialization concerning X, function values, and constraints. - ''' - - # Sizes - num_constraints = conmat.shape[0] - num_vars = x0.size - - # Preconditions - assert num_constraints >= 0, f'M >= 0 {srname}' - assert num_vars >= 1, f'N >= 1 {srname}' - assert abs(iprint) <= 3, f'IPRINT is 0, 1, -1, 2, -2, 3, or -3 {srname}' - assert conmat.shape == (num_constraints , num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' - assert cval.size == num_vars + 1, f'CVAL.size == N+1 {srname}' - # assert maxchist * (maxchist - maxhist) == 0, f'CHIST.shape == 0 or MAXHIST {srname}' - # assert conhist.shape[0] == num_constraints and maxconhist * (maxconhist - maxhist) == 0, 'CONHIST.shape[0] == num_constraints, SIZE(CONHIST, 2) == 0 or MAXHIST {srname)}' - # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' - # assert xhist.shape[0] == num_vars and maxxhist * (maxxhist - maxhist) == 0, 'XHIST.shape[0] == N, SIZE(XHIST, 2) == 0 or MAXHIST {srname)}' - assert all(np.isfinite(x0)), f'X0 is finite {srname}' - assert rhobeg > 0, f'RHOBEG > 0 {srname}' - - #====================# - # Calculation starts # - #====================# - - # Initialize info to the default value. At return, a value different from this value will - # indicate an abnormal return - info = INFO_DEFAULT - - # Initialize the simplex. It will be revised during the initialization. - sim = np.eye(num_vars, num_vars+1) * rhobeg - sim[:, num_vars] = x0 - - # evaluated[j] = True iff the function/constraint of SIM[:, j] has been evaluated. - evaluated = np.zeros(num_vars+1, dtype=bool) - - # Initialize fval - fval = np.zeros(num_vars+1) + REALMAX - - for k in range(num_vars + 1): - x = sim[:, num_vars].copy() - # We will evaluate F corresponding to SIM(:, J). - if k == 0: - j = num_vars - f = moderatef(f0) - constr = moderatec(constr0) - cstrv = np.max(np.append(-constr, 0)) - else: - j = k - 1 - x[j] += rhobeg - f, constr, cstrv = evaluate(calcfc, x) - - # Print a message about the function/constraint evaluation according to IPRINT. - # TODO: Finish implementing fmsg, if decided that its worth it - # fmsg(solver, iprint, k, f, x, cstrv, constr) - - # Save X, F, CONSTR, CSTRV into the history. - # TODO: Implement history (maybe we need it for the iterations?) - # savehist(k, x, xhist, f, fhist, cstrv, chist, constr, conhist) - - # Save F, CONSTR, and CSTRV to FVAL, CONMAT, and CVAL respectively. - evaluated[j] = True - fval[j] = f - conmat[:, j] = constr - cval[j] = cstrv - - # Check whether to exit. - subinfo = checkbreak_con(maxfun, k, cstrv, ctol, f, ftarget, x) - if subinfo != INFO_DEFAULT: - info = subinfo - break - - # Exchange the new vertex of the initial simplex with the optimal vertex if necessary. - # This is the ONLY part that is essentially non-parallel. - if j < num_vars and fval[j] < fval[num_vars]: - fval[j], fval[num_vars] = fval[num_vars], fval[j] - cval[j], cval[num_vars] = cval[num_vars], cval[j] - conmat[:, [j, num_vars]] = conmat[:, [num_vars, j]] - sim[:, num_vars] = x - sim[j, :j+1] = -rhobeg # SIM[:, :j+1] is lower triangular - - nf = np.count_nonzero(evaluated) - - if evaluated.all(): - # Initialize SIMI to the inverse of SIM[:, :num_vars] - simi = np.linalg.inv(sim[:, :num_vars]) - #==================# # Calculation ends # #==================# # Postconditions - assert nf <= maxfun, f'NF <= MAXFUN {srname}' - assert evaluated.size == num_vars + 1, f'EVALUATED.size == Num_vars + 1 {srname}' - # assert chist.size == maxchist, f'CHIST.size == MAXCHIST {srname}' - # assert conhist.shape== (num_constraints, maxconhist), f'CONHIST.shape == [M, MAXCONHIST] {srname}' - assert conmat.shape == (num_constraints, num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' - assert not (np.isnan(conmat).any() or np.isneginf(conmat).any()), f'CONMAT does not contain NaN/-Inf {srname}' - assert cval.size == num_vars + 1 and not (any(cval < 0) or any(np.isnan(cval)) or any(np.isposinf(cval))), f'CVAL.shape == Num_vars+1 and CVAL does not contain negative values or NaN/+Inf {srname}' - # assert fhist.shape == maxfhist, f'FHIST.shape == MAXFHIST {srname}' - # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' - assert fval.size == num_vars + 1 and not (any(np.isnan(fval)) or any(np.isposinf(fval))), f'FVAL.shape == Num_vars+1 and FVAL is not NaN/+Inf {srname}' - # assert xhist.shape == (num_vars, maxxhist), f'XHIST.shape == [N, MAXXHIST] {srname}' - assert sim.shape == (num_vars, num_vars + 1), f'SIM.shape == [N, N+1] {srname}' - assert np.isfinite(sim).all(), f'SIM is finite {srname}' - assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0), f'SIM(:, 1:N) has no zero column {srname}' - assert simi.shape == (num_vars, num_vars), f'SIMI.shape == [N, N] {srname}' - assert np.isfinite(simi).all(), f'SIMI is finite {srname}' - assert np.allclose(sim[:, :num_vars] @ simi, np.eye(num_vars), rtol=0.1, atol=0.1) or not all(evaluated), f'SIMI = SIM(:, 1:N)^{-1} {srname}' - - return evaluated, sim, simi, fval, nf, info - - -def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt): - ''' - This function initializes the filter (XFILT, etc) that will be used when selecting - x at the end of the solver. - N.B.: - 1. Why not initialize the filters using XHIST, etc? Because the history is empty if - the user chooses not to output it. - 2. We decouple INITXFC and INITFILT so that it is easier to parallelize the former - if needed. - ''' - - # Sizes - num_constraints = conmat.shape[0] - num_vars = sim.shape[0] - maxfilt = len(ffilt) - - # Precondictions - assert num_constraints >= 0 - assert num_vars >= 1 - assert maxfilt >= 1 - assert confilt.shape == (num_constraints, maxfilt) - assert cfilt.shape == (maxfilt,) - assert xfilt.shape == (num_vars, maxfilt) - assert ffilt.shape == (maxfilt,) - assert conmat.shape == (num_constraints, num_vars+1) - # TODO: Need to finish these preconditions - - - nfilt = 0 - for i in range(num_vars+1): - if evaluated[i]: - if i < num_vars: - x = sim[:, i] + sim[:, num_vars] - else: - x = sim[:, i] # i == num_vars, i.e. the last column - nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cval[i], ctol, cweight, fval[i], x, nfilt, cfilt, ffilt, xfilt, conmat[:, i], confilt) + assert r >= 0 - - return nfilt + return r def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, gamma1, gamma2, rhobeg, rhoend, constr, f, x, - cstrv): + cstrv, maxhist): ''' This subroutine performs the actual computations of COBYLA. ''' - # A[:, :num_constraints] contains the approximate gradient for the constraints, and A[:, num_constraints] is minus the - # approximate gradient for the objective function. - A = np.zeros((len(x), len(constr) + 1)) - + # Outputs + xhist = [] + fhist = [] + chist = [] + conhist = [] + + # Local variables # CPENMIN is the minimum of the penalty parameter CPEN for the L-infinity constraint violation in # the merit function. Note that CPENMIN = 0 in Powell's implementation, which allows CPEN to be 0. # Here, we take CPENMIN > 0 so that CPEN is always positive. This avoids the situation where PREREM @@ -237,35 +83,52 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, factor_beta = 2.1 # The factor beta in the COBYLA paper factor_delta = 1.1 # The factor delta in the COBYLA paper factor_gamma = 0.5 # The factor gamma in the COBYLA paper - num_constraints = len(constr) - num_vars = len(x) - # maxxhist = xhist.shape[1] - # maxfhist = len(fhist) - # maxconhist = conhist.shape[1] - # maxchist = len(chist) - # maxhist = max(maxxhist, maxfhist, maxconhist, maxchist) - conmat = np.zeros((num_constraints, num_vars+1)) - REALMAX - cval = np.zeros(num_vars+1) + REALMAX + + # Sizes + num_constraints = np.size(constr) + num_vars = np.size(x) + + # Preconditions + if DEBUGGING: + assert abs(iprint) <= 3 + assert num_constraints >= 0 + assert num_vars >= 1 + assert maxfun >= num_vars + 2 + assert rhobeg >= rhoend and rhoend > 0 + assert eta1 >= 0 and eta1 <= eta2 and eta2 < 1 + assert gamma1 > 0 and gamma1 < 1 and gamma2 > 1 + assert ctol >= 0 + assert cweight >= 0 + assert maxhist >= 0 and maxhist <= maxfun + assert maxfilt >= min(MIN_MAXFILT, maxfun) and maxfilt <= maxfun + assert factor_alpha > 0 and factor_alpha < factor_gamma and factor_gamma < 1 + assert factor_beta >= factor_delta and factor_delta > 1 + + #====================# + # Calculation starts # + #====================# - subinfo = 0 - evaluated, sim, simi, fval, nf, subinfo = initxfc(calcfc, iprint, maxfun, constr, ctol, f, ftarget, rhobeg, x, - conmat, cval, srname="COBYLA") - # TODO: Need to initialize history with f and x and constr + # Initialize SIM, FVAL, CONMAT, and CVAL, together with the history. + # After the initialization, SIM[:, NUM_VARS] holds the vertex of the initial simplex with the smallest + # function value (regardless of the constraint violation), and SIM[:, :NUM_VARS] holds the displacements + # from the other vertices to SIM[:, NUM_VARS]. FVAL, CONMAT, and CVAL hold the function values, + # constraint values, and constraint violations on the vertices in the order corresponding to SIM. + evaluated, conmat, cval, sim, simi, fval, nf, subinfo = initxfc(calcfc, iprint, maxfun, constr, ctol, f, ftarget, rhobeg, x, + srname="COBYLA") # Initialize the filter, including xfilt, ffilt, confiolt, cfilt, and nfilt. # N.B.: The filter is used only when selecting which iterate to return. It does not # interfere with the iterations. COBYLA is NOT a filter method but a trust-region method # based on an L-inifinity merit function. Powell's implementation does not use a # filter to select the iterate, possibly returning a suboptimal iterate. - cfilt = np.zeros(min(max(maxfilt, 1), maxfun)) - confilt = np.zeros((len(constr), len(cfilt))) - ffilt = np.zeros(len(cfilt)) - xfilt = np.zeros((len(x), len(cfilt))) + cfilt = np.zeros(np.minimum(np.maximum(maxfilt, 1), maxfun)) + confilt = np.zeros((np.size(constr), np.size(cfilt))) + ffilt = np.zeros(np.size(cfilt)) + xfilt = np.zeros((np.size(x), np.size(cfilt))) nfilt = initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt) # TODO: Initfilt and savefilt appear to be working. It did not get tested with a rising edge in the # keep array (i.e. a False followed by a True), but it looks like that shouldn't be a problem. - # For now we should move on to the rest of the algorithm so that we can update the PR with a milestone. # Check whether to return due to abnormal cases that may occur during the initialization. if subinfo != INFO_DEFAULT: @@ -362,9 +225,15 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # Calculate the linear approximations to the objective and constraint functions, placing # minus the objective function gradient after the constraint gradients in the array A. - # N.B.: TRSTLP accesses A mostly by colums, so it is more reasonable to save A instead of A.T - A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T - A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi + # N.B.: TRSTLP accesses A mostly by columns, so it is more reasonable to save A instead of A.T + A = np.hstack(( + # A[:, :num_constraints] contains the approximate gradient for the constraints, + ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T, + # and A[:, num_constraints] is minus the approximate gradient for the objective function. + ((fval[num_vars] - fval[:num_vars])@simi).reshape(num_vars, 1) + )) + + # Theoretically, but not numerically, the last entry of b does not affect the result of TRSTLP # We set it to -fval[num_vars] following Powell's code. b = np.hstack((-conmat[:, num_vars], -fval[num_vars])) @@ -413,8 +282,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # TODO: Implement me # fmsg(solver, iprint, nf, f, x, cstrv, constr) # Save X, F, CONSTR, CSTRV into the history. - # TODO: Implement me - # savehist(nf, x, xhist, f, fhist, cstrv, chist, constr, conhist) + savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist) # Save X, F, CONSTR, CSTRV into the filter. nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt) @@ -576,8 +444,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # TODO: Implement me # fmsg(solver, iprint, nf, f, x, cstrv, constr) # Save X, F, CONSTR, CSTRV into the history. - # TODO: Implement me - # savehist(nf, x, xhist, f, fhist, cstrv, chist, constr, conhist) + savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist) # Save X, F, CONSTR, CSTRV into the filter. nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt) @@ -622,7 +489,8 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, x = sim[:, num_vars] + d f, constr, cstrv = evaluate(calcfc, x) nf += 1 - # TODO: msg, savehist + # TODO: retmsg + savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist) nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt) # Return the best calculated values of the variables @@ -640,7 +508,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # Print a return message according to IPRINT. # TODO: Implement me #call retmsg(solver, info, iprint, nf, f, x, cstrv, constr) - return x, f, nf, info + return x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info diff --git a/python/src/prima/cobyla/initialize.py b/python/src/prima/cobyla/initialize.py new file mode 100644 index 0000000000..c9cf1f9804 --- /dev/null +++ b/python/src/prima/cobyla/initialize.py @@ -0,0 +1,163 @@ +from prima.common.checkbreak import checkbreak_con +from prima.common.consts import INFO_DEFAULT, REALMAX +from prima.common.evaluate import evaluate, moderatec, moderatef +from prima.common.selectx import savefilt + +import numpy as np + +def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, srname): + ''' + This subroutine does the initialization concerning X, function values, and constraints. + ''' + + # Sizes + num_constraints = np.size(constr0) + num_vars = np.size(x0) + + # Preconditions + assert num_constraints >= 0, f'M >= 0 {srname}' + assert num_vars >= 1, f'N >= 1 {srname}' + assert abs(iprint) <= 3, f'IPRINT is 0, 1, -1, 2, -2, 3, or -3 {srname}' + # assert conmat.shape == (num_constraints , num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' + # assert cval.size == num_vars + 1, f'CVAL.size == N+1 {srname}' + # assert maxchist * (maxchist - maxhist) == 0, f'CHIST.shape == 0 or MAXHIST {srname}' + # assert conhist.shape[0] == num_constraints and maxconhist * (maxconhist - maxhist) == 0, 'CONHIST.shape[0] == num_constraints, SIZE(CONHIST, 2) == 0 or MAXHIST {srname)}' + # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' + # assert xhist.shape[0] == num_vars and maxxhist * (maxxhist - maxhist) == 0, 'XHIST.shape[0] == N, SIZE(XHIST, 2) == 0 or MAXHIST {srname)}' + assert all(np.isfinite(x0)), f'X0 is finite {srname}' + assert rhobeg > 0, f'RHOBEG > 0 {srname}' + + #====================# + # Calculation starts # + #====================# + + # Initialize info to the default value. At return, a value different from this value will + # indicate an abnormal return + info = INFO_DEFAULT + + # Initialize the simplex. It will be revised during the initialization. + sim = np.eye(num_vars, num_vars+1) * rhobeg + sim[:, num_vars] = x0 + + # evaluated[j] = True iff the function/constraint of SIM[:, j] has been evaluated. + evaluated = np.zeros(num_vars+1, dtype=bool) + + # Initialize fval + fval = np.zeros(num_vars+1) + REALMAX + conmat = np.zeros((num_constraints, num_vars+1)) - REALMAX + cval = np.zeros(num_vars+1) + REALMAX + + + for k in range(num_vars + 1): + x = sim[:, num_vars].copy() + # We will evaluate F corresponding to SIM(:, J). + if k == 0: + j = num_vars + f = moderatef(f0) + constr = moderatec(constr0) + cstrv = np.max(np.append(-constr, 0)) + else: + j = k - 1 + x[j] += rhobeg + f, constr, cstrv = evaluate(calcfc, x) + + # Print a message about the function/constraint evaluation according to IPRINT. + # TODO: Finish implementing fmsg, if decided that its worth it + # fmsg(solver, iprint, k, f, x, cstrv, constr) + + # Save X, F, CONSTR, CSTRV into the history. + # TODO: Implement history (maybe we need it for the iterations?) + # savehist(k, x, xhist, f, fhist, cstrv, chist, constr, conhist) + + # Save F, CONSTR, and CSTRV to FVAL, CONMAT, and CVAL respectively. + evaluated[j] = True + fval[j] = f + conmat[:, j] = constr + cval[j] = cstrv + + # Check whether to exit. + subinfo = checkbreak_con(maxfun, k, cstrv, ctol, f, ftarget, x) + if subinfo != INFO_DEFAULT: + info = subinfo + break + + # Exchange the new vertex of the initial simplex with the optimal vertex if necessary. + # This is the ONLY part that is essentially non-parallel. + if j < num_vars and fval[j] < fval[num_vars]: + fval[j], fval[num_vars] = fval[num_vars], fval[j] + cval[j], cval[num_vars] = cval[num_vars], cval[j] + conmat[:, [j, num_vars]] = conmat[:, [num_vars, j]] + sim[:, num_vars] = x + sim[j, :j+1] = -rhobeg # SIM[:, :j+1] is lower triangular + + nf = np.count_nonzero(evaluated) + + if evaluated.all(): + # Initialize SIMI to the inverse of SIM[:, :num_vars] + simi = np.linalg.inv(sim[:, :num_vars]) + + #==================# + # Calculation ends # + #==================# + + # Postconditions + assert nf <= maxfun, f'NF <= MAXFUN {srname}' + assert evaluated.size == num_vars + 1, f'EVALUATED.size == Num_vars + 1 {srname}' + # assert chist.size == maxchist, f'CHIST.size == MAXCHIST {srname}' + # assert conhist.shape== (num_constraints, maxconhist), f'CONHIST.shape == [M, MAXCONHIST] {srname}' + assert conmat.shape == (num_constraints, num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' + assert not (np.isnan(conmat).any() or np.isneginf(conmat).any()), f'CONMAT does not contain NaN/-Inf {srname}' + assert cval.size == num_vars + 1 and not (any(cval < 0) or any(np.isnan(cval)) or any(np.isposinf(cval))), f'CVAL.shape == Num_vars+1 and CVAL does not contain negative values or NaN/+Inf {srname}' + # assert fhist.shape == maxfhist, f'FHIST.shape == MAXFHIST {srname}' + # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' + assert fval.size == num_vars + 1 and not (any(np.isnan(fval)) or any(np.isposinf(fval))), f'FVAL.shape == Num_vars+1 and FVAL is not NaN/+Inf {srname}' + # assert xhist.shape == (num_vars, maxxhist), f'XHIST.shape == [N, MAXXHIST] {srname}' + assert sim.shape == (num_vars, num_vars + 1), f'SIM.shape == [N, N+1] {srname}' + assert np.isfinite(sim).all(), f'SIM is finite {srname}' + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0), f'SIM(:, 1:N) has no zero column {srname}' + assert simi.shape == (num_vars, num_vars), f'SIMI.shape == [N, N] {srname}' + assert np.isfinite(simi).all(), f'SIMI is finite {srname}' + assert np.allclose(sim[:, :num_vars] @ simi, np.eye(num_vars), rtol=0.1, atol=0.1) or not all(evaluated), f'SIMI = SIM(:, 1:N)^{-1} {srname}' + + return evaluated, conmat, cval, sim, simi, fval, nf, info + + +def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt): + ''' + This function initializes the filter (XFILT, etc) that will be used when selecting + x at the end of the solver. + N.B.: + 1. Why not initialize the filters using XHIST, etc? Because the history is empty if + the user chooses not to output it. + 2. We decouple INITXFC and INITFILT so that it is easier to parallelize the former + if needed. + ''' + + # Sizes + num_constraints = conmat.shape[0] + num_vars = sim.shape[0] + maxfilt = len(ffilt) + + # Precondictions + assert num_constraints >= 0 + assert num_vars >= 1 + assert maxfilt >= 1 + assert confilt.shape == (num_constraints, maxfilt) + assert cfilt.shape == (maxfilt,) + assert xfilt.shape == (num_vars, maxfilt) + assert ffilt.shape == (maxfilt,) + assert conmat.shape == (num_constraints, num_vars+1) + # TODO: Need to finish these preconditions + + + nfilt = 0 + for i in range(num_vars+1): + if evaluated[i]: + if i < num_vars: + x = sim[:, i] + sim[:, num_vars] + else: + x = sim[:, i] # i == num_vars, i.e. the last column + nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cval[i], ctol, cweight, fval[i], x, nfilt, cfilt, ffilt, xfilt, conmat[:, i], confilt) + + + return nfilt \ No newline at end of file diff --git a/python/src/prima/common/consts.py b/python/src/prima/common/consts.py index 5b41d77a70..6e73999abd 100644 --- a/python/src/prima/common/consts.py +++ b/python/src/prima/common/consts.py @@ -1,17 +1,15 @@ import numpy as np +import os + +# TODO: Reset this to False when making PR +DEBUGGING = True or bool(os.getenv('PRIMA_DEBUGGING')) + REALMIN = np.finfo(float).tiny REALMAX = np.finfo(float).max -RHOBEG_DEFAULT = 1 -RHOEND_DEFAULT = 1e-6 FUNCMAX = 2.0**100 CONSTRMAX = FUNCMAX EPS = np.finfo(float).eps -CTOL_DEFAULT = np.sqrt(EPS) -CWEIGHT_DEFAULT = 1e8 -FTARGET_DEFAULT = -REALMAX -IPRINT_DEFAULT = False -MAXFUN_DIM_DEFAULT = 500 INFO_DEFAULT = 0 SMALL_TR_RADIUS = 0 # Why is this same as default? MAXTR_REACHED = 20 @@ -19,4 +17,28 @@ NAN_INF_X = -1 NAN_INF_F = -2 FTARGET_ACHIEVED = 1 -MAXFUN_REACHED = 3 \ No newline at end of file +MAXFUN_REACHED = 3 + +# Some default values +RHOBEG_DEFAULT = 1 +RHOEND_DEFAULT = 1e-6 +FTARGET_DEFAULT = -REALMAX +CTOL_DEFAULT = np.sqrt(EPS) +CWEIGHT_DEFAULT = 1e8 +ETA1_DEFAULT = 0.1 +ETA2_DEFAULT = 0.7 +GAMMA1_DEFAULT = 0.5 +GAMMA2_DEFAULT = 2 +IPRINT_DEFAULT = 0 +MAXFUN_DIM_DEFAULT = 500 + +PRIMA_MAX_HIST_MEM_MB = 300 # 1MB > 10^5*REAL64. 100 can be too small. + +# Maximal amount of memory (Byte) allowed for XHIST, FHIST, CONHIST, CHIST, and the filters. +MHM = PRIMA_MAX_HIST_MEM_MB * 10**6 +# Make sure that MAXHISTMEM does not exceed HUGE(0) to avoid overflow and memory errors. +MAXHISTMEM = min(MHM, np.iinfo(np.int32).max) + +# Maximal length of the filter used in constrained solvers. +MIN_MAXFILT = 200 # Should be positive; < 200 is not recommended. +MAXFILT_DEFAULT = 10 * MIN_MAXFILT \ No newline at end of file diff --git a/python/src/prima/common/evaluate.py b/python/src/prima/common/evaluate.py index d16cabf419..4e0e81e4d5 100644 --- a/python/src/prima/common/evaluate.py +++ b/python/src/prima/common/evaluate.py @@ -1,14 +1,18 @@ import numpy as np -from prima.common.consts import FUNCMAX, CONSTRMAX +from prima.common.consts import FUNCMAX, CONSTRMAX, REALMAX, DEBUGGING # This is a module evaluating the objective/constraint function with # Nan/Inf handling. def moderatex(x): - np.nan_to_num(x, copy=False, nan=FUNCMAX) - x = np.clip(x, -np.finfo(float).max, np.finfo(float).max) - return x + ''' + This function moderates a decision variable. It replaces NaN by 0 and Inf/-Inf by REALMAX/-REALMAX. + ''' + y = x + y[np.isnan(x)] = 0 + y = np.maximum(-REALMAX, np.minimum(REALMAX, y)) + return y def moderatef(f): f = FUNCMAX if np.isnan(f) else f @@ -20,17 +24,48 @@ def moderatec(c): return c -def evaluate(calfun, x): +def evaluate(calcfc, x): """ - This function evaluates calfun at x, setting to f the objective function value. - Nan/Inf are handled by a moderated extreme barrier + This function evaluates CALCFC at X, setting F to the objective function value, CONSTR to the + constraint value, and CSTRV to the constraint violation. Nan/Inf are handled by a moderated + extreme barrier. """ - assert not any(np.isnan(x)), "x contains NaN" + # Preconditions + if DEBUGGING: + # X should not contain NaN if the initial X does not contain NaN and the subroutines generating + # trust-region/geometry steps work properly so that they never produce a step containing NaN/Inf. + assert not any(np.isnan(x)) - f, constr = calfun(moderatex(x)) - f = moderatef(f) - constr = moderatec(constr) - cstrv = max([*-constr, 0]) + #====================# + # Calculation starts # + #====================# + + if any(np.isnan(x)): + f = np.sum(x) + constr = f # TODO: This is supposed to be an array, but I don't know how long it is here. + cstrv = f + else: + f, constr = calcfc(moderatex(x)) # Evaluate F and CONSTR; We moderate X before doing so. + + # Moderated extreme barrier: replace NaN/huge objective or constraint values with a large but + # finite value. This is naive, and better approaches surely exist. + f = moderatef(f) + constr = moderatec(constr) + + # Evaluate the constraint violation for constraints CONSTR(X) >= 0. + cstrv = np.max(np.append(-constr, 0)) + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + # With X not containing NaN, and with the moderated extreme barrier, F cannot be NaN/+Inf, and + # CONSTR cannot be NaN/-Inf. + assert not (np.isnan(f) or np.isposinf(f)) + assert not any(np.isnan(constr) | np.isneginf(constr)) + assert not (cstrv < 0 or np.isnan(cstrv) or np.isposinf(cstrv)) return f, constr, cstrv \ No newline at end of file diff --git a/python/src/prima/common/history.py b/python/src/prima/common/history.py new file mode 100644 index 0000000000..743536bbe7 --- /dev/null +++ b/python/src/prima/common/history.py @@ -0,0 +1,17 @@ +def savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist): + ''' + Save the data values to the history lists. + + The implementation of this function is vastly different from the Fortran implementation. + This is mostly due to the ease of creating and appending to lists in Python + + However just like the Fortran version we should be concerned about both performance + and memory constraints. It will probably be better to initialize an array of NaN for + each of the histories and keep track of how many indices we have stored. Not needed for + the moment. + ''' + if len(xhist) < maxhist: + xhist.append(x) + fhist.append(f) + chist.append(cstrv) + conhist.append(constr) \ No newline at end of file diff --git a/python/src/prima/common/preproc.py b/python/src/prima/common/preproc.py new file mode 100644 index 0000000000..c92844b554 --- /dev/null +++ b/python/src/prima/common/preproc.py @@ -0,0 +1,267 @@ +from prima.common.consts import DEBUGGING, EPS, IPRINT_DEFAULT, FTARGET_DEFAULT, \ + MIN_MAXFILT, MAXFILT_DEFAULT, MAXHISTMEM, ETA1_DEFAULT, ETA2_DEFAULT, \ + GAMMA1_DEFAULT, GAMMA2_DEFAULT, RHOBEG_DEFAULT, RHOEND_DEFAULT, \ + CTOL_DEFAULT, CWEIGHT_DEFAULT +from prima.common.present import present +from warnings import warn +import numpy as np + + +def preproc(solver, num_vars, iprint, maxfun, maxhist, ftarget, rhobeg, rhoend, + num_constraints=None, npt=None, maxfilt=None, ctol=None, cweight=None, + eta1=None, eta2=None, gamma1=None, gamma2=None, is_constrained=None, has_rhobeg=None, + honour_x0=None, xl=None, xu=None, x0=None): + ''' + This subroutine preprocesses the inputs. It does nothing to the inputs that are valid. + ''' + # Preconditions + if DEBUGGING: + assert num_vars >= 1 + if present(num_constraints): + assert num_constraints >= 0 + assert num_constraints == 0 or solver.lower() == 'cobyla' + if solver.lower() == 'cobyla' and present(num_constraints) and present(is_constrained): + assert num_constraints == 0 or is_constrained + if solver.lower() == 'bobyqa': + assert present(xl) and present(xu) + assert all(xu - xl >= 2 * EPS) + if present(honour_x0): + assert solver.lower() == 'bobyqa' and present(has_rhobeg) and present(xl) and present(xu) and present(x0) + + #====================# + # Calculation starts # + #====================# + + # Read num_constraints, if necessary + num_constraints = num_constraints if (present(num_constraints) and solver.lower() == 'cobyla') else 0 + + # Decide whether the problem is truly constrained + is_constrained = is_constrained if (present(is_constrained)) else num_constraints > 0 + + # Validate IPRINT + if np.abs(iprint) > 3: + iprint = IPRINT_DEFAULT + warn(f'{solver}: Invalid IPRINT; it should be 0, 1, -1, 2, -2, 3, or -3; it is set to {iprint}') + + # Validate MAXFUN + if solver.lower() == 'uobyqa': + min_maxfun = (num_vars + 1) * (num_vars + 2) / 2 + 1 + min_maxfun_str = '(N+1)(N+2)/2 + 1' + elif solver.lower() == 'cobyla': + min_maxfun = num_vars + 2 + min_maxfun_str = 'num_vars + 2' + else: # CASE ('NEWUOA', 'BOBYQA', 'LINCOA') + min_maxfun = num_vars + 3 + min_maxfun_str = 'num_vars + 3' + if maxfun < min_maxfun: + maxfun = min_maxfun + warn(f'{solver}: Invalid MAXFUN; it should be at least {min_maxfun_str}; it is set to {maxfun}') + + # Validate MAXHIST + if maxhist < 0: + maxhist = maxfun + warn(f'{solver}: Invalid MAXHIST; it should be a nonnegative integer; it is set to {maxhist}') + maxhist = min(maxhist, maxfun) # MAXHIST > MAXFUN is never needed. + + # Validate FTARGET + if np.isnan(ftarget): + ftarget = FTARGET_DEFAULT + warn(f'{solver}: Invalid FTARGET; it should be a real number; it is set to {ftarget}') + + # Validate NPT + if (solver.lower() == 'newuoa' or solver.lower() == 'bobyqa' or solver.lower() == 'lincoa') and present(npt): + if (npt < num_vars + 2 or npt > min(maxfun - 1, ((num_vars + 2) * (num_vars + 1)) / 2)): + npt = int(min(maxfun - 1, 2 * num_vars + 1)) + warn(f'{solver}: Invalid NPT; it should be an integer in the interval [N+2, (N+1)(N+2)/2] and less than MAXFUN; it is set to {npt}') + + # Validate MAXFILT + if present(maxfilt) and (solver.lower() == 'lincoa' or solver.lower() == 'cobyla'): + maxfilt_in = maxfilt + if maxfilt < 1: + maxfilt = MAXFILT_DEFAULT # The inputted MAXFILT is obviously wrong. + else: + maxfilt = max(MIN_MAXFILT, maxfilt) # The inputted MAXFILT is too small. + # Further revise MAXFILT according to MAXHISTMEM. + if solver.lower() == 'lincoa': + unit_memo = (num_vars + 2) * np.dtype(float).itemsize + elif solver.lower() == 'cobyla': + unit_memo = (num_constraints + num_vars + 2) * np.dtype(float).itemsize + else: + unit_memo = 1 + # We cannot simply set MAXFILT = MIN(MAXFILT, MAXHISTMEM/...), as they may not have + # the same kind, and compilers may complain. We may convert them, but overflow may occur. + if maxfilt > MAXHISTMEM / unit_memo: + maxfilt = int(MAXHISTMEM / unit_memo) # Integer division. + maxfilt = min(maxfun, max(MIN_MAXFILT, maxfilt)) + if is_constrained: + if maxfilt_in < 1: + warn(f'{solver}: Invalid MAXFILT; it should be a positive integer; it is set to {maxfilt}') + elif maxfilt_in < min(maxfun, MIN_MAXFILT): + warn(f'{solver}: MAXFILT is too small; it is set to {maxfilt}') + elif maxfilt < min(maxfilt_in, maxfun): + warn(f'{solver}: MAXFILT is set to {maxfilt} due to memory limit') + + # Validate ETA1 and ETA2 + eta1_local = eta1 if present(eta1) else ETA1_DEFAULT + eta2_local = eta2 if present(eta2) else ETA2_DEFAULT + + # When the difference between ETA1 and ETA2 is tiny, we force them to equal. + # See the explanation around RHOBEG and RHOEND for the reason. + if present(eta1) and present(eta2): + if np.abs(eta1 - eta2) < 1.0E2 * EPS * max(np.abs(eta1), 1): + eta2 = eta1 + + if present(eta1): + if np.isnan(eta1): + # In this case, we take the value hard coded in Powell's original code + # without any warning. It is useful when interfacing with MATLAB/Python. + eta1 = ETA1_DEFAULT + elif eta1 < 0 or eta1 >= 1: + # Take ETA2 into account if it has a valid value. + if present(eta2) and eta2_local > 0 and eta2_local <= 1: + eta1 = max(EPS, eta2 / 7.0) + else: + eta1 = ETA1_DEFAULT + warn(f'{solver}: Invalid ETA1; it should be in the interval [0, 1) and not more than ETA2; it is set to {eta1}') + + if present(eta2): + if np.isnan(eta2): + # In this case, we take the value hard coded in Powell's original code + # without any warning. It is useful when interfacing with MATLAB/Python. + eta2 = ETA2_DEFAULT + elif present(eta1) and (eta2 < eta1_local or eta2 > 1): + eta2 = (eta1 + 2) / 3.0 + warn(f'{solver}: Invalid ETA2; it should be in the interval [0, 1) and not less than ETA1; it is set to {eta2}') + + # Validate GAMMA1 and GAMMA2 + if present(gamma1): + if np.isnan(gamma1): + # In this case, we take the value hard coded in Powell's original code + # without any warning. It is useful when interfacing with MATLAB/Python. + gamma1 = GAMMA1_DEFAULT + elif gamma1 <= 0 or gamma1 >= 1: + gamma1 = GAMMA1_DEFAULT + warn(f'{solver}: Invalid GAMMA1; it should in the interval (0, 1); it is set to {gamma1}') + + if present(gamma2): + if np.isnan(gamma2): + # In this case, we take the value hard coded in Powell's original code + # without any warning. It is useful when interfacing with MATLAB/Python. + gamma2 = GAMMA2_DEFAULT + elif gamma2 < 1 or np.isinf(gamma2): + gamma2 = GAMMA2_DEFAULT + warn(f'{solver}: Invalid GAMMA2; it should be a real number not less than 1; it is set to {gamma2}') + + # Validate RHOBEG and RHOEND + + if np.abs(rhobeg - rhoend) < 1.0e2 * EPS * np.maximum(np.abs(rhobeg), 1): + # When the data is passed from the interfaces (e.g., MEX) to the Fortran code, RHOBEG, and RHOEND + # may change a bit. It was observed in a MATLAB test that MEX passed 1 to Fortran as + # 0.99999999999999978. Therefore, if we set RHOEND = RHOBEG in the interfaces, then it may happen + # that RHOEND > RHOBEG, which is considered as an invalid input. To avoid this situation, we + # force RHOBEG and RHOEND to equal when the difference is tiny. + rhoend = rhobeg + + # Revise the default values for RHOBEG/RHOEND according to the solver. + if solver.lower() == 'bobyqa': + rhobeg_default = np.maximum(EPS, np.min(RHOBEG_DEFAULT, np.min(xu - xl) / 4.0)) + rhoend_default = np.maximum(EPS, np.min(0.1 * rhobeg_default, RHOEND_DEFAULT)) + else: + rhobeg_default = RHOBEG_DEFAULT + rhoend_default = RHOEND_DEFAULT + + if solver.lower() == 'bobyqa': + # Do NOT merge the IF below into the ELIF above! Otherwise, XU and XL may be accessed even if + # the solver is not BOBYQA, because the logical evaluation is not short-circuit. + if rhobeg > np.min(xu - xl) / 2: + # Do NOT make this revision if RHOBEG not positive or not finite, because otherwise RHOBEG + # will get a huge value when XU or XL contains huge values that indicate unbounded variables. + rhobeg = np.min(xu - xl) / 4.0 # Here, we do not take RHOBEG_DEFAULT. + warn(f'{solver}: Invalid RHOBEG; {solver} requires 0 < RHOBEG <= np.min(XU-XL)/2; it is set to np.min(XU-XL)/4') + if rhobeg <= 0 or np.isnan(rhobeg) or np.isinf(rhobeg): + # Take RHOEND into account if it has a valid value. We do not do this if the solver is BOBYQA, + # which requires that RHOBEG <= (XU-XL)/2. + if np.isfinite(rhoend) and rhoend > 0 and solver.lower() != 'bobyqa': + rhobeg = max(10 * rhoend, rhobeg_default) + else: + rhobeg = rhobeg_default + warn(f'{solver}: Invalid RHOBEG; it should be a positive number; it is set to {rhobeg}') + + if rhoend <= 0 or rhobeg < rhoend or np.isnan(rhoend) or np.isinf(rhoend): + rhoend = max(EPS, min(0.1 * rhobeg, rhoend_default)) + warn(f'{solver}: Invalid RHOEND; it should be a positive number and RHOEND <= RHOBEG; it is set to {rhoend}') + + # For BOBYQA, revise X0 or RHOBEG so that the distance between X0 and the inactive bounds is at + # least RHOBEG. If HONOUR_X0 == TRUE, revise RHOBEG if needed; otherwise, revise HONOUR_X0 if needed. + if present(honour_x0): + if honour_x0: + rhobeg_old = rhobeg; + lbx = np.isfinite(xl) & (x0 - xl <= EPS * np.maximum(1, np.abs(xl))) # X0 essentially equals XL + ubx = np.isfinite(xu) & (x0 - xu >= -EPS * np.maximum(1, np.abs(xu))) # X0 essentially equals XU + x0[lbx] = xl[lbx] + x0[ubx] = xu[ubx] + rhobeg = max(EPS, np.min([rhobeg, x0[~lbx] - xl[~lbx], xu[~ubx] - x0[~ubx]])) + if rhobeg_old - rhobeg > EPS * max(1, rhobeg_old): + rhoend = max(EPS, min(0.1 * rhobeg, rhoend)) # We do not revise RHOEND unless RHOBEG is truly revised. + if has_rhobeg: + warn(f'{solver}: RHOBEG is revised to {rhobeg} and RHOEND to at most 0.1*RHOBEG so that the distance between X0 and the inactive bounds is at least RHOBEG') + else: + rhoend = np.minimum(rhoend, rhobeg) # This may update RHOEND slightly. + else: + x0_old = x0 # Recorded to see whether X0 is really revised. + # N.B.: The following revision is valid only if XL <= X0 <= XU and RHOBEG <= MINVAL(XU-XL)/2, + # which should hold at this point due to the revision of RHOBEG and moderation of X0. + # The cases below are mutually exclusive in precise arithmetic as MINVAL(XU-XL) >= 2*RHOBEG. + lbx = x0 <= xl + 0.5 * rhobeg + lbx_plus = (x0 > xl + 0.5 * rhobeg) & (x0 < xl + rhobeg) + ubx = x0 >= xu - 0.5 * rhobeg + ubx_minus = (x0 < xu - 0.5 * rhobeg) & (x0 > xu - rhobeg) + x0[lbx] = xl[lbx] + x0[lbx_plus] = xl[lbx_plus] + rhobeg + x0[ubx] = xu[ubx] + x0[ubx_minus] = xu[ubx_minus] - rhobeg + + if (any(np.abs(x0_old - x0) > 0)): + warn(f'{solver}: X0 is revised so that the distance between X0 and the inactive bounds is at least RHOBEG set HONOUR_X0 to .TRUE. if you prefer to keep X0 unchanged') + + # Validate CTOL (it can be 0) + if (present(ctol)): + if (np.isnan(ctol) or ctol < 0): + ctol = CTOL_DEFAULT + if (is_constrained): + warn(f'{solver}: Invalid CTOL; it should be a nonnegative number; it is set to {ctol}') + + # Validate CWEIGHT (it can be +Inf) + if (present(cweight)): + if (np.isnan(cweight) or cweight < 0): + cweight = CWEIGHT_DEFAULT + if (is_constrained): + warn(f'{solver}: Invalid CWEIGHT; it should be a nonnegative number; it is set to {cweight}') + + #====================! + # Calculation ends ! + #====================! + + # Postconditions + if DEBUGGING: + assert abs(iprint) <= 3 + assert maxhist >= 0 and maxhist <= maxfun + if present(npt): + assert maxfun >= npt + 1 + assert npt >= 3 + if present(maxfilt): + assert maxfilt >= np.minimum(MIN_MAXFILT, maxfun) and maxfilt <= maxfun + if present(eta1) and present(eta2): + assert eta1 >= 0 and eta1 <= eta2 and eta2 < 1 + if present(gamma1) and present(gamma2): + assert gamma1 > 0 and gamma1 < 1 and gamma2 > 1 + assert rhobeg >= rhoend and rhoend > 0 + if solver.lower() == 'bobyqa': + assert all(rhobeg <= (xu - xl) / 2) + assert all(np.isfinite(x0)) + assert all(x0 >= xl and (x0 <= xl or x0 >= xl + rhobeg)) + assert all(x0 <= xu and (x0 >= xu or x0 <= xu - rhobeg)) + if present(ctol): + assert ctol >= 0 + + return iprint, maxfun, maxhist, ftarget, rhobeg, rhoend, npt, maxfilt, ctol, cweight, eta1, eta2, gamma1, gamma2, x0 \ No newline at end of file diff --git a/python/src/prima/common/present.py b/python/src/prima/common/present.py new file mode 100644 index 0000000000..15f8b47b67 --- /dev/null +++ b/python/src/prima/common/present.py @@ -0,0 +1,5 @@ +def present(x): + ''' + This is a Python equivalent of the Fortran 'present' function for optional arguments. + ''' + return x is not None \ No newline at end of file diff --git a/python/src/prima/common/redrho.py b/python/src/prima/common/redrho.py new file mode 100644 index 0000000000..f24e747eb3 --- /dev/null +++ b/python/src/prima/common/redrho.py @@ -0,0 +1,34 @@ +import numpy as np + +def redrho(rho_in, rhoend): + ''' + This function calculates RHO when it needs to be reduced. + The sceme is shared by UOBYQA, NEWUOA, BOBYQA, LINCOA. For COBYLA, Powell's code reduces RHO by + 'RHO *= 0.5; if RHO <= 1.5 * RHOEND: RHO = RHOEND' as specified in (11) of the COBYLA + paper. However, this scheme seems to work better, especially after we introduce DELTA. + ''' + + # Preconditions + assert rho_in > rhoend > 0 + + #====================# + # Calculation starts # + #====================# + + rho_ratio = rho_in / rhoend + + if rho_ratio > 250: + rho = 0.1 * rho_in + elif rho_ratio <= 16: + rho = rhoend + else: + rho = np.sqrt(rho_ratio) * rhoend # rho = np.sqrt(rho * rhoend) + + #==================# + # Calculation ends # + #==================# + + # Postconditions + assert rho_in > rho >= rhoend + + return rho \ No newline at end of file diff --git a/python/src/prima/examples/cobyla/cobyla_example.py b/python/src/prima/examples/cobyla/cobyla_example.py index fba02303a9..7e4d6d4282 100644 --- a/python/src/prima/examples/cobyla/cobyla_example.py +++ b/python/src/prima/examples/cobyla/cobyla_example.py @@ -52,19 +52,49 @@ def calcfc_hexagon(x): if __name__ == "__main__": n_chebyquad = 6 - x_chebyquad = np.array([i/(n_chebyquad+1) for i in range(1, n_chebyquad+1)]) - m = 0 - f = 0 - cstrv = np.inf - x, f = cobyla(calcfc_chebyquad, m, x_chebyquad, f, cstrv) - print(x, f) - # evaluate - f, constr = calcfc_chebyquad(x_chebyquad) + + # The following lines illustrates how to call the solver to solve the Chebyquad problem. + x_chebyquad = np.array([i/(n_chebyquad+1) for i in range(1, n_chebyquad+1)]) # Starting point + m = 0 # Dimension of constraints. M must be specified correctly, or the program will crash! + result = cobyla(calcfc_chebyquad, m, x_chebyquad) # This call will not print anything + + # In addition to the compulsory arguments, the following illustration specifies also CONSTR, RHOBEG, + # and IPRINT, which are optional. All the unspecified optional arguments (RHOEND, MAXFUN, etc.) will + # take their default values coded in the solver. + x_chebyquad = np.array([i/(n_chebyquad+1) for i in range(1, n_chebyquad+1)]) # Starting point + result = cobyla(calcfc_chebyquad, m, x_chebyquad, rhobeg=0.2 * x_chebyquad[0], iprint=1) + + # The following lines illustrates how to call the solver to solve the Hexagon problem. x_hexagon = np.zeros(9) + 2 - m_hexagon = 14 - f = 0 - cstrv = 0 - initial_f, initial_constr = calcfc_hexagon(x_hexagon) - print(initial_f, initial_constr) - x, f = cobyla(calcfc_hexagon, m_hexagon, x_hexagon, f, cstrv) - print(x, f) \ No newline at end of file + m_hexagon = 14 # Dimension of constraints. M must the specified correctly, or the program will crash! + result = cobyla(calcfc_hexagon, m_hexagon, x_hexagon) # This call will not print anything. + + # In addition to the compulsory arguments, the following illustration specifies also CONSTR, RHOBEG, + # and IPRINT, which are optional. All the unspecified optional arguments (RHOEND, MAXFUN, etc.) will + # take their default values coded in the solver. Note that CONSTR is an output, which will be set to + # the value of CONSTR(X_HEXAGON) when the solver returns. + result = cobyla(calcfc_hexagon, m_hexagon, x_hexagon, rhobeg=1.0, rhoend=1.0e-3, iprint=1) + + ''' + ! The following lines illustrates how to call the solver to solve the Chebyquad problem. + x_chebyquad = [(real(i, RP) / real(n_chebyquad + 1, RP), i=1, n_chebyquad)] ! Starting point + m = 0 ! Dimension of constraints. M must the specified correctly, or the program will crash! + call cobyla(calcfc_chebyquad, m, x_chebyquad, f, cstrv) ! This call will not print anything. + + ! In addition to the compulsory arguments, the following illustration specifies also CONSTR, RHOBEG, + ! and IPRINT, which are optional. All the unspecified optional arguments (RHOEND, MAXFUN, etc.) will + ! take their default values coded in the solver. + x_chebyquad = [(real(i, RP) / real(n_chebyquad + 1, RP), i=1, n_chebyquad)] ! Starting point + call cobyla(calcfc_chebyquad, m, x_chebyquad, f, cstrv, rhobeg=0.2_RP * x_chebyquad(1), iprint=1, nf=nf, info=info) + + ! The following lines illustrates how to call the solver to solve the Hexagon problem. + x_hexagon = 2.0_RP ! Starting point. + m = 14 ! + call cobyla(calcfc_hexagon, m, x_hexagon, f, cstrv) ! This call will not print anything. + + ! In addition to the compulsory arguments, the following illustration specifies also CONSTR, RHOBEG, + ! and IPRINT, which are optional. All the unspecified optional arguments (RHOEND, MAXFUN, etc.) will + ! take their default values coded in the solver. Note that CONSTR is an output, which will be set to + ! the value of CONSTR(X_HEXAGON) when the solver returns. + x_hexagon = 2.0_RP ! Starting point. + call cobyla(calcfc_hexagon, m, x_hexagon, f, cstrv, constr=constr, rhobeg=1.0_RP, rhoend=1.0D-3, iprint=1, nf=nf, info=info)''' \ No newline at end of file From 0b6108ab84ae830cf63be914e10cbdaaeb856d22 Mon Sep 17 00:00:00 2001 From: Nickolai Belakovski Date: Tue, 26 Sep 2023 14:55:53 -0700 Subject: [PATCH 4/6] Added all pre and post condition statements --- python/src/prima/cobyla/cobylb.py | 181 +++++++++------- python/src/prima/cobyla/geometry.py | 266 +++++++++++++++++------- python/src/prima/cobyla/initialize.py | 119 +++++++---- python/src/prima/cobyla/trustregion.py | 165 ++++++++------- python/src/prima/cobyla/update.py | 137 +++++++++++- python/src/prima/common/checkbreak.py | 2 +- python/src/prima/common/consts.py | 10 +- python/src/prima/common/infos.py | 26 +++ python/src/prima/common/linalg.py | 33 ++- python/src/prima/common/message.py | 276 +++++++++++++++++++++++++ python/src/prima/common/output.py | 20 -- python/src/prima/common/ratio.py | 44 ++++ python/src/prima/common/redrho.py | 7 +- python/src/prima/common/selectx.py | 142 +++++++++++-- 14 files changed, 1106 insertions(+), 322 deletions(-) create mode 100644 python/src/prima/common/infos.py create mode 100644 python/src/prima/common/message.py delete mode 100644 python/src/prima/common/output.py create mode 100644 python/src/prima/common/ratio.py diff --git a/python/src/prima/cobyla/cobylb.py b/python/src/prima/cobyla/cobylb.py index d37d028ee4..3cc656f313 100644 --- a/python/src/prima/cobyla/cobylb.py +++ b/python/src/prima/cobyla/cobylb.py @@ -1,52 +1,20 @@ import numpy as np from prima.common.checkbreak import checkbreak_con -from prima.common.consts import INFO_DEFAULT, REALMAX, EPS, MAXTR_REACHED, \ - DAMAGING_ROUNDING, SMALL_TR_RADIUS, DEBUGGING, MIN_MAXFILT +from prima.common.consts import REALMAX, EPS, DEBUGGING, MIN_MAXFILT +from prima.common.infos import INFO_DEFAULT, MAXTR_REACHED, DAMAGING_ROUNDING, \ + SMALL_TR_RADIUS from prima.common.evaluate import evaluate from prima.common.history import savehist +from prima.common.linalg import isinv +from prima.common.message import fmsg, retmsg, rhomsg +from prima.common.ratio import redrat from prima.common.redrho import redrho from prima.common.selectx import savefilt, selectx from prima.cobyla.update import updatepole, findpole, updatexfc from prima.cobyla.geometry import assess_geo, setdrop_tr, geostep, setdrop_geo -from prima.cobyla.trustregion import trstlp, redrat, trrad +from prima.cobyla.trustregion import trstlp, trrad from prima.cobyla.initialize import initxfc, initfilt -def fcratio(conmat, fval): - ''' - This function calculates the ratio between the "typical changre" of F and that of CONSTR. - See equations (12)-(13) in Section 3 of the COBYLA paper for the definition of the ratio. - ''' - - # Preconditions - assert np.size(fval) >= 1 - assert np.size(conmat, 1) == np.size(fval) - - #====================# - # Calculation starts # - #====================# - - cmin = np.min(conmat, axis=1) - cmax = np.max(conmat, axis=1) - fmin = min(fval) - fmax = max(fval) - if any(cmin < 0.5 * cmax) and fmin < fmax: - denom = np.min(np.maximum(cmax, 0) - cmin, where=cmin < 0.5 * cmax, initial=np.inf) - # Powell mentioned the following alternative in section 4 of his COBYLA paper. According to a test - # on 20230610, it does not make much difference to the performance. - # denom = np.max(max(*cmax, 0) - cmin, mask=(cmin < 0.5 * cmax)) - r = (fmax - fmin) / denom - else: - r = 0 - - #==================# - # Calculation ends # - #==================# - - # Postconditions - assert r >= 0 - - return r - def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, gamma1, gamma2, rhobeg, rhoend, constr, f, x, @@ -62,6 +30,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, conhist = [] # Local variables + solver = 'COBYLA' # CPENMIN is the minimum of the penalty parameter CPEN for the L-infinity constraint violation in # the merit function. Note that CPENMIN = 0 in Powell's implementation, which allows CPEN to be 0. # Here, we take CPENMIN > 0 so that CPEN is always positive. This avoids the situation where PREREM @@ -114,7 +83,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # from the other vertices to SIM[:, NUM_VARS]. FVAL, CONMAT, and CVAL hold the function values, # constraint values, and constraint violations on the vertices in the order corresponding to SIM. evaluated, conmat, cval, sim, simi, fval, nf, subinfo = initxfc(calcfc, iprint, maxfun, constr, ctol, f, ftarget, rhobeg, x, - srname="COBYLA") + xhist, fhist, chist, conhist, maxhist, srname="COBYLA") # Initialize the filter, including xfilt, ffilt, confiolt, cfilt, and nfilt. # N.B.: The filter is used only when selecting which iterate to return. It does not @@ -144,8 +113,23 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # TODO: Implement me # rangehist(nf, xhist, fhist, chist, conhist) # print a return message according to IPRINT. - # TODO: Implement me - # retmsg("COBYLA", info, iprint, nf, f, x, cstrv, constr) + retmsg(solver, info, iprint, nf, f, x, cstrv, constr) + # Postconditions + if DEBUGGING: + assert nf <= maxfun + assert np.size(x) == num_vars and not any(np.isnan(x)) + assert not (np.isnan(f) or np.isposinf(f)) + # assert np.size(xhist, 0) == n and np.size(xhist, 1) == maxxhist + # assert not any(np.isnan(xhist(:, 1:min(nf, maxxhist)))) + # The last calculated X can be Inf (finite + finite can be Inf numerically). + # assert np.size(fhist) == maxfhist + # assert not any(np.isnan(fhist(1:min(nf, maxfhist))) or np.isposinf(fhist(1:min(nf, maxfhist)))) + # assert np.size(conhist, 0) == m and np.size(conhist, 1) == maxconhist + # assert not any(np.isnan(conhist(:, 1:min(nf, maxconhist))) or np.isneginf(conhist(:, 1:min(nf, maxconhist)))) + # assert np.size(chist) == maxchist + # assert not any(chist(1:min(nf, maxchist)) < 0 or np.isnan(chist(1:min(nf, maxchist))) or np.isposinf(chist(1:min(nf, maxchist)))) + # nhist = minval([nf, maxfhist, maxchist]) + # assert not any(isbetter(fhist(1:nhist), chist(1:nhist), f, cstrv, ctol)) # Set some more initial values. @@ -159,7 +143,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # code simply initializes CPEN to 0. rho = rhobeg delta = rhobeg - cpen = max(cpenmin, min(1.0E3, fcratio(conmat, fval))) # Powell's code: CPEN = ZERO + cpen = np.maximum(cpenmin, np.minimum(1.0E3, fcratio(conmat, fval))) # Powell's code: CPEN = ZERO prerec = -REALMAX preref = -REALMAX prerem = -REALMAX @@ -177,21 +161,12 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # T. M. Ragonneau's thesis: "Model-Based Derivative-Free Optimization Methods and Software." # According to test on 20230613, for COBYLA, this Powellful updating scheme of DELTA works slightly # better than setting directly DELTA = max(NEW_DELTA, RHO). - gamma3 = max(1, min(0.75 * gamma2, 1.5)) + gamma3 = np.maximum(1, np.minimum(0.75 * gamma2, 1.5)) # MAXTR is the maximal number of trust region iterations. Each trust-region iteration takes 1 or 2 # function evaluations unless the trust-region step is short or the trust-region subproblem solver # fails but the geometry step is not invoked. Thus the following MAXTR is unlikely to be reached - # - # - # Normally each trust-region - # iteration takes 1 or 2 function evaluations except for the following cases: - # 1. The update of cpen alters the optimal vertex - # 2. The trust-region step is short or fails to reduce either the - # linearized objective or the linearized constraint violation but - # the geometry step is not invoked. - # The following maxtr is unlikely to be reached. - maxtr = max(maxfun, 2*maxfun) # max: precaution against overflow, which will make 2*MAXFUN < 0 + maxtr = np.maximum(maxfun, 2*maxfun) # max: precaution against overflow, which will make 2*MAXFUN < 0 info = MAXTR_REACHED # Begin the iterative procedure @@ -201,7 +176,6 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # step will be taken, corresponding to the "Branch (Delta)" in the COBYLA paper. # REDUCE_RHO - Will we reduce rho after the trust-region iteration? # COBYLA never sets IMPROVE_GEO and REDUCE_RHO to True simultaneously. - for tr in range(maxtr): # Increase the penalty parameter CPEN, if needed, so that PREREM = PREREF + CPEN * PREREC > 0. # This is the first (out of two) update of CPEN, where CPEN increases or remains the same. @@ -279,8 +253,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, nf += 1 # Print a message about the function/constraint evaluation accoring to iprint - # TODO: Implement me - # fmsg(solver, iprint, nf, f, x, cstrv, constr) + fmsg(solver, 'Trust region', iprint, nf, delta, f, x, cstrv, constr) # Save X, F, CONSTR, CSTRV into the history. savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist) # Save X, F, CONSTR, CSTRV into the filter. @@ -441,8 +414,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, nf += 1 # Print a message about the function/constraint evaluation accoring to iprint - # TODO: Implement me - # fmsg(solver, iprint, nf, f, x, cstrv, constr) + fmsg(solver, 'Geometry', iprint, nf, delta, f, x, cstrv, constr) # Save X, F, CONSTR, CSTRV into the history. savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist) # Save X, F, CONSTR, CSTRV into the filter. @@ -470,10 +442,9 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, rho = redrho(rho, rhoend) # THe second (out of two) updates of CPEN, where CPEN decreases or remains the same. # Powell's code: cpen = min(cpen, fcratio(fval, conmat)), which may set CPEN to 0. - cpen = max(cpenmin, min(cpen, fcratio(conmat, fval))) + cpen = np.maximum(cpenmin, np.minimum(cpen, fcratio(conmat, fval))) # Print a message about the reduction of rho according to iprint - # TODO: implement me! - #call rhomsg(solver, iprint, nf, fval(n + 1), rho, sim(:, n + 1), cval(n + 1), conmat(:, n + 1), cpen) + rhomsg(solver, iprint, nf, fval[num_vars], rho, sim[:, num_vars], cval[num_vars], conmat[:, num_vars], cpen) conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi) # Check whether to break due to damaging rounding detected in updatepole if subinfo == DAMAGING_ROUNDING: @@ -489,7 +460,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, x = sim[:, num_vars] + d f, constr, cstrv = evaluate(calcfc, x) nf += 1 - # TODO: retmsg + fmsg(solver, 'Trust region', iprint, nf, rho, f, x, cstrv, constr) savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist) nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt) @@ -506,8 +477,7 @@ def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget, # call rangehist(nf, xhist, fhist, chist, conhist) # Print a return message according to IPRINT. - # TODO: Implement me - #call retmsg(solver, info, iprint, nf, f, x, cstrv, constr) + retmsg(solver, info, iprint, nf, f, x, cstrv, constr) return x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info @@ -518,10 +488,34 @@ def getcpen(conmat, cpen, cval, delta, fval, rho, sim, simi): See the discussions around equation (9) of the COBYLA paper. ''' - num_constraints = conmat.shape[0] - num_vars = sim.shape[0] + # Intermediate variables + A = np.zeros((np.size(sim, 0), np.size(conmat, 0) + 1)) + itol = 1 - A = np.zeros((num_vars, num_constraints + 1)) + # Sizes + num_constraints = np.size(conmat, 0) + num_vars = np.size(sim, 0) + + # Preconditions + if DEBUGGING: + assert num_constraints >= 0 + assert num_vars >= 1 + assert cpen > 0 + assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1 + assert not (np.isnan(conmat) | np.isneginf(conmat)).any() + assert np.size(cval) == num_vars + 1 and not any(cval < 0 | np.isnan(cval) | np.isposinf(cval)) + assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval)) + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) + assert delta >= rho and rho > 0 + + #====================# + # Calculation starts # + #====================# # Initialize INFO, PREREF, and PREREC, which are needed in the postconditions info = INFO_DEFAULT @@ -572,4 +566,53 @@ def getcpen(conmat, cpen, cval, delta, fval, rho, sim, simi): if findpole(cpen, cval, fval) == num_vars: break - return cpen \ No newline at end of file + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert cpen >= cpen and cpen > 0 + assert preref + cpen * prerec > 0 or info == DAMAGING_ROUNDING or \ + not (prerec >= 0 and np.maximum(prerec, preref) > 0) or not np.isfinite(preref) + + return cpen + + +def fcratio(conmat, fval): + ''' + This function calculates the ratio between the "typical changre" of F and that of CONSTR. + See equations (12)-(13) in Section 3 of the COBYLA paper for the definition of the ratio. + ''' + + # Preconditions + if DEBUGGING: + assert np.size(fval) >= 1 + assert np.size(conmat, 1) == np.size(fval) + + #====================# + # Calculation starts # + #====================# + + cmin = np.min(conmat, axis=1) + cmax = np.max(conmat, axis=1) + fmin = min(fval) + fmax = max(fval) + if any(cmin < 0.5 * cmax) and fmin < fmax: + denom = np.min(np.maximum(cmax, 0) - cmin, where=cmin < 0.5 * cmax, initial=np.inf) + # Powell mentioned the following alternative in section 4 of his COBYLA paper. According to a test + # on 20230610, it does not make much difference to the performance. + # denom = np.max(max(*cmax, 0) - cmin, mask=(cmin < 0.5 * cmax)) + r = (fmax - fmin) / denom + else: + r = 0 + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert r >= 0 + + return r \ No newline at end of file diff --git a/python/src/prima/cobyla/geometry.py b/python/src/prima/cobyla/geometry.py index 9a6e0487d2..7526886e99 100644 --- a/python/src/prima/cobyla/geometry.py +++ b/python/src/prima/cobyla/geometry.py @@ -1,63 +1,57 @@ +from prima.common.consts import DEBUGGING +from prima.common.linalg import isinv import numpy as np -def setdrop_geo(delta, factor_alpha, factor_beta, sim, simi): + +def assess_geo(delta, factor_alpha, factor_beta, sim, simi): ''' - This function finds (the index) of a current interpolation point to be replaced with a - geometry-improving point. See (15)-(16) of the COBYLA paper. - N.B.: COBYLA never sets jdrop to NUM_VARS + This function checks if an interpolation set has acceptable geometry as (14) of the COBYLA paper ''' - num_vars = sim.shape[0] + # Local variables + itol = 0.1 - # Calculate the values of sigma and eta - # VSIG[j] for j = 0...NUM_VARS-1 is the Euclidean distance from vertex J to the opposite face of the simplex. - vsig = 1 / np.sqrt(np.sum(simi**2, axis=1)) - veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0)) + # Sizes + num_vars = np.size(sim, 0) - # Decide which vertex to drop from the simplex. It will be replaced with a new point to improve the - # acceptability of the simplex. See equations (15) and (16) of the COBYLA paper. - if any(veta > factor_beta * delta): - jdrop = np.where(veta == max(veta[~np.isnan(veta)]))[0][0] - elif any(vsig < factor_alpha * delta): - jdrop = np.where(vsig == min(vsig[~np.isnan(vsig)]))[0][0] - else: - # We arrive here if vsig and veta are all nan, which can happen due to nan in sim and simi - # which should not happen unless there is a bug - jdrop = None + # Preconditions + if DEBUGGING: + assert num_vars >= 1 + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) + assert delta > 0 + assert factor_alpha > 0 and factor_alpha < 1 + assert factor_beta > 1 - # Zaikun 230202: What if we consider veta and vsig together? The following attempts do not work well. - # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)*np.sum(simi**2, axis=1)) # Condition number - # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)**2 * np.sum(simi**2, axis=1)) # Condition number times distance - return jdrop + #====================# + # Calculation starts # + #====================# -def geostep(jdrop, cpen, conmat, cval, delta, fval, factor_gamma, simi): - ''' - This function calculates a geometry step so that the geometry of the interpolation set is improved - when SIM[: JDROP_GEO] is replaced with SIM[:NUM_VARS] + D. See (15)--(17) of the COBYLA paper. - ''' - num_constraints = conmat.shape[0] - num_vars = simi.shape[0] + # Calculate the values of sigma and eta + # veta[j] (0 <= j < num_vars) is the distance between the vertices j and 0 (the best vertex) + # of the simplex. + # vsig[j] (0 <= j < num_vars) is the distance from vertex j to its opposite face of the simplex. + # Thus vsig <= veta. + # N.B.: What about the distance from vertex N+1 to its opposite face? Consider the simplex + # {V_{N+1}, V_{N+1} + L*e_1,... v_{N+1} + L*e_N}, where V_{N+1} is vertex N+1, + # namely the current "best" point, [e_1, ..., e_n] is an orthogonal matrix, and L is a + # constant in the order of delta. This simplex is optimal in the sense that the interpolation + # system has the minimal condition number, i.e. 1. For this simplex, the distance from + # V_{N+1} to its opposite face is L/sqrt(n). + vsig = 1/np.sqrt(np.sum(simi**2, axis=1)) # TODO: Check axis + veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0)) + adequate_geo = all(vsig >= factor_alpha * delta) and all(veta <= factor_beta * delta) - # SIMI[JDROP, :] is a vector perpendicular to the face of the simplex to the opposite of vertex - # JDROP. Thus VSIGJ * SIMI[JDROP, :] is the unit vector in this direction - vsigj = 1 / np.sqrt(np.sum(simi[jdrop, :]**2)) + #==================# + # Calculation ends # + #==================# - # Set D to the vector in the above-mentioned direction and with length FACTOR_GAMMA * DELTA. Since - # FACTOR_ALPHA < FACTOR_GAMMA < FACTOR_BETA, D improves the geometry of the simplex as per (14) of - # the COBYLA paper. This also explains why this subroutine does not replace DELTA with - # DELBAR = max(min(0.1 * np.sqrt(max(DISTSQ)), 0.5 * DELTA), RHO) as in NEWUOA. - d = factor_gamma * delta * (vsigj * simi[jdrop, :]) + return adequate_geo - # Calculate the coefficients of the linear approximations to the objective and constraint functions, - # placing minus the objective function gradient after the constraint gradients in the array A - A = np.zeros((num_vars, num_constraints + 1)) - A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T - A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi - cvmaxp = np.max(np.append(0, -d@A[:, :num_constraints] - conmat[:, num_vars])) - cvmaxn = np.max(np.append(0, d@A[:, :num_constraints] - conmat[:, num_vars])) - if 2 * np.dot(d, A[:, num_constraints]) < cpen * (cvmaxp - cvmaxn): - d *= -1 - return d def setdrop_tr(ximproved, d, delta, rho, sim, simi): ''' @@ -70,7 +64,27 @@ def setdrop_tr(ximproved, d, delta, rho, sim, simi): Note that UPDATEXFC should be revised accordingly. ''' - num_vars = sim.shape[0] + # Local variables + itol = 0.1 + + # Sizes + num_vars = np.size(sim, 0) + + # Preconditions + if DEBUGGING: + assert num_vars >= 1 + assert np.size(d) == num_vars and all(np.isfinite(d)) + assert delta >= rho and rho > 0 + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) + + #====================# + # Calculation starts # + #====================# # -------------------------------------------------------------------------------------------------- # # The following code is Powell's scheme for defining JDROP. @@ -121,7 +135,7 @@ def setdrop_tr(ximproved, d, delta, rho, sim, simi): # -------------------------------------------------------------------------------------------------- # # The following definition of JDROP is inspired by SETDROP_TR in UOBYQA/NEWUOA/BOBYQA/LINCOA. - # It is simpler and works better than Powell's scheme. Note that we allow JDROP to be NUM_VARS if + # It is simpler and works better than Powell's scheme. Note that we allow JDROP to be NUM_VARS+1 if # XIMPROVED is True, whereas Powell's code does not. # See also (4.1) of Scheinberg-Toint-2010: Self-Correcting Geometry in Model-Based Algorithms for # Derivative-Free Unconstrained Optimization, which refers to the strategy here as the "combined @@ -129,29 +143,28 @@ def setdrop_tr(ximproved, d, delta, rho, sim, simi): # DISTSQ[j] is the square of the distance from the jth vertex of the simplex to get "best" point so # far, taking the trial point SIM[:, NUM_VARS] + D into account. - distsq = np.zeros(sim.shape[1]) + distsq = np.zeros(np.size(sim, 1)) if ximproved: distsq[:num_vars] = np.sum((sim[:, :num_vars] - np.tile(d, (num_vars, 1)).T)**2, axis=0) - distsq[num_vars] = sum(d**2) + distsq[num_vars] = np.sum(d**2) else: distsq[:num_vars] = np.sum(sim[:, :num_vars]**2, axis=0) distsq[num_vars] = 0 - weight = np.maximum(np.ones(len(distsq)), distsq / max(rho, delta/10)**2) # Similar to Powell's NEWUOA code. + weight = np.maximum(1, distsq / np.maximum(rho, delta/10)**2) # Similar to Powell's NEWUOA code. # Other possible definitions of weight. They work almost the same as the one above. # weight = distsq # Similar to Powell's LINCOA code, but WRONG. See comments in LINCOA/geometry.f90. # weight = max(1, max(25 * distsq / delta**2)) # Similar to Powell's BOBYQA code, works well. # weight = max(1, max(10 * distsq / delta**2)) # weight = max(1, max(1e2 * distsq / delta**2)) - # weight = max(1, max(distsq / max(rho, delta/10)**2)) # weight = max(1, max(distsq / rho**2)) ! Similar to Powell's UOBYQA # If 0 <= j < NUM_VARS, SIMID[j] is the value of the jth Lagrange function at D; the value of the # (NUM_VARS+1)th Lagrange function is 1 - sum(SIMID). [SIMID, 1 - sum(SIMID)] is the counterpart of # VLAG in UOBYQA and DEN in NEWUOA/BOBYQA/LINCOA. simid = simi@d - score = weight * abs(np.array([*simid, 1 - sum(simid)])) + score = weight * abs(np.array([*simid, 1 - np.sum(simid)])) # If XIMPORVED = False (D does not render a better X), set SCORE[NUM_VARS] = -1 to avoid JDROP = NUM_VARS. if not ximproved: @@ -166,27 +179,138 @@ def setdrop_tr(ximproved, d, delta, rho, sim, simi): else: jdrop = None # We arrive here when XIMPROVED = False and no entry of score is positive. + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert jdrop is None or (jdrop >= 0 and jdrop < num_vars + 1) + assert jdrop <= num_vars or ximproved + assert jdrop >= 0 or not ximproved + # JDROP >= 1 when XIMPROVED = TRUE unless NaN occurs in DISTSQ, which should not happen if the + # starting point does not contain NaN and the trust-region/geometry steps never contain NaN. + return jdrop -def assess_geo(delta, factor_alpha, factor_beta, sim, simi): + +def setdrop_geo(delta, factor_alpha, factor_beta, sim, simi): ''' - This function checks if an interpolation set has acceptable geometry as (14) of the COBYLA paper + This function finds (the index) of a current interpolation point to be replaced with a + geometry-improving point. See (15)-(16) of the COBYLA paper. + N.B.: COBYLA never sets jdrop to NUM_VARS ''' - num_vars = sim.shape[0] + # Local variables + itol = 0.1 + + # Sizes + num_vars = np.size(sim, 0) + + # Preconditions + if DEBUGGING: + assert num_vars >= 1 + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) + assert factor_alpha > 0 and factor_alpha < 1 + assert factor_beta > 1 + assert not assess_geo(delta, factor_alpha, factor_beta, sim, simi) + + #====================# + # Calculation starts # + #====================# # Calculate the values of sigma and eta - # veta[j] (0 <= j < num_vars) is the distance between the vertices j and 0 (the best vertex) - # of the simplex. - # vsig[j] (0 <= j < num_vars) is the distance from vertex j to its opposite face of the simplex. - # Thus vsig <= veta. - # N.B.: What about the distance from vertex N+1 to its opposite face? Consider the simplex - # {V_{N+1}, V_{N+1} + L*e_1,... v_{N+1} + L*e_N}, where V_{N+1} is vertex N+1, - # namely the current "best" point, [e_1, ..., e_n] is an orthogonal matrix, and L is a - # constant in the order of delta. This simplex is optimal in the sense that the interpolation - # system has the minimal condition number, i.e. 1. For this simplex, the distance from - # V_{N+1} to its opposite face is L/sqrt(n). - vsig = 1/np.sqrt(np.sum(simi**2, axis=1)) # TODO: Check axis + # VSIG[j] for j = 0...NUM_VARS-1 is the Euclidean distance from vertex J to the opposite face of the simplex. + vsig = 1 / np.sqrt(np.sum(simi**2, axis=1)) veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0)) - adequate_geo = all(vsig >= factor_alpha * delta) and all(veta <= factor_beta * delta) - return adequate_geo \ No newline at end of file + + # Decide which vertex to drop from the simplex. It will be replaced with a new point to improve the + # acceptability of the simplex. See equations (15) and (16) of the COBYLA paper. + if any(veta > factor_beta * delta): + jdrop = np.where(veta == max(veta[~np.isnan(veta)]))[0][0] + elif any(vsig < factor_alpha * delta): + jdrop = np.where(vsig == min(vsig[~np.isnan(vsig)]))[0][0] + else: + # We arrive here if vsig and veta are all nan, which can happen due to nan in sim and simi + # which should not happen unless there is a bug + jdrop = None + + # Zaikun 230202: What if we consider veta and vsig together? The following attempts do not work well. + # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)*np.sum(simi**2, axis=1)) # Condition number + # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)**2 * np.sum(simi**2, axis=1)) # Condition number times distance + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert jdrop >= 0 and jdrop < num_vars + return jdrop + + +def geostep(jdrop, cpen, conmat, cval, delta, fval, factor_gamma, simi): + ''' + This function calculates a geometry step so that the geometry of the interpolation set is improved + when SIM[: JDROP_GEO] is replaced with SIM[:NUM_VARS] + D. See (15)--(17) of the COBYLA paper. + ''' + + # Sizes + num_constraints = np.size(conmat, 0) + num_vars = np.size(simi, 0) + + # Preconditions + if DEBUGGING: + assert num_constraints >= 0 + assert num_vars >= 1 + assert delta > 0 + assert cpen > 0 + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval)) + assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1 + assert not (np.isnan(conmat) | np.isneginf(conmat)).any() + assert np.size(cval) == num_vars + 1 and not any(cval < 0 | np.isnan(cval) | np.isposinf(cval)) + assert jdrop >= 0 and jdrop < num_vars + assert factor_gamma > 0 and factor_gamma < 1 + + #====================# + # Calculation starts # + #====================# + + # SIMI[JDROP, :] is a vector perpendicular to the face of the simplex to the opposite of vertex + # JDROP. Thus VSIGJ * SIMI[JDROP, :] is the unit vector in this direction + vsigj = 1 / np.sqrt(np.sum(simi[jdrop, :]**2)) + + # Set D to the vector in the above-mentioned direction and with length FACTOR_GAMMA * DELTA. Since + # FACTOR_ALPHA < FACTOR_GAMMA < FACTOR_BETA, D improves the geometry of the simplex as per (14) of + # the COBYLA paper. This also explains why this subroutine does not replace DELTA with + # DELBAR = max(min(0.1 * np.sqrt(max(DISTSQ)), 0.5 * DELTA), RHO) as in NEWUOA. + d = factor_gamma * delta * (vsigj * simi[jdrop, :]) + + # Calculate the coefficients of the linear approximations to the objective and constraint functions, + # placing minus the objective function gradient after the constraint gradients in the array A + A = np.zeros((num_vars, num_constraints + 1)) + A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T + A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi + cvmaxp = np.max(np.append(0, -d@A[:, :num_constraints] - conmat[:, num_vars])) + cvmaxn = np.max(np.append(0, d@A[:, :num_constraints] - conmat[:, num_vars])) + if 2 * np.dot(d, A[:, num_constraints]) < cpen * (cvmaxp - cvmaxn): + d *= -1 + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert np.size(d) == num_vars and all(np.isfinite(d)) + # In theory, ||S|| == FACTOR_GAMMA*DELTA, which may be false due to rounding, but not too far. + # It is crucial to ensure that the geometry step is nonzero, which holds in theory. + assert np.linalg.norm(d) > 0.9 * factor_gamma * delta and np.linalg.norm(d) <= 1.1 * factor_gamma * delta + return d \ No newline at end of file diff --git a/python/src/prima/cobyla/initialize.py b/python/src/prima/cobyla/initialize.py index c9cf1f9804..75b7646381 100644 --- a/python/src/prima/cobyla/initialize.py +++ b/python/src/prima/cobyla/initialize.py @@ -1,31 +1,38 @@ from prima.common.checkbreak import checkbreak_con -from prima.common.consts import INFO_DEFAULT, REALMAX +from prima.common.consts import DEBUGGING, REALMAX +from prima.common.infos import INFO_DEFAULT from prima.common.evaluate import evaluate, moderatec, moderatef +from prima.common.history import savehist +from prima.common.message import fmsg from prima.common.selectx import savefilt import numpy as np -def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, srname): +def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, xhist, fhist, chist, conhist, maxhist, srname): ''' This subroutine does the initialization concerning X, function values, and constraints. ''' + # Local variables + solver = 'COBYLA' + # Sizes num_constraints = np.size(constr0) num_vars = np.size(x0) # Preconditions - assert num_constraints >= 0, f'M >= 0 {srname}' - assert num_vars >= 1, f'N >= 1 {srname}' - assert abs(iprint) <= 3, f'IPRINT is 0, 1, -1, 2, -2, 3, or -3 {srname}' - # assert conmat.shape == (num_constraints , num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' - # assert cval.size == num_vars + 1, f'CVAL.size == N+1 {srname}' - # assert maxchist * (maxchist - maxhist) == 0, f'CHIST.shape == 0 or MAXHIST {srname}' - # assert conhist.shape[0] == num_constraints and maxconhist * (maxconhist - maxhist) == 0, 'CONHIST.shape[0] == num_constraints, SIZE(CONHIST, 2) == 0 or MAXHIST {srname)}' - # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' - # assert xhist.shape[0] == num_vars and maxxhist * (maxxhist - maxhist) == 0, 'XHIST.shape[0] == N, SIZE(XHIST, 2) == 0 or MAXHIST {srname)}' - assert all(np.isfinite(x0)), f'X0 is finite {srname}' - assert rhobeg > 0, f'RHOBEG > 0 {srname}' + if DEBUGGING: + assert num_constraints >= 0, f'M >= 0 {srname}' + assert num_vars >= 1, f'N >= 1 {srname}' + assert abs(iprint) <= 3, f'IPRINT is 0, 1, -1, 2, -2, 3, or -3 {srname}' + # assert conmat.shape == (num_constraints , num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' + # assert cval.size == num_vars + 1, f'CVAL.size == N+1 {srname}' + # assert maxchist * (maxchist - maxhist) == 0, f'CHIST.shape == 0 or MAXHIST {srname}' + # assert conhist.shape[0] == num_constraints and maxconhist * (maxconhist - maxhist) == 0, 'CONHIST.shape[0] == num_constraints, SIZE(CONHIST, 2) == 0 or MAXHIST {srname)}' + # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' + # assert xhist.shape[0] == num_vars and maxxhist * (maxxhist - maxhist) == 0, 'XHIST.shape[0] == N, SIZE(XHIST, 2) == 0 or MAXHIST {srname)}' + assert all(np.isfinite(x0)), f'X0 is finite {srname}' + assert rhobeg > 0, f'RHOBEG > 0 {srname}' #====================# # Calculation starts # @@ -62,12 +69,10 @@ def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, srna f, constr, cstrv = evaluate(calcfc, x) # Print a message about the function/constraint evaluation according to IPRINT. - # TODO: Finish implementing fmsg, if decided that its worth it - # fmsg(solver, iprint, k, f, x, cstrv, constr) + fmsg(solver, 'Initialization', iprint, k, rhobeg, f, x, cstrv, constr) # Save X, F, CONSTR, CSTRV into the history. - # TODO: Implement history (maybe we need it for the iterations?) - # savehist(k, x, xhist, f, fhist, cstrv, chist, constr, conhist) + savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist) # Save F, CONSTR, and CSTRV to FVAL, CONMAT, and CVAL respectively. evaluated[j] = True @@ -101,23 +106,24 @@ def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, srna #==================# # Postconditions - assert nf <= maxfun, f'NF <= MAXFUN {srname}' - assert evaluated.size == num_vars + 1, f'EVALUATED.size == Num_vars + 1 {srname}' - # assert chist.size == maxchist, f'CHIST.size == MAXCHIST {srname}' - # assert conhist.shape== (num_constraints, maxconhist), f'CONHIST.shape == [M, MAXCONHIST] {srname}' - assert conmat.shape == (num_constraints, num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' - assert not (np.isnan(conmat).any() or np.isneginf(conmat).any()), f'CONMAT does not contain NaN/-Inf {srname}' - assert cval.size == num_vars + 1 and not (any(cval < 0) or any(np.isnan(cval)) or any(np.isposinf(cval))), f'CVAL.shape == Num_vars+1 and CVAL does not contain negative values or NaN/+Inf {srname}' - # assert fhist.shape == maxfhist, f'FHIST.shape == MAXFHIST {srname}' - # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' - assert fval.size == num_vars + 1 and not (any(np.isnan(fval)) or any(np.isposinf(fval))), f'FVAL.shape == Num_vars+1 and FVAL is not NaN/+Inf {srname}' - # assert xhist.shape == (num_vars, maxxhist), f'XHIST.shape == [N, MAXXHIST] {srname}' - assert sim.shape == (num_vars, num_vars + 1), f'SIM.shape == [N, N+1] {srname}' - assert np.isfinite(sim).all(), f'SIM is finite {srname}' - assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0), f'SIM(:, 1:N) has no zero column {srname}' - assert simi.shape == (num_vars, num_vars), f'SIMI.shape == [N, N] {srname}' - assert np.isfinite(simi).all(), f'SIMI is finite {srname}' - assert np.allclose(sim[:, :num_vars] @ simi, np.eye(num_vars), rtol=0.1, atol=0.1) or not all(evaluated), f'SIMI = SIM(:, 1:N)^{-1} {srname}' + if DEBUGGING: + assert nf <= maxfun, f'NF <= MAXFUN {srname}' + assert evaluated.size == num_vars + 1, f'EVALUATED.size == Num_vars + 1 {srname}' + # assert chist.size == maxchist, f'CHIST.size == MAXCHIST {srname}' + # assert conhist.shape== (num_constraints, maxconhist), f'CONHIST.shape == [M, MAXCONHIST] {srname}' + assert conmat.shape == (num_constraints, num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' + assert not (np.isnan(conmat).any() or np.isneginf(conmat).any()), f'CONMAT does not contain NaN/-Inf {srname}' + assert cval.size == num_vars + 1 and not (any(cval < 0) or any(np.isnan(cval)) or any(np.isposinf(cval))), f'CVAL.shape == Num_vars+1 and CVAL does not contain negative values or NaN/+Inf {srname}' + # assert fhist.shape == maxfhist, f'FHIST.shape == MAXFHIST {srname}' + # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' + assert fval.size == num_vars + 1 and not (any(np.isnan(fval)) or any(np.isposinf(fval))), f'FVAL.shape == Num_vars+1 and FVAL is not NaN/+Inf {srname}' + # assert xhist.shape == (num_vars, maxxhist), f'XHIST.shape == [N, MAXXHIST] {srname}' + assert sim.shape == (num_vars, num_vars + 1), f'SIM.shape == [N, N+1] {srname}' + assert np.isfinite(sim).all(), f'SIM is finite {srname}' + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0), f'SIM(:, 1:N) has no zero column {srname}' + assert simi.shape == (num_vars, num_vars), f'SIMI.shape == [N, N] {srname}' + assert np.isfinite(simi).all(), f'SIMI is finite {srname}' + assert np.allclose(sim[:, :num_vars] @ simi, np.eye(num_vars), rtol=0.1, atol=0.1) or not all(evaluated), f'SIMI = SIM(:, 1:N)^{-1} {srname}' return evaluated, conmat, cval, sim, simi, fval, nf, info @@ -139,15 +145,26 @@ def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, maxfilt = len(ffilt) # Precondictions - assert num_constraints >= 0 - assert num_vars >= 1 - assert maxfilt >= 1 - assert confilt.shape == (num_constraints, maxfilt) - assert cfilt.shape == (maxfilt,) - assert xfilt.shape == (num_vars, maxfilt) - assert ffilt.shape == (maxfilt,) - assert conmat.shape == (num_constraints, num_vars+1) - # TODO: Need to finish these preconditions + if DEBUGGING: + assert num_constraints >= 0 + assert num_vars >= 1 + assert maxfilt >= 1 + assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt + assert np.size(cfilt) == maxfilt + assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt + assert np.size(ffilt) == maxfilt + assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1 + assert not (np.isnan(conmat) | np.isneginf(conmat)).any() + assert np.size(cval) == num_vars + 1 and not any(cval < 0 | np.isnan(cval) | np.isposinf(cval)) + assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval)) + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(evaluated) == num_vars + 1 + + #====================# + # Calculation starts # + #====================# nfilt = 0 @@ -159,5 +176,21 @@ def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, x = sim[:, i] # i == num_vars, i.e. the last column nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cval[i], ctol, cweight, fval[i], x, nfilt, cfilt, ffilt, xfilt, conmat[:, i], confilt) + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert nfilt <= maxfilt + assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt + assert not (np.isnan(confilt[:, :nfilt]) | np.isneginf(confilt[:, :nfilt])).any() + assert np.size(cfilt) == maxfilt + assert not any(cfilt[:nfilt] < 0 | np.isnan(cfilt[:nfilt]) | np.isposinf(cfilt[:nfilt])) + assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt + assert not (np.isnan(xfilt[:, :nfilt])).any() + # The last calculated X can be Inf (finite + finite can be Inf numerically). + assert np.size(ffilt) == maxfilt + assert not any(np.isnan(ffilt[:nfilt]) | np.isposinf(ffilt[:nfilt])) return nfilt \ No newline at end of file diff --git a/python/src/prima/cobyla/trustregion.py b/python/src/prima/cobyla/trustregion.py index a971c478b6..5f37035507 100644 --- a/python/src/prima/cobyla/trustregion.py +++ b/python/src/prima/cobyla/trustregion.py @@ -10,61 +10,10 @@ import numpy as np import numpy.typing as npt -from prima.common.consts import REALMIN, REALMAX, EPS +from prima.common.consts import DEBUGGING, REALMIN, REALMAX, EPS from prima.common.powalg import qradd_Rdiag, qrexc_Rdiag from prima.common.linalg import isminor -def trrad(delta_in, dnorm, eta1, eta2, gamma1, gamma2, ratio): - ''' - This function updates the trust region radius according to RATIO and DNORM. - ''' - - if ratio <= eta1: - delta = gamma1 * dnorm # Powell's UOBYQA/NEWOUA - # delta = gamma1 * delta_in # Powell's COBYLA/LINCOA - # delta = min(gamma1 * delta_in, dnorm) # Powell's BOBYQA - elif ratio <= eta2: - delta = max(gamma1 * delta_in, dnorm) # Powell's UOBYQA/NEWOUA/BOBYQA/LINCOA - else: - delta = max(gamma1 * delta_in, gamma2 * dnorm) # Powell's NEWUOA/BOBYQA - # delta = max(delta_in, gamma2 * dnorm) # Modified version. Works well for UOBYQA - # For noise-free CUTEst problems of <= 100 variables, Powell's version works slightly better - # than the modified one. - # delta = max(delta_in, 1.25*dnorm, dnorm + rho) # Powell's UOBYQA - # delta = min(max(gamma1 * delta_in, gamma2 * dnorm), gamma3 * delta_in) # Powell's LINCOA, gamma3 = np.sqrt(2) - - # For noisy problems, the following may work better. - # if ratio <= eta1: - # delta = gamma1 * dnorm - # elseif ratio <= eta2: # Ensure DELTA >= DELTA_IN - # delta = delta_in - # else: # Ensure DELTA > DELTA_IN with a constant factor - # delta = max(delta_in * (1 + gamma2) / 2, gamma2 * dnorm) - return delta - -def redrat(ared, pred, rshrink): - # This function evaluates the reduction ratio of a trust-region step, handling inf/nan properly. - if np.isnan(ared): - # This should not happen in unconstrained problems due to the moderated extreme barrier. - ratio = -REALMAX - elif np.isnan(pred) or pred <= 0: - # The trust-region subproblem solver fails in this rare case. Instead of terminating as Powell's - # original code does, we set ratio as follows so that the solver may continue to progress. - if ared > 0: - # The trial point will be accepted, but the trust-region radius will be shrunk if rshrink>0 - ratio = rshrink/2 - else: - # Set the ration to a large negative number to signify a bad trust-region step, so that the - # solver will check whether to take a geometry step or reduce rho. - ratio = -REALMAX - elif np.isposinf(pred) and np.isposinf(ared): - ratio = 1 # ared/pred = NaN if calculated directly - elif np.isposinf(pred) and np.isneginf(ared): - ratio = -REALMAX # ared/pred = NaN if calculated directly - else: - ratio = ared/pred - return ratio - def trstlp(A_in, b_in, delta): ''' @@ -168,28 +117,29 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): 5. step. step <= cviol in stage 1. ''' # zdasav = np.zeros(z.shape[1]) - vmultd = np.zeros(len(vmultc)) - zdota = np.zeros(z.shape[1]) + vmultd = np.zeros(np.size(vmultc)) + zdota = np.zeros(np.size(z, 1)) # Sizes - num_vars = A.shape[0] - mcon = A.shape[1] + num_vars = np.size(A, 0) + mcon = np.size(A, 1) - # Precondictions - assert num_vars >= 1 - assert stage == 1 or stage == 2 - assert (mcon >= 0 and stage == 1) or (mcon >= 1 and stage == 2) - assert b.shape[0] == mcon - assert iact.shape[0] == mcon - assert vmultc.shape[0] == mcon - assert d.shape[0] == num_vars - assert z.shape[0] == num_vars and z.shape[1] == num_vars - assert delta > 0 - if stage == 2: - assert all(np.isfinite(d)) and np.linalg.norm(d) <= 2 * delta - assert nact >= 0 and nact <= min(mcon, num_vars) - assert all(vmultc[:mcon]) >= 0 - # N.B.: Stage 1 defines only VMULTC(1:M); VMULTC(M+1) is undefined! + # Preconditions + if DEBUGGING: + assert num_vars >= 1 + assert stage == 1 or stage == 2 + assert (mcon >= 0 and stage == 1) or (mcon >= 1 and stage == 2) + assert np.size(b) == mcon + assert np.size(iact) == mcon + assert np.size(vmultc) == mcon + assert np.size(d) == num_vars + assert np.size(z, 0) == num_vars and np.size(z, 1) == num_vars + assert delta > 0 + if stage == 2: + assert all(np.isfinite(d)) and np.linalg.norm(d) <= 2 * delta + assert nact >= 0 and nact <= np.minimum(mcon, num_vars) + assert all(vmultc[:mcon]) >= 0 + # N.B.: Stage 1 defines only VMULTC(1:M); VMULTC(M+1) is undefined! # Initialize according to stage @@ -236,8 +186,10 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): # problems: DANWOODLS, GAUSS1LS, GAUSS2LS, GAUSS3LS, KOEBHELB, TAX13322, TAXR13322. Indeed, in all # these cases, Inf/NaN appear in d due to extremely large values in A (up to 10^219). To resolve # this, we set the maximal number of iterations to maxiter, and terminate if Inf/NaN occurs in d. - maxiter = min(10000, 100*max(num_constraints, num_vars)) + maxiter = np.minimum(10000, 100*np.maximum(num_constraints, num_vars)) for iter in range(maxiter): + if DEBUGGING: + assert all(vmultc >= 0) if stage == 1: optnew = cviol else: @@ -252,11 +204,10 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): nfail = 0 else: nfail += 1 - optold = min(optold, optnew) + optold = np.minimum(optold, optnew) if nfail == 3: break - # TODO: nact needs to be changed to nact-1 in a lot of places, essentially everywhere it acts as an index # If icon exceeds nact, then we add the constraint with index iact[icon] to the active set. # Apply Givens rotations so that the last num_vars-nact-1 columns of Z are orthogonal to the gradient # of the new constraint, a scalar product being set to 0 if its nonzero value could be due to @@ -483,9 +434,71 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): # In Powell's code, the condition is icon == 0. Indeed, icon < 0 cannot hold unless # fracmult contains only nan, which should not happen; icon >= mcon should never occur. break + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert np.size(iact) == mcon + assert np.size(vmultc) == mcon + assert all(vmultc >= 0) + assert np.size(d) == num_vars + assert all(np.isfinite(d)) + assert np.linalg.norm(d) <= 2 * delta + assert np.size(z, 0) == num_vars and np.size(z, 1) == num_vars + assert nact >= 0 and nact <= np.minimum(mcon, num_vars) + return iact, nact, d, vmultc, z - - +def trrad(delta_in, dnorm, eta1, eta2, gamma1, gamma2, ratio): + ''' + This function updates the trust region radius according to RATIO and DNORM. + ''' + + # Preconditions + if DEBUGGING: + assert delta_in >= dnorm and dnorm > 0 + assert eta1 >= 0 and eta1 <= eta2 and eta2 < 1 + assert eta1 >= 0 and eta1 <= eta2 and eta2 < 1 + assert gamma1 > 0 and gamma1 < 1 and gamma2 > 1 + # By the definition of RATIO in ratio.f90, RATIO cannot be NaN unless the actual reduction is + # NaN, which should NOT happen due to the moderated extreme barrier. + assert not np.isnan(ratio) + + #====================# + # Calculation starts # + #====================# + if ratio <= eta1: + delta = gamma1 * dnorm # Powell's UOBYQA/NEWOUA + # delta = gamma1 * delta_in # Powell's COBYLA/LINCOA + # delta = min(gamma1 * delta_in, dnorm) # Powell's BOBYQA + elif ratio <= eta2: + delta = max(gamma1 * delta_in, dnorm) # Powell's UOBYQA/NEWOUA/BOBYQA/LINCOA + else: + delta = max(gamma1 * delta_in, gamma2 * dnorm) # Powell's NEWUOA/BOBYQA + # delta = max(delta_in, gamma2 * dnorm) # Modified version. Works well for UOBYQA + # For noise-free CUTEst problems of <= 100 variables, Powell's version works slightly better + # than the modified one. + # delta = max(delta_in, 1.25*dnorm, dnorm + rho) # Powell's UOBYQA + # delta = min(max(gamma1 * delta_in, gamma2 * dnorm), gamma3 * delta_in) # Powell's LINCOA, gamma3 = np.sqrt(2) + + # For noisy problems, the following may work better. + # if ratio <= eta1: + # delta = gamma1 * dnorm + # elseif ratio <= eta2: # Ensure DELTA >= DELTA_IN + # delta = delta_in + # else: # Ensure DELTA > DELTA_IN with a constant factor + # delta = max(delta_in * (1 + gamma2) / 2, gamma2 * dnorm) + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert delta > 0 + return delta diff --git a/python/src/prima/cobyla/update.py b/python/src/prima/cobyla/update.py index fda45db27e..b1cf0a1e68 100644 --- a/python/src/prima/cobyla/update.py +++ b/python/src/prima/cobyla/update.py @@ -1,4 +1,6 @@ -from prima.common.consts import DAMAGING_ROUNDING, INFO_DEFAULT +from prima.common.consts import DEBUGGING +from prima.common.infos import DAMAGING_ROUNDING, INFO_DEFAULT +from prima.common.linalg import isinv import numpy as np @@ -7,8 +9,37 @@ def updatexfc(jdrop, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi): This function revises the simplex by updating the elements of SIM, SIMI, FVAL, CONMAT, and CVAL ''' - num_constraints = len(constr) - num_vars = sim.shape[0] + # Local variables + itol = 1 + + # Sizes + num_constraints = np.size(constr) + num_vars = np.size(sim, 0) + + # Preconditions + if DEBUGGING: + assert num_constraints >= 0 + assert num_vars >= 1 + assert jdrop >= 0 and jdrop <= num_vars + 1 + assert not any(np.isnan(constr) | np.isneginf(constr)) + assert not (np.isnan(cstrv) | np.isposinf(cstrv)) + assert np.size(d) == num_vars and all(np.isfinite(d)) + assert not (np.isnan(f) | np.isposinf(f)) + assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1 + assert not (np.isnan(conmat) | np.isneginf(conmat)).any() + assert np.size(cval) == num_vars + 1 and not any(cval < 0 | np.isnan(cval) | np.isposinf(cval)) + assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval)) + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) + + #====================# + # Calculation starts # + #====================# + # Do nothing when JDROP is None. This can only happen after a trust-region step. if jdrop is None: # JDROP is None is impossible if the input is correct. @@ -51,26 +82,65 @@ def updatexfc(jdrop, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi): info = DAMAGING_ROUNDING sim = sim_old simi = simi_old - + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1 + assert not (np.isnan(conmat) | np.isneginf(conmat)).any() + assert np.size(cval) == num_vars + 1 and not any(cval < 0 | np.isnan(cval) | np.isposinf(cval)) + assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval)) + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) or info == DAMAGING_ROUNDING return sim, simi, fval, conmat, cval, info def findpole(cpen, cval, fval): - # This subroutine identifies the best vertex of the current simplex with respect to the merit - # function PHI = F + CPEN * CSTRV. - n = len(fval) - 1 # used for debugging which I have not implemented yet + ''' + This subroutine identifies the best vertex of the current simplex with respect to the merit + function PHI = F + CPEN * CSTRV. + ''' + + # Size + num_vars = np.size(fval) - 1 + + # Preconditions + if DEBUGGING: + assert cpen > 0 + assert np.size(cval) == num_vars + 1 and not any(cval < 0 | np.isnan(cval) | np.isposinf(cval)) + assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval)) + + #====================# + # Calculation starts # + #====================# # Identify the optimal vertex of the current simplex - jopt = len(fval) - 1 + jopt = np.size(fval) - 1 phi = fval + cpen * cval - phimin = min(phi) + phimin = np.min(phi) joptcandidate = np.argmin(phi) if phi[joptcandidate] < phi[jopt]: jopt = joptcandidate - if cpen <= 0 and any(np.logical_and(cval < cval[jopt], phi <= phimin)): + if cpen <= 0 and any(cval < cval[jopt] & phi <= phimin): # jopt is the index of the minimum value of cval # on the set of cval values where phi <= phimin - jopt = np.where(cval == min(cval[phi <= phimin]))[0][0] + jopt = np.where(cval == np.min(cval[phi <= phimin]))[0][0] + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert jopt >= 0 and jopt < num_vars + 1 + assert jopt == num_vars or phi[jopt] < phi[num_vars] or (phi[jopt] <= phi[num_vars] and cval[jopt] < cval[num_vars]) return jopt @@ -100,8 +170,34 @@ def updatepole(cpen, conmat, cval, fval, sim, simi): # need to call UPDATEPOLE after updating CPEN at the beginning of each trust-region iteration and # after each invocation of REDRHO. + # Local variables + itol = 1 + + # Sizes num_constraints = conmat.shape[0] num_vars = sim.shape[0] + + # Preconditions + if DEBUGGING: + assert num_constraints >= 0 + assert num_vars >= 1 + assert cpen > 0 + assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1 + assert not (np.isnan(conmat) | np.isneginf(conmat)).any() + assert np.size(cval) == num_vars + 1 and not any(cval < 0 | np.isnan(cval) | np.isposinf(cval)) + assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval)) + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) + + #====================# + # Calculation starts # + #====================# + + # INFO must be set, as it is an output. info = INFO_DEFAULT # Identify the optimal vertex of the current simplex. @@ -154,4 +250,23 @@ def updatepole(cpen, conmat, cval, fval, sim, simi): sim = sim_old simi = simi_old + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert findpole(cpen, cval, fval) == num_vars or info == DAMAGING_ROUNDING + assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1 + assert not (np.isnan(conmat) | np.isneginf(conmat)).any() + assert np.size(cval) == num_vars + 1 and not any(cval < 0 | np.isnan(cval) | np.isposinf(cval)) + assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval)) + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + # Do not check SIMI = SIM[:, :num_vars]^{-1}, as it may not be true due to damaging rounding. + assert isinv(sim[:, :num_vars], simi, itol) or info == DAMAGING_ROUNDING + return conmat, cval, fval, sim, simi, info \ No newline at end of file diff --git a/python/src/prima/common/checkbreak.py b/python/src/prima/common/checkbreak.py index b74d476ab5..1329ca5169 100644 --- a/python/src/prima/common/checkbreak.py +++ b/python/src/prima/common/checkbreak.py @@ -1,4 +1,4 @@ -from prima.common.consts import INFO_DEFAULT, NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED +from prima.common.infos import INFO_DEFAULT, NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED import numpy as np diff --git a/python/src/prima/common/consts.py b/python/src/prima/common/consts.py index 6e73999abd..3c8901e17c 100644 --- a/python/src/prima/common/consts.py +++ b/python/src/prima/common/consts.py @@ -10,14 +10,6 @@ FUNCMAX = 2.0**100 CONSTRMAX = FUNCMAX EPS = np.finfo(float).eps -INFO_DEFAULT = 0 -SMALL_TR_RADIUS = 0 # Why is this same as default? -MAXTR_REACHED = 20 -DAMAGING_ROUNDING = 7 -NAN_INF_X = -1 -NAN_INF_F = -2 -FTARGET_ACHIEVED = 1 -MAXFUN_REACHED = 3 # Some default values RHOBEG_DEFAULT = 1 @@ -41,4 +33,4 @@ # Maximal length of the filter used in constrained solvers. MIN_MAXFILT = 200 # Should be positive; < 200 is not recommended. -MAXFILT_DEFAULT = 10 * MIN_MAXFILT \ No newline at end of file +MAXFILT_DEFAULT = 10 * MIN_MAXFILT diff --git a/python/src/prima/common/infos.py b/python/src/prima/common/infos.py new file mode 100644 index 0000000000..f80f4d4925 --- /dev/null +++ b/python/src/prima/common/infos.py @@ -0,0 +1,26 @@ + +''' +This is a module defining exit flags. +''' + +INFO_DEFAULT = 0 +SMALL_TR_RADIUS = 0 +FTARGET_ACHIEVED = 1 +TRSUBP_FAILED = 2 +MAXFUN_REACHED = 3 +MAXTR_REACHED = 20 +NAN_INF_X = -1 +NAN_INF_F = -2 +NAN_INF_MODEL = -3 +NO_SPACE_BETWEEN_BOUNDS = 6 +DAMAGING_ROUNDING = 7 +ZERO_LINEAR_CONSTRAINT = 8 + +# Stop-codes. +# The following codes are used by ERROR STOP as stop-codes, which should be default integers. +INVALID_INPUT = 100 +ASSERTION_FAILS = 101 +VALIDATION_FAILS = 102 +MEMORY_ALLOCATION_FAILS = 103 + + diff --git a/python/src/prima/common/linalg.py b/python/src/prima/common/linalg.py index a9b8810588..2f1708a7b2 100644 --- a/python/src/prima/common/linalg.py +++ b/python/src/prima/common/linalg.py @@ -1,5 +1,6 @@ import numpy as np -from prima.common.consts import EPS, REALMAX, REALMIN +from prima.common.consts import DEBUGGING, EPS, REALMAX, REALMIN +from prima.common.present import present def planerot(x): ''' @@ -110,3 +111,33 @@ def isminor(x, ref): refa = abs(ref) + sensitivity * abs(x) refb = abs(ref) + 2 * sensitivity * abs(x) return np.logical_or(abs(ref) >= refa, refa >= refb) + + +def isinv(A, B, tol=None): + ''' + This procedure tests whether A = B^{-1} up to the tolerance TOL. + ''' + + # Sizes + n = np.size(A, 0) + + # Preconditions + if DEBUGGING: + assert np.size(A, 0) == np.size(A, 1) + assert np.size(B, 0) == np.size(B, 1) + assert np.size(A, 0) == np.size(B, 0) + if present(tol): + assert tol >= 0 + + #====================# + # Calculation starts # + #====================# + + tol = tol if present(tol) else np.minimum(1e-3, 1e2 * EPS * np.maximum(np.size(A, 0), np.size(A, 1))) + tol = np.max([tol, tol * np.max(abs(A)), tol * np.max(abs(B))]) + is_inv = ((abs(A@B) - np.eye(n)) <= tol).all() or ((abs(B@A - np.eye(n))) <= tol).all() + + #===================# + # Calculation ends # + #===================# + return is_inv diff --git a/python/src/prima/common/message.py b/python/src/prima/common/message.py new file mode 100644 index 0000000000..02cd8236b6 --- /dev/null +++ b/python/src/prima/common/message.py @@ -0,0 +1,276 @@ +from prima.common.consts import DEBUGGING +from prima.common.infos import FTARGET_ACHIEVED, MAXFUN_REACHED, MAXTR_REACHED, \ + SMALL_TR_RADIUS, TRSUBP_FAILED, NAN_INF_F, NAN_INF_X, NAN_INF_MODEL, DAMAGING_ROUNDING, \ + NO_SPACE_BETWEEN_BOUNDS, ZERO_LINEAR_CONSTRAINT +from prima.common.present import present +import numpy as np + +''' +# This module provides some functions that print messages to terminal/files. +# +# N.B.: +# 1. In case parallelism is desirable (especially during initialization), the functions may +# have to be modified or disabled due to the IO operations. +# 2. IPRINT indicates the level of verbosity, which increases with the absolute value of IPRINT. +# IPRINT = +/-3 can be expensive due to high IO operations. +''' + +spaces = ' ' + +def retmsg(solver, info, iprint, nf, f, x, cstrv=None, constr=None): + ''' + This function prints messages at return. + ''' + # Local variables + valid_exit_codes = [FTARGET_ACHIEVED, MAXFUN_REACHED, MAXTR_REACHED, + SMALL_TR_RADIUS, TRSUBP_FAILED, NAN_INF_F, NAN_INF_X, NAN_INF_MODEL, DAMAGING_ROUNDING, + NO_SPACE_BETWEEN_BOUNDS, ZERO_LINEAR_CONSTRAINT] + + # Preconditions + if DEBUGGING: + assert info in valid_exit_codes + + #====================# + # Calculation starts # + #====================# + + if abs(iprint) < 1: # No printing (iprint == 0) + return + elif iprint > 0: # Print the message to the standard out. + fname = '' + else: # Print the message to a file named FNAME. + fname = f'{solver}_output.txt' + + # Decide whether the problem is truly constrained. + if present(constr): + is_constrained = (np.size(constr) > 0) + else: + is_constrained = present(cstrv) + + # Decide the constraint violation. + if present(cstrv): + cstrv_loc = cstrv + elif present(constr): + cstrv_loc = np.max(np.append(0, -constr)) # N.B.: We assume that the constraint is CONSTR >= 0. + else: + cstrv_loc = 0 + + # Decide the return message. + if info == FTARGET_ACHIEVED: + reason = 'the target function value is achieved.' + elif info == MAXFUN_REACHED: + reason = 'the objective function has been evaluated MAXFUN times.' + elif info == MAXTR_REACHED: + reason = 'the maximal number of trust region iterations has been reached.' + elif info == SMALL_TR_RADIUS: + reason = 'the trust region radius reaches its lower bound.' + elif info == TRSUBP_FAILED: + reason = 'a trust region step has failed to reduce the quadratic model.' + elif info == NAN_INF_X: + reason = 'NaN or Inf occurs in x.' + elif info == NAN_INF_F: + reason = 'the objective function returns NaN/+Inf.' + elif info == NAN_INF_MODEL: + reason = 'NaN or Inf occurs in the models.' + elif info == DAMAGING_ROUNDING: + reason = 'rounding errors are becoming damaging.' + elif info == NO_SPACE_BETWEEN_BOUNDS: + reason = 'there is no space between the lower and upper bounds of variable.' + elif info == ZERO_LINEAR_CONSTRAINT: + reason = 'one of the linear constraints has a zero gradient' + else: + reason = 'UNKNOWN EXIT FLAG' + ret_message = f'\nReturn from {solver} because {reason.strip()}' + + if np.size(x) <= 2: + x_message = f'\nThe corresponding X is: {x}' # Printed in one line + else: + x_message = f'\nThe corresponding X is:\n{x}' + + if is_constrained: + nf_message = (f'\nNumber of function values = {nf}{spaces}' + f'Least value of F = {f}{spaces}Constraint violation = {cstrv_loc}') + else: + nf_message = f'\nNumber of function values = {nf}{spaces}Least value of F = {f}' + + if is_constrained and present(constr): + if np.size(constr) <= 2: + constr_message = f'\nThe constraint value is: {constr}' # Printed in one line + else: + constr_message = f'\nThe constraint value is:\n{constr}' + else: + constr_message = '' + + # Print the message. + if abs(iprint) >= 2: + message = f'\n{ret_message}{nf_message}{x_message}{constr_message}\n' + else: + message = f'{ret_message}{nf_message}{x_message}{constr_message}\n' + if len(fname) > 0: + with open(fname, 'a') as f: f.write(message) + else: + print(message) + + +def rhomsg(solver, iprint, nf, delta, f, rho, x, cstrv=None, constr=None, cpen=None): + ''' + This function prints messages when RHO is updated. + ''' + + #====================# + # Calculation starts # + #====================# + + if abs(iprint) < 2: # No printing + return + elif iprint > 0: # Print the message to the standard out. + fname = '' + else: # Print the message to a file named FNAME. + fname = f'{solver.strip()}_output.txt' + + # Decide whether the problem is truly constrained. + if present(constr): + is_constrained = (np.size(constr) > 0) + else: + is_constrained = present(cstrv) + + # Decide the constraint violation. + if present(cstrv): + cstrv_loc = cstrv + elif present(constr): + cstrv_loc = np.max(np.append(0, -constr)) # N.B.: We assume that the constraint is CONSTR >= 0. + else: + cstrv_loc = 0 + + if present(cpen): + rho_message = (f'\nNew RHO = {rho}{spaces}Delta = {delta}{spaces}' + f'CPEN = {cpen}') + else: + rho_message = f'\nNew RHO = {rho}{spaces}Delta = {delta}' + + if np.size(x) <= 2: + x_message = f'\nThe corresponding X is: {x}' # Printed in one line + else: + x_message = f'\nThe corresponding X is:\n{x}' + + if is_constrained: + nf_message = (f'\nNumber of function values = {nf}{spaces}' + f'Least value of F = {f}{spaces}Constraint violation = {cstrv_loc}') + else: + nf_message = f'\nNumber of function values = {nf}{spaces}Least value of F = {f}' + + if is_constrained and present(constr): + if np.size(constr) <= 2: + constr_message = f'\nThe constraint value is: {constr}' # Printed in one line + else: + constr_message = f'\nThe constraint value is:\n{constr}' + else: + constr_message = '' + + # Print the message. + if abs(iprint) >= 3: + message = f'\n{rho_message}{nf_message}{x_message}{constr_message}' + else: + message = f'{rho_message}{nf_message}{x_message}{constr_message}' + if len(fname) > 0: + with open(fname, 'a') as f: f.write(message) + else: + print(message) + + #====================# + # Calculation ends # + #====================# + + +def cpenmsg(solver, iprint, cpen): + ''' + This function prints a message when CPEN is updated. + ''' + + #====================# + # Calculation starts # + #====================# + + if abs(iprint) < 2: # No printing + return + elif iprint > 0: # Print the message to the standard out. + fname = '' + else: # Print the message to a file named FNAME. + fname = f'{solver.strip()}_output.txt' + + # Print the message. + if abs(iprint) >= 3: + message = f'\nSet CPEN to {cpen}' + else: + message = f'\n\nSet CPEN to {cpen}' + if len(fname) > 0: + with open(fname, 'a') as f: f.write(message) + else: + print(message) + + #====================# + # Calculation ends # + #====================# + + +def fmsg(solver, state, iprint, nf, delta, f, x, cstrv=None, constr=None): + ''' + This subroutine prints messages for each evaluation of the objective function. + ''' + + #====================# + # Calculation starts # + #====================# + + if abs(iprint) < 2: # No printing + return + elif iprint > 0: # Print the message to the standard out. + fname = '' + else: # Print the message to a file named FNAME. + fname = f'{solver.strip()}_output.txt' + + # Decide whether the problem is truly constrained. + if present(constr): + is_constrained = (np.size(constr) > 0) + else: + is_constrained = present(cstrv) + + # Decide the constraint violation. + if present(cstrv): + cstrv_loc = cstrv + elif present(constr): + cstrv_loc = np.max(np.append(0, -constr)) # N.B.: We assume that the constraint is CONSTR >= 0. + else: + cstrv_loc = 0 + + delta_message = f'\n{state} step with radius = {delta}' + + if is_constrained: + nf_message = (f'\nNumber of function values = {nf}{spaces}' + f'Least value of F = {f}{spaces}Constraint violation = {cstrv_loc}') + else: + nf_message = f'\nNumber of function values = {nf}{spaces}Least value of F = {f}' + + if np.size(x) <= 2: + x_message = f'\nThe corresponding X is: {x}' # Printed in one line + else: + x_message = f'\nThe corresponding X is:\n{x}' + + if is_constrained and present(constr): + if np.size(constr) <= 2: + constr_message = f'\nThe constraint value is: {constr}' # Printed in one line + else: + constr_message = f'\nThe constraint value is:\n{constr}' + else: + constr_message = '' + + # Print the message. + message = f'{delta_message}{nf_message}{x_message}{constr_message}' + if len(fname) > 0: + with open(fname, 'a') as f: f.write(message) + else: + print(message) + + #====================# + # Calculation ends # + #====================# diff --git a/python/src/prima/common/output.py b/python/src/prima/common/output.py deleted file mode 100644 index 151b150eb9..0000000000 --- a/python/src/prima/common/output.py +++ /dev/null @@ -1,20 +0,0 @@ -def fmsg(solver, iprint, nf, f, x, cstrv=None, constr=None): - if abs(iprint) < 3: - return - elif iprint > 0: - output = "stdout" - else: - output = "file" - filename = solver + "_output.txt" - - # Decide whether the problem is truly constrained. - if constr is None: - is_constrained = cstrv is not None - else: - is_constrained = bool(len(constr)) - - # Decide the constraint violation - - # Will finish this later, not important for now - - diff --git a/python/src/prima/common/ratio.py b/python/src/prima/common/ratio.py new file mode 100644 index 0000000000..80712f5104 --- /dev/null +++ b/python/src/prima/common/ratio.py @@ -0,0 +1,44 @@ +from prima.common.consts import DEBUGGING, REALMAX +import numpy as np + +def redrat(ared, pred, rshrink): + ''' + This function evaluates the reduction ratio of a trust-region step, handling inf/nan properly. + ''' + + # Preconditions + if DEBUGGING: + assert rshrink >= 0 + + #====================# + # Calculation starts # + #====================# + + if np.isnan(ared): + # This should not happen in unconstrained problems due to the moderated extreme barrier. + ratio = -REALMAX + elif np.isnan(pred) or pred <= 0: + # The trust-region subproblem solver fails in this rare case. Instead of terminating as Powell's + # original code does, we set ratio as follows so that the solver may continue to progress. + if ared > 0: + # The trial point will be accepted, but the trust-region radius will be shrunk if rshrink>0 + ratio = rshrink/2 + else: + # Set the ration to a large negative number to signify a bad trust-region step, so that the + # solver will check whether to take a geometry step or reduce rho. + ratio = -REALMAX + elif np.isposinf(pred) and np.isposinf(ared): + ratio = 1 # ared/pred = NaN if calculated directly + elif np.isposinf(pred) and np.isneginf(ared): + ratio = -REALMAX # ared/pred = NaN if calculated directly + else: + ratio = ared/pred + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert not np.isnan(ratio) + return ratio \ No newline at end of file diff --git a/python/src/prima/common/redrho.py b/python/src/prima/common/redrho.py index f24e747eb3..fd11baf92d 100644 --- a/python/src/prima/common/redrho.py +++ b/python/src/prima/common/redrho.py @@ -1,3 +1,4 @@ +from prima.common.consts import DEBUGGING import numpy as np def redrho(rho_in, rhoend): @@ -9,7 +10,8 @@ def redrho(rho_in, rhoend): ''' # Preconditions - assert rho_in > rhoend > 0 + if DEBUGGING: + assert rho_in > rhoend > 0 #====================# # Calculation starts # @@ -29,6 +31,7 @@ def redrho(rho_in, rhoend): #==================# # Postconditions - assert rho_in > rho >= rhoend + if DEBUGGING: + assert rho_in > rho >= rhoend return rho \ No newline at end of file diff --git a/python/src/prima/common/selectx.py b/python/src/prima/common/selectx.py index 22586cab1f..8e33f15837 100644 --- a/python/src/prima/common/selectx.py +++ b/python/src/prima/common/selectx.py @@ -10,7 +10,8 @@ import numpy as np import numpy.typing as npt -from prima.common.consts import EPS, CONSTRMAX, REALMAX, FUNCMAX +from prima.common.consts import DEBUGGING, EPS, CONSTRMAX, REALMAX, FUNCMAX +from prima.common.present import present def isbetter(f1: float, c1: float, f2: float, c2: float, ctol: float) -> bool: ''' @@ -21,20 +22,43 @@ def isbetter(f1: float, c1: float, f2: float, c2: float, ctol: float) -> bool: At return, BETTER = TRUE if and only if (F1, C1) is better than (F2, C2). Here, C means constraint violation, which is a nonnegative number. ''' - isbetter = False + + # Preconditions + if DEBUGGING: + assert not any(np.isnan([f1, c1]) | np.isposinf([f2, c2])) + assert not any(np.isnan([f2, c2]) | np.isposinf([f2, c2])) + assert c1 >= 0 and c2 >= 0 + assert ctol >= 0 + + #====================# + # Calculation starts # + #====================# + + is_better = False # Even though NaN/+Inf should not occur in FC1 or FC2 due to the moderated extreme barrier, for # security and robustness, the code below does not make this assumption. - isbetter = isbetter or (any(np.isnan([f1, c1])) and not any(np.isnan([f2, c2]))) + is_better = is_better or (any(np.isnan([f1, c1])) and not any(np.isnan([f2, c2]))) - isbetter = isbetter or (f1 < f2 and c1 <= c2) - isbetter = isbetter or (f1 <= f2 and c1 < c2) + is_better = is_better or (f1 < f2 and c1 <= c2) + is_better = is_better or (f1 <= f2 and c1 < c2) # If C1 <= CTOL and C2 is significantly larger/worse than CTOL, i.e., C2 > MAX(CTOL,CREF), # then FC1 is better than FC2 as long as F1 < REALMAX. Normally CREF >= CTOL so MAX(CTOL, CREF) # is indeed CREF. However, this may not be true if CTOL > 1E-1*CONSTRMAX. cref = 10 * max(EPS, min(ctol, 1.0E-2 * CONSTRMAX)) # The MIN avoids overflow. - isbetter = isbetter or (f1 < REALMAX and c1 <= ctol and (c2 > max(ctol, cref) or np.isnan(c2))) - return isbetter + is_better = is_better or (f1 < REALMAX and c1 <= ctol and (c2 > max(ctol, cref) or np.isnan(c2))) + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert not (is_better and f1 >= f2 and c1 >= c2) + assert is_better or not (f1 <= f2 and c1 < c2) + assert is_better or not (f1 < f2 and c1 <= c2) + + return is_better def savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr=None, confilt=None): @@ -53,15 +77,48 @@ def savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr=None ''' # Sizes - if constr is not None: + if present(constr): num_constraints = len(constr) else: num_constraints = 0 - num_vars = len(x) maxfilt = len(ffilt) - # Preconditions TODO + # Preconditions + if DEBUGGING: + # Check the size of X. + assert num_vars >= 1 + # Check CWEIGHT and CTOL + assert cweight >= 0 + assert ctol >= 0 + # Check NFILT + assert nfilt >= 0 and nfilt <= maxfilt + # Check the sizes of XFILT, FFILT, CFILT. + assert maxfilt >= 1 + assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt + assert np.size(cfilt) == maxfilt + # Check the values of XFILT, FFILT, CFILT. + assert not (np.isnan(xfilt[:, :nfilt])).any() + assert not any(np.isnan(ffilt[:nfilt]) | np.isposinf(ffilt[:nfilt])) + assert not any(cfilt[:nfilt] < 0 | np.isnan(cfilt[:nfilt]) | np.isposinf(cfilt[:nfilt])) + # Check the values of X, F, CSTRV. + # X does not contain NaN if X0 does not and the trust-region/geometry steps are proper. + assert not any(np.isnan(x)) + # F cannot be NaN/+Inf due to the moderated extreme barrier. + assert not (np.isnan(f) | np.isposinf(f)) + # CSTRV cannot be NaN/+Inf due to the moderated extreme barrier. + assert not (cstrv < 0 | np.isnan(cstrv) | np.isposinf(cstrv)) + # Check CONSTR and CONFILT. + assert present(constr) == present(confilt) + if present(constr): + # CONSTR cannot contain NaN/-Inf due to the moderated extreme barrier. + assert not any(np.isnan(constr) | np.isneginf(constr)) + assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt + assert not (np.isnan(confilt[:, :nfilt]) | np.isneginf(confilt[:, :nfilt])).any() + + #====================# + # Calculation starts # + #====================# # Return immeditely if any column of XFILT is better than X. TODO: Lazy evaluation if any([isbetter(ffilt_i, cfilt_i, f, cstrv, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])]): @@ -119,6 +176,30 @@ def savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr=None confilt[:, nfilt] = constr nfilt += 1 # In Python we need to increment the index afterwards + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + # Check NFILT and the sizes of XFILT, FFILT, CFILT. + assert nfilt >= 1 and nfilt <= maxfilt + assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt + assert np.size(ffilt) == maxfilt + assert np.size(cfilt) == maxfilt + # Check the values of XFILT, FFILT, CFILT. + assert not (np.isnan(xfilt[:, :nfilt])).any() + assert not any(np.isnan(ffilt[:nfilt]) | np.isposinf(ffilt[:nfilt])) + assert not any(cfilt[:nfilt] < 0 | np.isnan(cfilt[:nfilt]) | np.isposinf(cfilt[:nfilt])) + # Check that no point in the filter is better than X, and X is better than no point. + assert not any([isbetter(ffilt_i, cfilt_i, f, cstrv, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])]) + assert not any([isbetter(f, cstrv, ffilt_i, cfilt_i, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])]) + # Check CONFILT. + if present(confilt): + assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt + assert not (np.isnan(confilt[:, :nfilt]) | np.isneginf(confilt[:, :nfilt])).any() + + return nfilt, cfilt, ffilt, xfilt, confilt @@ -132,8 +213,22 @@ def selectx(fhist: npt.NDArray, chist: npt.NDArray, cweight: float, ctol: float) its constraint violation is at most CTOL. Not that CTOL is absolute, not relative. ''' + # Sizes nhist = len(fhist) + # Preconditions + if DEBUGGING: + assert nhist >= 1 + assert np.size(chist) == nhist + assert not any(np.isnan(fhist) | np.isposinf(fhist)) + assert not any(chist < 0 | np.isnan(chist) | np.isposinf(chist)) + assert cweight >= 0 + assert ctol >= 0 + + #====================# + # Calculation starts # + #====================# + # We select X among the points with F < FREF and CSTRV < CREF. # Do NOT use F <= FREF, because F == FREF (FUNCMAX or REALMAX) may mean F == INF in practice! if any(np.logical_and(fhist < FUNCMAX, chist < CONSTRMAX)): @@ -155,21 +250,21 @@ def selectx(fhist: npt.NDArray, chist: npt.NDArray, cweight: float, ctol: float) # Shift the constraint violations by ctol, so that cstrv <= ctol is regarded as no violation. chist_shifted = np.maximum(chist - ctol, 0) # cmin is the minimal shift constraint violation attained in the history. - cmin = min(chist_shifted[fhist < fref]) + cmin = np.min(chist_shifted[fhist < fref]) # We consider only the points whose shifted constraint violations are at most the cref below. - # N.B.: Without taking max(EPS, .), cref would be 0 if cmin = 0. In that case, asking for + # N.B.: Without taking np.maximum(EPS, .), cref would be 0 if cmin = 0. In that case, asking for # cstrv_shift < cref would be WRONG! - cref = max(EPS, 2*cmin) + cref = np.maximum(EPS, 2*cmin) # We use the following phi as our merit function to select X. if cweight <= 0: phi = fhist elif np.isposinf(cweight): phi = chist_shifted - # We should not use chist here; if min(chist_shifted) is attained at multiple indices, then + # We should not use chist here; if np.minimum(chist_shifted) is attained at multiple indices, then # we will check fhist to exhaust the remaining degree of freedom. else: phi = np.maximum(fhist, -REALMAX) + cweight * chist_shifted - # max(fhist, -REALMAX) makes sure that phi will not contain NaN (unless there is a bug). + # np.maximum(fhist, -REALMAX) makes sure that phi will not contain NaN (unless there is a bug). # We select X to minimize phi subject to f < fref and cstrv_shift <= cref (see the comments # above for the reason of taking "<" and "<=" in these two constraints). In case there are @@ -181,9 +276,18 @@ def selectx(fhist: npt.NDArray, chist: npt.NDArray, cweight: float, ctol: float) # 1. This process is the opposite of selecting kworst in savefilt # 2. In finite-precision arithmetic, phi_2 == phi_2 and cstrv_shift_1 == cstrv_shifted_2 do # not ensure thatn f_1 == f_2! - phimin = min(phi[np.logical_and(fhist < fref, chist_shifted <= cref)]) - cref = min(chist_shifted[np.logical_and(fhist < fref, phi <= phimin)]) - fref = min(fhist[chist_shifted <= cref]) - kopt = np.where(chist == min(chist[fhist <= fref]))[0][0] + phimin = np.min(phi[np.logical_and(fhist < fref, chist_shifted <= cref)]) + cref = np.min(chist_shifted[np.logical_and(fhist < fref, phi <= phimin)]) + fref = np.min(fhist[chist_shifted <= cref]) + kopt = np.where(chist == np.min(chist[fhist <= fref]))[0][0] + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert kopt >= 0 and kopt < nhist + assert not any([isbetter(fhisti, chisti, fhist[kopt], chist[kopt], ctol) for fhisti, chisti in zip(fhist[:nhist], chist[:nhist])]) return kopt \ No newline at end of file From 52d5647208a559774f8873e9f1e777309b83cb0d Mon Sep 17 00:00:00 2001 From: Nickolai Belakovski Date: Tue, 26 Sep 2023 15:17:01 -0700 Subject: [PATCH 5/6] Added a few more pre/post conditions --- python/src/prima/cobyla/geometry.py | 2 +- python/src/prima/cobyla/trustregion.py | 4 +- python/src/prima/common/linalg.py | 52 +++++++++++++++++++++----- 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/python/src/prima/cobyla/geometry.py b/python/src/prima/cobyla/geometry.py index 7526886e99..03c80d248a 100644 --- a/python/src/prima/cobyla/geometry.py +++ b/python/src/prima/cobyla/geometry.py @@ -42,7 +42,7 @@ def assess_geo(delta, factor_alpha, factor_beta, sim, simi): # constant in the order of delta. This simplex is optimal in the sense that the interpolation # system has the minimal condition number, i.e. 1. For this simplex, the distance from # V_{N+1} to its opposite face is L/sqrt(n). - vsig = 1/np.sqrt(np.sum(simi**2, axis=1)) # TODO: Check axis + vsig = 1/np.sqrt(np.sum(simi**2, axis=1)) veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0)) adequate_geo = all(vsig >= factor_alpha * delta) and all(veta <= factor_beta * delta) diff --git a/python/src/prima/cobyla/trustregion.py b/python/src/prima/cobyla/trustregion.py index 5f37035507..a606a3d863 100644 --- a/python/src/prima/cobyla/trustregion.py +++ b/python/src/prima/cobyla/trustregion.py @@ -29,7 +29,7 @@ def trstlp(A_in, b_in, delta): It is possible but rare that a degeneracy may prevent d from attaining the target length delta. - cviol is the largest constraint violation of the current d: max(b[:m] - A[:,:m].T@d, 0). TODO: Not sure if it should be an @, or what max(vector, scalar) means + cviol is the largest constraint violation of the current d: np.maximum(np.append(b[:m] - A[:,:m].T@d, 0)). icon is the index of a most violated constraint if cviol is positive. nact is the number of constraints in the active set and iact[0], ..., iact[nact-1] are their indices, @@ -403,7 +403,6 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): vmultd[nact-1] = max(0, vmultd[nact-1]) # This seems never activated. # Complete vmultd by finding the new constraint residuals. (Powell wrote "Complete vmultc ...") cvshift = dnew@A[:, iact] - b[iact] + cviol # Only cvshift[nact+1:mcon] is needed - # TODO: The fortran code has a matprod for two vectors, not sure what that's supposed to return? cvshift is supposed to be a vector cvsabs = abs(dnew)@abs(A[:, iact]) + abs(b[iact]) + cviol cvshift[isminor(cvshift, cvsabs)] = 0 vmultd[nact:mcon] = cvshift[nact:mcon] @@ -412,7 +411,6 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): fracmult = [vmultc[i]/(vmultc[i] - vmultd[i]) if vmultd[i] < 0 else REALMAX for i in range(len(vmultd))] # Only the places with vmultd < 0 are relevant below, if any. icon = np.where((temp_arr := np.array([1, *fracmult])) == min(temp_arr))[0][0] - 1 - # TODO: The above code is a little strange. It seems like icon could be -1 which in our context is invalid frac = min(temp_arr) # Update d, vmultc, and cviol diff --git a/python/src/prima/common/linalg.py b/python/src/prima/common/linalg.py index 2f1708a7b2..4580d2e3a3 100644 --- a/python/src/prima/common/linalg.py +++ b/python/src/prima/common/linalg.py @@ -16,7 +16,8 @@ def planerot(x): ''' # Preconditions - assert len(x) == 2, "x must be a 2-vector" + if DEBUGGING: + assert len(x) == 2, "x must be a 2-vector" # ================== # Calculation starts @@ -79,14 +80,15 @@ def planerot(x): #====================# # Postconditions - assert G.shape == (2,2) - assert np.all(np.isfinite(G)) - assert abs(G[0, 0] - G[1, 1]) + abs(G[0, 1] + G[1, 0]) <= 0 - tol = max(1.0E-10, min(1.0E-1, 1.0E6 * EPS)) - # assert isorth(G, tol) # TODO: Implement isorth - if all(np.logical_and(np.isfinite(x), np.abs(x) < np.sqrt(REALMAX / 2.1))): - r = np.linalg.norm(x) - assert max(abs(G@x - [r, 0])) <= max(tol, tol * r), 'G @ X = [||X||, 0]' + if DEBUGGING: + assert G.shape == (2,2) + assert np.all(np.isfinite(G)) + assert abs(G[0, 0] - G[1, 1]) + abs(G[0, 1] + G[1, 0]) <= 0 + tol = np.maximum(1.0E-10, np.minimum(1.0E-1, 1.0E6 * EPS)) + assert isorth(G, tol) + if all(np.logical_and(np.isfinite(x), np.abs(x) < np.sqrt(REALMAX / 2.1))): + r = np.linalg.norm(x) + assert max(abs(G@x - [r, 0])) <= max(tol, tol * r), 'G @ X = [||X||, 0]' return G @@ -141,3 +143,35 @@ def isinv(A, B, tol=None): # Calculation ends # #===================# return is_inv + + +def isorth(A, tol=None): + ''' + This function tests whether the matrix A has orthonormal columns up to the tolerance TOL. + ''' + + # Preconditions + if DEBUGGING: + if present(tol): + assert tol >= 0 + + #====================# + # Calculation starts # + #====================# + + num_vars = np.size(A, 1) + + if num_vars > np.size(A, 0): + is_orth = False + elif (np.isnan(np.sum(abs(A)))): + is_orth = False + else: + if present(tol): + is_orth = (abs(A.T@A - np.eye(num_vars)) <= np.maximum(tol, tol * np.max(abs(A)))).all() + else: + is_orth = (abs(A.T@A - np.eye(num_vars)) <= 0).all() + + #====================# + # Calculation ends # + #====================# + return is_orth From 1095cc13ff1e76b0c92c7eccbccfc8862597e246 Mon Sep 17 00:00:00 2001 From: Nickolai Belakovski Date: Thu, 28 Sep 2023 13:59:14 -0700 Subject: [PATCH 6/6] Adding Python action --- .github/workflows/test_python.yml | 68 ++++++++++++++++++++++++++ python/src/prima/cobyla/trustregion.py | 4 +- 2 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test_python.yml diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml new file mode 100644 index 0000000000..96f3dbcc1a --- /dev/null +++ b/.github/workflows/test_python.yml @@ -0,0 +1,68 @@ +name: Test Python + +on: + # Trigger the workflow on push or pull request + #push: + pull_request: # DANGEROUS! MUST be disabled for self-hosted runners! + # Trigger the workflow by cron. The default time zone of GitHub Actions is UTC. + schedule: + - cron: '0 16 4-31/4 * *' + # Trigger the workflow manually + workflow_dispatch: + + +jobs: + + test: + name: Run Python tests + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + solver: [cobyla] + testdim: [small] + + steps: + - name: Checkout repository + uses: actions/checkout@v3.5.3 + with: + submodules: recursive + # ssh-key: ${{ secrets.SSH_PRIVATE_KEY_ACT }} # This forces checkout to use SSH, not HTTPS + # As of 230425, checkout with ssh fails frequently on Windows runners. + + # TODO: See if this is necessary for Python + # - name: Make tools such as grep, make, and git available on Windows + # if: runner.os == 'Windows' + # run: $env:Path += ";C:\Program Files\Git\usr\bin;C:\Program Files\Git\bin;C:\ProgramData\Chocolatey\bin" + + # - name: Miscellaneous setup + # shell: bash # Important; otherwise, the following statements do not work on Windows. + # run: bash .github/scripts/misc_setup + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install numpy pytest + + - name: Conduct the test + # shell: bash # Important; otherwise, `<` will not work on Windows. + run: | + cd "$ROOT_DIR"/python && pytest --cov=src --cov-report=html test + + + - name: Store artifacts + uses: actions/upload-artifact@v3.1.2 + if: always() # Always run even if the workflow is canceled manually or due to overtime. + # Note that `$TEST_DIR` does not work on Windows, where its equivalent is `$env:TEST_DIR`. + # In the following, we enquire `$TEST_DIR` by using the `env` context, which is platform independent. + with: + path: ${{ env.TEST_DIR }}/prima/python/htmlcov/* + + - name: Remove the test data + shell: bash # Important; otherwise, `rm -rf` will not work on Windows. + run: rm -rf ${{ env.TEST_DIR }} diff --git a/python/src/prima/cobyla/trustregion.py b/python/src/prima/cobyla/trustregion.py index a606a3d863..86bed4331a 100644 --- a/python/src/prima/cobyla/trustregion.py +++ b/python/src/prima/cobyla/trustregion.py @@ -235,7 +235,7 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): # same before and after QRADD). Therefore if we supply ZDOTA to LSQR (as Rdiag) as # Powell did, we should use the UNUPDATED version, namely ZDASAV. # vmultd[:nact] = lsqr(A[:, iact[:nact]], A[:, iact[icon]], z[:, :nact], zdasav[:nact]) - vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], A[:, iact[icon]])[0] + vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], A[:, iact[icon]], rcond=None)[0] if not any(np.logical_and(vmultd[:nact] > 0, iact[:nact] <= num_constraints)): # N.B.: This can be triggered by NACT == 0 (among other possibilities)! This is # important, because NACT will be used as an index in the sequel. @@ -398,7 +398,7 @@ def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z): # force multd[k] = 0 if deviations from this value can be attributed to computer rounding # errors. First calculate the new Lagrange multipliers. # vmultd[:nact] = lsqr(A[:, iact[:nact]], dnew, z[:, :nact], zdota[:nact]) - vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], dnew)[0] + vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], dnew, rcond=None)[0] if stage == 2: vmultd[nact-1] = max(0, vmultd[nact-1]) # This seems never activated. # Complete vmultd by finding the new constraint residuals. (Powell wrote "Complete vmultc ...")