diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml
new file mode 100644
index 0000000000..96f3dbcc1a
--- /dev/null
+++ b/.github/workflows/test_python.yml
@@ -0,0 +1,68 @@
+name: Test Python
+
+on:
+  # Trigger the workflow on push or pull request
+  #push:
+  pull_request:  # DANGEROUS! MUST be disabled for self-hosted runners!
+  # Trigger the workflow by cron. The default time zone of GitHub Actions is UTC.
+  schedule:
+    - cron: '0 16 4-31/4 * *'
+  # Trigger the workflow manually
+  workflow_dispatch:
+
+
+jobs:
+
+  test:
+    name: Run Python tests
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        solver: [cobyla]
+        testdim: [small]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3.5.3
+        with:
+          submodules: recursive
+        # ssh-key: ${{ secrets.SSH_PRIVATE_KEY_ACT }}  # This forces checkout to use SSH, not HTTPS
+        # As of 230425, checkout with ssh fails frequently on Windows runners.
+
+      # TODO: See if this is necessary for Python
+      # - name: Make tools such as grep, make, and git available on Windows
+      #   if: runner.os == 'Windows'
+      #   run: $env:Path += ";C:\Program Files\Git\usr\bin;C:\Program Files\Git\bin;C:\ProgramData\Chocolatey\bin"
+
+      # - name: Miscellaneous setup
+      #   shell: bash  # Important; otherwise, the following statements do not work on Windows.
+      #   run: bash .github/scripts/misc_setup
+      - name: Setup Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install numpy pytest
+
+      - name: Conduct the test
+        # shell: bash  # Important; otherwise, `<` will not work on Windows.
+        run: |
+          cd "$ROOT_DIR"/python && pytest --cov=src --cov-report=html test
+
+
+      - name: Store artifacts
+        uses: actions/upload-artifact@v3.1.2
+        if: always()  # Always run even if the workflow is canceled manually or due to overtime.
+        # Note that `$TEST_DIR` does not work on Windows, where its equivalent is `$env:TEST_DIR`.
+        # In the following, we enquire `$TEST_DIR` by using the `env` context, which is platform independent.
+        with:
+          path: ${{ env.TEST_DIR }}/prima/python/htmlcov/*
+
+      - name: Remove the test data
+        shell: bash  # Important; otherwise, `rm -rf` will not work on Windows.
+        run: rm -rf ${{ env.TEST_DIR }}
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000000..0d60cfadc9
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,74 @@
+To develop, `cd` into the `src` directory and run
+
+```pip install --editable .```
+
+This will install prima locally in an editable fashion. From there you can run `examples/cobyla/cobyla_example.py` (from any directory) and go from there.
+
+## Style notes
+
+- In cases where there is both a Python built-in and a numpy function that achieve the same aim (e.g. `sum` vs `np.sum`), we prefer the numpy function.
+  - Rationale:
+    - In some cases we must use the numpy function because it has functionality that the Python function doesn't, e.g. `np.sum` has an `axis` argument, whereas `sum` does not.
+    - In light of this, for the sake of consistency, we prefer to use the numpy functions everywhere.
+- Most of the comments are copied from Fortran verbatim, except in cases where they need to be modified due to specifics of the Python language. In these cases a note will be made of the difference between Fortran and Python.
+  - Rationale:
+    - The main purpose of this is to keep the Python and Fortran codebases as similar as possible.
+- For determining the dimensions of an array, we exclusively use `np.size` instead of `np.shape`, `some_array.shape`, or `len`.
+  - Rationale:
+    - Fortran uses `SIZE`, so this helps us be as consistent with the Fortran code as possible.
+
+## A note on Fortran's `max` and `maxval` and their Python equivalents
+
+| Fortran  | Python            |
+|----------|-------------------|
+| `max`    | `np.maximum`      |
+| `maxval` | `np.max` or `max` |
+
+Fortran's `max` and numpy's `maximum` accept two arguments, either of which can be a scalar or an array,
+and return an elementwise maximum of the two arguments. In the case of a scalar and an array argument, they
+return an elementwise maximum of the scalar and each element of the array.
+
+Fortran's `maxval` takes a single array argument and returns its largest element, similar to numpy's `max`.
+Python's built-in `max` can either take a single argument and return its largest element, or it can take a
+variable number of arguments and return the largest of all of them.
+
+Per the style notes, prefer `np.max` over the built-in `max`.
+
+For the larger of two numbers Fortran uses `max`, and we prefer `np.maximum` over the Python `max` for consistency in the translation.
+
+This note applies to `min` and `minval` as well.
+
+
+## A note on indices
+
+Consider the following Fortran code
+
+```
+do i = 0, 5
+    print *, i
+end do
+```
+
+It can be easily and automatically translated to Python as
+
+```
+for i in range(0, 6):
+    print(i)
+```
+
+Now consider the following similar loop
+
+```
+do i = 1, 5
+    print *, some_array(i)
+end do
+```
+
+This can be translated to Python as
+
+```
+for i in range(1, 6):
+    print(some_array[i-1])
+```
+
+This leads to awkward Python code, since the more pythonic code would range from 0 to 5, and the indexing would be `some_array[i]`. In order to make the Python code more usable, we will attempt to write more "pythonic" code, even though that makes the translation a little bit more difficult.
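+
+As a quick illustration of the two notes above, here is a standalone sketch (it does not use anything from prima itself):
+
+```
+import numpy as np
+
+a = np.array([1.0, 4.0, 2.0])
+
+# Fortran's MAX(2.0, A) corresponds to np.maximum: the elementwise maximum of a scalar and an array.
+print(np.maximum(2.0, a))  # [2. 4. 2.]
+
+# Fortran's MAXVAL(A) corresponds to np.max: the largest element of an array.
+print(np.max(a))  # 4.0
+
+# Fortran's `do i = 1, 3 / print *, a(i) / end do` in the preferred "pythonic" form:
+for i in range(3):
+    print(a[i])
+```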
\ No newline at end of file
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 0000000000..d95d6147de
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "prima"
+version = "0.0.1"
+dependencies = [
+    "numpy"
+]
+
+[tool.setuptools.packages.find]
+where = ["src"]  # ["."] by default
+exclude = ["prima.examples*"]  # empty by default
diff --git a/python/src/prima/__init__.py b/python/src/prima/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/src/prima/cobyla/__init__.py b/python/src/prima/cobyla/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/src/prima/cobyla/cobyla.py b/python/src/prima/cobyla/cobyla.py
new file mode 100644
index 0000000000..497c8a6ed2
--- /dev/null
+++ b/python/src/prima/cobyla/cobyla.py
@@ -0,0 +1,267 @@
+from prima.common.evaluate import evaluate, moderatex
+from prima.common.consts import EPS, RHOBEG_DEFAULT, RHOEND_DEFAULT, \
+    CTOL_DEFAULT, CWEIGHT_DEFAULT, FTARGET_DEFAULT, IPRINT_DEFAULT, \
+    MAXFUN_DIM_DEFAULT, DEBUGGING
+from prima.common.preproc import preproc
+from prima.common.present import present
+from prima.cobyla.cobylb import cobylb
+import numpy as np
+from dataclasses import dataclass
+
+
+# Return type for COBYLA
+@dataclass
+class COBYLAResult:
+    x: np.ndarray
+    f: float
+    constr: np.ndarray
+    cstrv: float
+    nf: int
+    xhist: np.ndarray | None
+    fhist: np.ndarray | None
+    chist: np.ndarray | None
+    conhist: np.ndarray | None
+    info: int
+
+
+def cobyla(calcfc, num_constraints, x, f0=None, constr0=None, rhobeg=None,
+           rhoend=None, ftarget=FTARGET_DEFAULT, ctol=CTOL_DEFAULT, cweight=CWEIGHT_DEFAULT,
+           maxfun=None, iprint=IPRINT_DEFAULT, eta1=None, eta2=None, gamma1=0.5, gamma2=2,
+           maxhist=None, maxfilt=2000):
+    """
+    Among all the arguments, only CALCFC, NUM_CONSTRAINTS, and X are obligatory. The others are
+    OPTIONAL and you can neglect them unless you are familiar with the algorithm. Any unspecified
+    optional input will take the default value detailed below. For instance, we may invoke the
+    solver as follows.
+
+    First define CALCFC, NUM_CONSTRAINTS, and X, and then do the following.
+    result = cobyla(calcfc, num_constraints, x)
+
+    or
+
+    First define CALCFC, NUM_CONSTRAINTS, and X, and then do the following.
+    result = cobyla(calcfc, num_constraints, x, rhobeg=0.5, rhoend=1E-3, maxfun=100)
+
+    IMPORTANT NOTICE: The user must set NUM_CONSTRAINTS correctly to the number of constraints,
+    namely the size of CONSTR introduced below. Set NUM_CONSTRAINTS to 0 if there is no constraint.
+
+    See examples/cobyla_example.py for a concrete example.
+
+    A detailed introduction to the arguments is as follows. The section below is largely similar to
+    the comments in the Fortran version but has been slightly modified to reflect the differences
+    in some of the arguments.
+
+    CALCFC
+    F, CONSTR = CALCFC(X) should evaluate the objective function and constraints at the given
+    vector X; it should return the objective function value and the constraint value. It must be
+    provided by the user, and its definition must conform to the following interface:
+    !-------------------------------------------------------------------------!
+    def calcfc(x):
+        # ... calculations ...
+        return f, constr
+    !-------------------------------------------------------------------------!
+    X and CONSTR are numpy arrays, and F is a scalar. If there are no constraints, CONSTR is
+    expected to be an empty numpy array.
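+
+    As an illustration, consider the hypothetical problem of minimizing x1^2 + x2^2 subject to
+    x1 + x2 >= 1 (this problem is not part of the package; it only sketches the interface).
+    COBYLA regards CONSTR(X) >= 0 as feasible (see CTOL and CSTRV below), so the constraint is
+    coded as x1 + x2 - 1 >= 0:
+    !-------------------------------------------------------------------------!
+    def calcfc(x):
+        f = x[0]**2 + x[1]**2
+        constr = np.array([x[0] + x[1] - 1])  # Feasible iff x1 + x2 - 1 >= 0
+        return f, constr
+    !-------------------------------------------------------------------------!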
+
+    NUM_CONSTRAINTS
+    NUM_CONSTRAINTS must be set to the number of constraints.
+    N.B.:
+    1. Why don't we define NUM_CONSTRAINTS as optional and determine it by running CALCFC?
+    Because we need to know it ahead of time to set up various matrices and vectors. Since CALCFC
+    might be expensive, we prefer to avoid running it unnecessarily.
+
+    X
+    X should be an N-dimensional vector that contains the starting point, N being the
+    dimension of the problem.
+
+    F0
+    F0, if present, should be set to the objective function value of the starting X.
+
+    CONSTR0
+    CONSTR0, if present, should be set to the constraint value of the starting X; in addition,
+    SIZE(CONSTR0) must be NUM_CONSTRAINTS, or the solver will abort.
+
+    RHOBEG, RHOEND
+    Default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be set to
+    the initial and final values of a trust-region radius, both being positive and RHOEND <= RHOBEG.
+    Typically RHOBEG should be about one tenth of the greatest expected change to a variable, and
+    RHOEND should indicate the accuracy that is required in the final values of the variables.
+
+    FTARGET
+    Default: -Inf.
+    FTARGET is the target function value. The algorithm will terminate when a feasible point with a
+    function value <= FTARGET is found.
+
+    CTOL
+    Default: machine epsilon.
+    CTOL is the tolerance of constraint violation. Any X with MAXVAL(-CONSTR(X)) <= CTOL is
+    considered feasible.
+    N.B.: 1. CTOL is absolute, not relative. 2. CTOL is used only when selecting the returned X.
+    It does not affect the iterations of the algorithm.
+
+    CWEIGHT
+    Default: CWEIGHT_DEFAULT defined in common/consts.py.
+    CWEIGHT is the weight that the constraint violation takes in the selection of the returned X.
+
+    MAXFUN
+    Default: MAXFUN_DIM_DEFAULT*N with MAXFUN_DIM_DEFAULT defined in common/consts.py.
+    MAXFUN is the maximal number of calls of CALCFC.
+
+    IPRINT
+    Default: 0.
+    The value of IPRINT should be set to 0, 1, -1, 2, -2, 3, or -3, which controls how much
+    information will be printed during the computation:
+    0: there will be no printing;
+    1: a message will be printed to the screen at the return, showing the best vector of variables
+       found and its objective function value;
+    2: in addition to 1, each new value of RHO is printed to the screen, with the best vector of
+       variables so far and its objective function value; each new value of CPEN is also printed;
+    3: in addition to 2, each function evaluation with its variables will be printed to the screen;
+    -1, -2, -3: the same information as 1, 2, 3 will be printed, not to the screen but to a file
+       named COBYLA_output.txt; the file will be created if it does not exist; the new output will
+       be appended to the end of this file if it already exists.
+    Note that IPRINT = +/-3 can be costly in terms of time and/or space.
+
+    ETA1, ETA2, GAMMA1, GAMMA2
+    Default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2.
+    ETA1, ETA2, GAMMA1, and GAMMA2 are parameters in the updating scheme of the trust-region radius
+    detailed in the subroutine TRRAD in trustregion.py. Roughly speaking, the trust-region radius
+    is contracted by a factor of GAMMA1 when the reduction ratio is below ETA1, and enlarged by a
+    factor of GAMMA2 when the reduction ratio is above ETA2. It is required that 0 < ETA1 <= ETA2
+    < 1 and 0 < GAMMA1 < 1 < GAMMA2. Normally, ETA1 <= 0.25. It is NOT advised to set ETA1 >= 0.5.
+
+    XHIST, FHIST, CHIST, CONHIST, MAXHIST
+    MAXHIST: Input, int scalar, default: MAXFUN.
+    XHIST, if present, will output the history of iterates; FHIST, if present, will output the
+    history of function values; CHIST, if present, will output the history of constraint violations;
+    CONHIST, if present, will output the history of constraint values; MAXHIST should be a
+    nonnegative integer, and XHIST/FHIST/CHIST/CONHIST will output only the history of the last
+    MAXHIST iterations. Therefore, MAXHIST = 0 means XHIST/FHIST/CONHIST/CHIST will output nothing,
+    while setting MAXHIST = MAXFUN requests XHIST/FHIST/CHIST/CONHIST to output all the history.
+    If XHIST is present, its size at exit will be (N, min(NF, MAXHIST)); if FHIST is present, its
+    size at exit will be min(NF, MAXHIST); if CHIST is present, its size at exit will be
+    min(NF, MAXHIST); if CONHIST is present, its size at exit will be (M, min(NF, MAXHIST)).
+
+    IMPORTANT NOTICE:
+    Setting MAXHIST to a large value can be costly in terms of memory for large problems.
+    MAXHIST will be reset to a smaller value if the memory needed exceeds MAXHISTMEM defined in
+    common/consts.py (the counterpart of CONSTS_MOD in the Fortran version).
+    Use *HIST with caution!!! (N.B.: the algorithm is NOT designed for large problems).
+
+    MAXFILT
+    Input, int scalar.
+    MAXFILT is a nonnegative integer indicating the maximal length of the filter used for selecting
+    the returned solution; default: 2000 (a value lower than MIN_MAXFILT is not recommended);
+    see common/consts.py for the definition of MIN_MAXFILT.
+
+    Output: A Python dataclass with the following fields.
+
+    X
+    X will be set to the best vector of variables found.
+
+    F
+    F will be set to the objective function value at X.
+
+    CSTRV
+    CSTRV will be set to the constraint violation of X at exit, i.e., MAXVAL([-CONSTR(X), 0]).
+
+    CONSTR
+    CONSTR will be set to the constraint value of X at exit.
+
+    NF
+    NF will be set to the number of calls of CALCFC at exit.
+
+    INFO
+    INFO is the exit flag. It will be set to one of the following values defined in
+    common/infos.py (the counterpart of INFOS_MOD in the Fortran version):
+    SMALL_TR_RADIUS: the lower bound for the trust region radius is reached;
+    FTARGET_ACHIEVED: the target function value is reached;
+    MAXFUN_REACHED: the objective function has been evaluated MAXFUN times;
+    MAXTR_REACHED: the trust region iteration has been performed MAXTR times (MAXTR = 2*MAXFUN);
+    NAN_INF_X: NaN or Inf occurs in X;
+    DAMAGING_ROUNDING: rounding errors are becoming damaging.
+    !--------------------------------------------------------------------------!
+    The following case(s) should NEVER occur unless there is a bug.
+    NAN_INF_F: the objective function returns NaN or +Inf;
+    NAN_INF_MODEL: NaN or Inf occurs in the model;
+    TRSUBP_FAILED: a trust region step failed to reduce the model.
+    !--------------------------------------------------------------------------!
+    """
+
+    # Local variables
+    solver = 'COBYLA'
+
+    # Preconditions
+    if DEBUGGING:
+        assert (f0 is None) == (constr0 is None), "f0 and constr0 must be both present or both absent"
+
+    # Sizes
+    num_vars = len(x)
+
+    # Exit if constr0 is not consistent with num_constraints.
+    if present(constr0):
+        assert np.size(constr0) == num_constraints
+
+
+    # If the user provides the function & constraint value at X0, then use them.
+    # Otherwise, set [F0, CONSTR0] = [F(X0), CONSTR(X0)], so that COBYLB only needs a single interface.
+    if (f0 is None and constr0 is None) or not all(np.isfinite(x)):
+        # Replace any NaN in X by ZERO and Inf/-Inf in X by REALMAX/-REALMAX.
+        x = moderatex(x)
+        f0, constr0, cstrv0 = evaluate(calcfc, x)
+    else:
+        # Use the user-provided F0 and CONSTR0; the corresponding violation is MAXVAL([-CONSTR0, 0]).
+        cstrv0 = np.max(np.append(-constr0, 0))
+
+    if not present(rhobeg):
+        if present(rhoend) and np.isfinite(rhoend) and rhoend > 0:
+            rhobeg = np.maximum(10*rhoend, RHOBEG_DEFAULT)
+        else:
+            rhobeg = RHOBEG_DEFAULT
+
+    if not present(rhoend):
+        if rhobeg > 0:
+            rhoend = np.maximum(EPS, np.minimum(0.1*rhobeg, RHOEND_DEFAULT))
+        else:
+            rhoend = RHOEND_DEFAULT
+
+    maxfun = maxfun if present(maxfun) else MAXFUN_DIM_DEFAULT*num_vars
+
+    if not present(eta1):
+        if present(eta2) and 0 < eta2 < 1:
+            eta1 = np.maximum(EPS, eta2/7)
+        else:
+            eta1 = 0.1
+
+    if not present(eta2):
+        if 0 < eta1 < 1:
+            eta2 = (eta1 + 2)/3
+        else:
+            eta2 = 0.7
+
+    maxhist = maxhist if present(maxhist) else np.max([maxfun, num_vars+2, MAXFUN_DIM_DEFAULT*num_vars])
+
+    # Preprocess the inputs in case some of them are invalid. It does nothing if all inputs are valid.
+    # Python: The underscores refer to `npt` and `x0`, neither of which is relevant here.
+    iprint, maxfun, maxhist, ftarget, rhobeg, rhoend, _, maxfilt, ctol, cweight, \
+        eta1, eta2, gamma1, gamma2, _ = preproc(solver, num_vars, iprint, maxfun, maxhist,
+                                                ftarget, rhobeg, rhoend, num_constraints=num_constraints, ctol=ctol, cweight=cweight,
+                                                eta1=eta1, eta2=eta2, gamma1=gamma1, gamma2=gamma2, maxfilt=maxfilt)
+
+
+    # Further revise MAXHIST according to MAXHISTMEM, and allocate memory for the history.
+    # In the MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize
+    # CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN)
+    # if they are requested; replace MAXFUN with 0 for the history that is not requested.
+    # prehist(maxhist, num_vars, present(xhist), xhist_loc, present(fhist), fhist_loc, &
+    #     & present(chist), chist_loc, m, present(conhist), conhist_loc)
+
+
+    # Call COBYLB, which performs the real calculations.
+    x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info = cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2,
+                                                                         ftarget, gamma1, gamma2, rhobeg, rhoend, constr0, f0, x,
+                                                                         cstrv0, maxhist)
+
+    return COBYLAResult(x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info)
diff --git a/python/src/prima/cobyla/cobylb.py b/python/src/prima/cobyla/cobylb.py
new file mode 100644
index 0000000000..3cc656f313
--- /dev/null
+++ b/python/src/prima/cobyla/cobylb.py
@@ -0,0 +1,618 @@
+import numpy as np
+from prima.common.checkbreak import checkbreak_con
+from prima.common.consts import REALMAX, EPS, DEBUGGING, MIN_MAXFILT
+from prima.common.infos import INFO_DEFAULT, MAXTR_REACHED, DAMAGING_ROUNDING, \
+    SMALL_TR_RADIUS
+from prima.common.evaluate import evaluate
+from prima.common.history import savehist
+from prima.common.linalg import isinv
+from prima.common.message import fmsg, retmsg, rhomsg
+from prima.common.ratio import redrat
+from prima.common.redrho import redrho
+from prima.common.selectx import savefilt, selectx
+from prima.cobyla.update import updatepole, findpole, updatexfc
+from prima.cobyla.geometry import assess_geo, setdrop_tr, geostep, setdrop_geo
+from prima.cobyla.trustregion import trstlp, trrad
+from prima.cobyla.initialize import initxfc, initfilt
+
+
+def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget,
+           gamma1, gamma2, rhobeg, rhoend, constr, f, x,
+           cstrv, maxhist):
+    '''
+    This subroutine performs the actual computations of COBYLA.
+    '''
+
+    # Outputs
+    xhist = []
+    fhist = []
+    chist = []
+    conhist = []
+
+    # Local variables
+    solver = 'COBYLA'
+    # CPENMIN is the minimum of the penalty parameter CPEN for the L-infinity constraint violation in
+    # the merit function. Note that CPENMIN = 0 in Powell's implementation, which allows CPEN to be 0.
+    # Here, we take CPENMIN > 0 so that CPEN is always positive. This avoids the situation where PREREM
+    # becomes 0 when PREREF = 0 = CPEN. It brings two advantages as follows.
+    # 1. If the trust-region subproblem solver works correctly and the trust-region center is not
+    #    optimal for the subproblem, then PREREM > 0 is guaranteed. This is because, in theory, PREREC >= 0
+    #    and MAX(PREREC, PREREF) > 0, and the definition of CPEN in GETCPEN ensures that PREREM > 0.
+    # 2. There is no need to revise ACTREM and PREREM when CPEN = 0 and F = FVAL(N+1) as in lines
+    #    312--314 of Powell's cobylb.f code. Powell's code revises ACTREM to CVAL(N + 1) - CSTRV and PREREM
+    #    to PREREC in this case, which is crucial for feasibility problems.
+    cpenmin = EPS
+    # FACTOR_ALPHA, FACTOR_BETA, FACTOR_GAMMA, and FACTOR_DELTA are four factors that COBYLB uses
+    # when managing the simplex. Note the following.
+    # 1. FACTOR_ALPHA < FACTOR_GAMMA < 1 < FACTOR_DELTA <= FACTOR_BETA.
+    # 2. FACTOR_DELTA has nothing to do with DELTA, which is the trust-region radius.
+    # 3. FACTOR_GAMMA has nothing to do with GAMMA1 and GAMMA2, which are the contracting/expanding
+    #    factors for updating the trust-region radius DELTA.
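+    # For instance, the concrete values assigned below satisfy 0.25 < 0.5 < 1 < 1.1 <= 2.1,
+    # i.e. FACTOR_ALPHA < FACTOR_GAMMA < 1 < FACTOR_DELTA <= FACTOR_BETA, as required by item 1.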
+    factor_alpha = 0.25  # The factor alpha in the COBYLA paper
+    factor_beta = 2.1  # The factor beta in the COBYLA paper
+    factor_delta = 1.1  # The factor delta in the COBYLA paper
+    factor_gamma = 0.5  # The factor gamma in the COBYLA paper
+
+    # Sizes
+    num_constraints = np.size(constr)
+    num_vars = np.size(x)
+
+    # Preconditions
+    if DEBUGGING:
+        assert abs(iprint) <= 3
+        assert num_constraints >= 0
+        assert num_vars >= 1
+        assert maxfun >= num_vars + 2
+        assert rhobeg >= rhoend and rhoend > 0
+        assert eta1 >= 0 and eta1 <= eta2 and eta2 < 1
+        assert gamma1 > 0 and gamma1 < 1 and gamma2 > 1
+        assert ctol >= 0
+        assert cweight >= 0
+        assert maxhist >= 0 and maxhist <= maxfun
+        assert maxfilt >= min(MIN_MAXFILT, maxfun) and maxfilt <= maxfun
+        assert factor_alpha > 0 and factor_alpha < factor_gamma and factor_gamma < 1
+        assert factor_beta >= factor_delta and factor_delta > 1
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    # Initialize SIM, FVAL, CONMAT, and CVAL, together with the history.
+    # After the initialization, SIM[:, NUM_VARS] holds the vertex of the initial simplex with the smallest
+    # function value (regardless of the constraint violation), and SIM[:, :NUM_VARS] holds the displacements
+    # from the other vertices to SIM[:, NUM_VARS]. FVAL, CONMAT, and CVAL hold the function values,
+    # constraint values, and constraint violations on the vertices in the order corresponding to SIM.
+    evaluated, conmat, cval, sim, simi, fval, nf, subinfo = initxfc(calcfc, iprint, maxfun, constr, ctol, f, ftarget, rhobeg, x,
+                                                                    xhist, fhist, chist, conhist, maxhist, srname="COBYLA")
+
+    # Initialize the filter, including xfilt, ffilt, confilt, cfilt, and nfilt.
+    # N.B.: The filter is used only when selecting which iterate to return. It does not
+    # interfere with the iterations. COBYLA is NOT a filter method but a trust-region method
+    # based on an L-infinity merit function. Powell's implementation does not use a
+    # filter to select the iterate, possibly returning a suboptimal iterate.
+    cfilt = np.zeros(np.minimum(np.maximum(maxfilt, 1), maxfun))
+    confilt = np.zeros((np.size(constr), np.size(cfilt)))
+    ffilt = np.zeros(np.size(cfilt))
+    xfilt = np.zeros((np.size(x), np.size(cfilt)))
+    nfilt = initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt)
+
+    # TODO: INITFILT and SAVEFILT appear to be working. They did not get tested with a rising edge in the
+    # KEEP array (i.e. a False followed by a True), but it looks like that should not be a problem.
+
+    # Check whether to return due to abnormal cases that may occur during the initialization.
+    if subinfo != INFO_DEFAULT:
+        info = subinfo
+        # Return the best calculated values of the variables.
+        # N.B.: SELECTX and FINDPOLE choose X by different standards; one cannot replace the other.
+        kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], cweight, ctol)
+        x = xfilt[:, kopt]
+        f = ffilt[kopt]
+        constr = confilt[:, kopt]
+        cstrv = cfilt[kopt]
+        # Arrange CHIST, CONHIST, FHIST, and XHIST so that they are in chronological order.
+        # TODO: Implement me
+        # rangehist(nf, xhist, fhist, chist, conhist)
+        # Print a return message according to IPRINT.
+        retmsg(solver, info, iprint, nf, f, x, cstrv, constr)
+        # Postconditions
+        if DEBUGGING:
+            assert nf <= maxfun
+            assert np.size(x) == num_vars and not any(np.isnan(x))
+            assert not (np.isnan(f) or np.isposinf(f))
+            # assert np.size(xhist, 0) == num_vars and np.size(xhist, 1) == maxxhist
+            # assert not any(np.isnan(xhist[:, :min(nf, maxxhist)]))
+            # The last calculated X can be Inf (finite + finite can be Inf numerically).
+            # assert np.size(fhist) == maxfhist
+            # assert not any(np.isnan(fhist[:min(nf, maxfhist)]) | np.isposinf(fhist[:min(nf, maxfhist)]))
+            # assert np.size(conhist, 0) == num_constraints and np.size(conhist, 1) == maxconhist
+            # assert not any(np.isnan(conhist[:, :min(nf, maxconhist)]) | np.isneginf(conhist[:, :min(nf, maxconhist)]))
+            # assert np.size(chist) == maxchist
+            # assert not any((chist[:min(nf, maxchist)] < 0) | np.isnan(chist[:min(nf, maxchist)]) | np.isposinf(chist[:min(nf, maxchist)]))
+            # nhist = min(nf, maxfhist, maxchist)
+            # assert not any(isbetter(fhist[:nhist], chist[:nhist], f, cstrv, ctol))
+        return x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info
+
+    # Set some more initial values.
+    # We must initialize ACTREM and PREREM. Otherwise, when SHORTD = TRUE, compilers may raise a
+    # run-time error that they are undefined. But their values will not be used: when SHORTD = FALSE,
+    # they will be overwritten; when SHORTD = TRUE, the values are used only in BAD_TRSTEP, which is
+    # TRUE regardless of ACTREM or PREREM. Similar for PREREC, PREREF, PREREM, RATIO, and JDROP_TR.
+    # No need to initialize SHORTD unless MAXTR < 1, but some compilers may complain if we do not do it.
+    # Our initialization of CPEN differs from Powell's in two ways. First, we use the ratio defined in
+    # (13) of Powell's COBYLA paper to initialize CPEN. Second, we impose CPEN >= CPENMIN > 0. Powell's
+    # code simply initializes CPEN to 0.
+    rho = rhobeg
+    delta = rhobeg
+    cpen = np.maximum(cpenmin, np.minimum(1.0E3, fcratio(conmat, fval)))  # Powell's code: CPEN = ZERO
+    prerec = -REALMAX
+    preref = -REALMAX
+    prerem = -REALMAX
+    actrem = -REALMAX  # TODO: Is it necessary to define this and PREREM here, per the above comment?
+    shortd = False
+    trfail = False
+    ratio = -1
+    jdrop_tr = 0
+    jdrop_geo = 0
+
+    # If DELTA <= GAMMA3*RHO after an update, we set DELTA to RHO. GAMMA3 must be less than GAMMA2. The
+    # reason is as follows. Imagine a very successful step with DNORM = the un-updated DELTA = RHO.
+    # Then TRRAD will update DELTA to GAMMA2*RHO. If GAMMA3 >= GAMMA2, then DELTA will be reset to RHO,
+    # which is not reasonable as D is very successful. See paragraph two of Sec 5.2.5 in
+    # T. M. Ragonneau's thesis: "Model-Based Derivative-Free Optimization Methods and Software."
+    # According to a test on 20230613, for COBYLA, this Powellful updating scheme of DELTA works slightly
+    # better than setting DELTA directly to max(NEW_DELTA, RHO).
+    gamma3 = np.maximum(1, np.minimum(0.75 * gamma2, 1.5))
+
+    # MAXTR is the maximal number of trust-region iterations. Each trust-region iteration takes 1 or 2
+    # function evaluations unless the trust-region step is short or the trust-region subproblem solver
+    # fails but the geometry step is not invoked. Thus the following MAXTR is unlikely to be reached.
+    maxtr = np.maximum(maxfun, 2*maxfun)  # max: precaution against overflow, which will make 2*MAXFUN < 0.
+    info = MAXTR_REACHED
+
+    # Begin the iterative procedure.
+    # After solving a trust-region subproblem, we use three boolean variables to control the workflow.
+    # SHORTD - Is the trust-region trial step too short to invoke a function evaluation?
+    # IMPROVE_GEO - Will we improve the model after the trust-region iteration? If yes, a geometry
+    #     step will be taken, corresponding to the "Branch (Delta)" in the COBYLA paper.
+    # REDUCE_RHO - Will we reduce rho after the trust-region iteration?
+    # COBYLA never sets IMPROVE_GEO and REDUCE_RHO to True simultaneously.
+    for tr in range(maxtr):
+        # Increase the penalty parameter CPEN, if needed, so that PREREM = PREREF + CPEN * PREREC > 0.
+        # This is the first (out of two) update of CPEN, where CPEN increases or remains the same.
+        # N.B.: CPEN and the merit function PHI = FVAL + CPEN*CVAL are used in three places only.
+        # 1. In FINDPOLE/UPDATEPOLE, deciding the optimal vertex of the current simplex.
+        # 2. After the trust-region trial step, calculating the reduction ratio.
+        # 3. In GEOSTEP, deciding the direction of the geometry step.
+        # They do not appear explicitly in the trust-region subproblem, though the trust-region center
+        # (i.e. the current optimal vertex) is defined by them.
+        cpen = getcpen(conmat, cpen, cval, delta, fval, rho, sim, simi)
+
+        # Switch the best vertex of the current simplex to SIM[:, NUM_VARS].
+        conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi)
+        # Check whether to exit due to damaging rounding in UPDATEPOLE.
+        if subinfo == DAMAGING_ROUNDING:
+            info = subinfo
+            break  # Better action to take? Geometry step, or simply continue?
+
+        # Does the interpolation set have acceptable geometry? It affects IMPROVE_GEO and REDUCE_RHO.
+        adequate_geo = assess_geo(delta, factor_alpha, factor_beta, sim, simi)
+
+        # Calculate the linear approximations to the objective and constraint functions, placing
+        # minus the objective function gradient after the constraint gradients in the array A.
+        # N.B.: TRSTLP accesses A mostly by columns, so it is more reasonable to save A instead of A.T.
+        A = np.hstack((
+            # A[:, :num_constraints] contains the approximate gradients for the constraints,
+            ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T,
+            # and A[:, num_constraints] is minus the approximate gradient for the objective function.
+            ((fval[num_vars] - fval[:num_vars])@simi).reshape(num_vars, 1)
+        ))
+
+        # Theoretically, but not numerically, the last entry of B does not affect the result of TRSTLP.
+        # We set it to -FVAL[NUM_VARS] following Powell's code.
+        b = np.hstack((-conmat[:, num_vars], -fval[num_vars]))
+
+        # Calculate the trust-region trial step D. Note that D does NOT depend on CPEN.
+        d = trstlp(A, b, delta)
+        dnorm = min(delta, np.linalg.norm(d))
+
+        # Is the trust-region trial step short? N.B.: we compare DNORM with RHO, not DELTA.
+        # Powell's code essentially defines SHORTD by SHORTD = (DNORM < 0.5 * RHO). In our tests,
+        # 1/10 seems to work better than 1/2 or 1/4, especially for linearly constrained
+        # problems. Note that LINCOA has a slightly more sophisticated way of defining SHORTD,
+        # taking into account whether D causes a change to the active set. Should we try the same here?
+        shortd = (dnorm < 0.1 * rho)
+
+        # Predict the change to F (PREREF) and to the constraint violation (PREREC) due to D.
+        # We have the following in precise arithmetic. They may fail to hold due to rounding errors.
+        # 1. B[:NUM_CONSTRAINTS] = -CONMAT[:, NUM_VARS], and hence np.max(np.append(B[:NUM_CONSTRAINTS] - D@A[:, :NUM_CONSTRAINTS], 0))
+        #    is the L-infinity violation of the linearized constraints corresponding to D. When D = 0, the
+        #    violation is np.max(np.append(B[:NUM_CONSTRAINTS], 0)) = CVAL[NUM_VARS]. PREREC is the reduction of this
+        #    violation achieved by D, which is nonnegative in theory; PREREC = 0 iff B[:NUM_CONSTRAINTS] <= 0,
+        #    i.e. the trust-region center satisfies the linearized constraints.
+        # 2. PREREF may be negative or 0, but it is positive when PREREC = 0 and SHORTD is False.
+        # 3. Due to 2, in theory, max(PREREC, PREREF) > 0 if SHORTD is False.
+        prerec = cval[num_vars] - np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0))
+        preref = np.dot(d, A[:, num_constraints])  # Can be negative
+
+        # Evaluate PREREM, which is the predicted reduction in the merit function.
+        # In theory, PREREM >= 0 and it is 0 iff CPEN = 0 = PREREF. This may not be true numerically.
+        prerem = preref + cpen * prerec
+        trfail = prerem < 1e-5 * min(cpen, 1) * rho**2 or np.isnan(prerem)  # PREREM is tiny/negative or NaN.
+
+        if shortd or trfail:
+            # Reduce DELTA if D is short or if D fails to render PREREM > 0. The latter can only happen due to
+            # rounding errors. This seems quite important for performance.
+            delta *= 0.1
+            if delta <= gamma3 * rho:
+                delta = rho  # Set DELTA to RHO when it is close to or below.
+        else:
+            x = sim[:, num_vars] + d
+            # Evaluate the objective and constraints at X, taking care of possible inf/nan values.
+            f, constr, cstrv = evaluate(calcfc, x)
+            nf += 1
+
+            # Print a message about the function/constraint evaluation according to IPRINT.
+            fmsg(solver, 'Trust region', iprint, nf, delta, f, x, cstrv, constr)
+            # Save X, F, CONSTR, CSTRV into the history.
+            savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
+            # Save X, F, CONSTR, CSTRV into the filter.
+            nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt)
+
+            # Evaluate ACTREM, which is the actual reduction in the merit function.
+            actrem = (fval[num_vars] + cpen * cval[num_vars]) - (f + cpen * cstrv)
+
+            # Calculate the reduction ratio by REDRAT, which handles inf/nan carefully.
+            ratio = redrat(actrem, prerem, eta1)
+
+            # Update DELTA. After this, DELTA < DNORM may hold.
+            # N.B.:
+            # 1. Powell's code uses RHO as the trust-region radius and updates it as follows:
+            #    Reduce RHO to GAMMA1*RHO if ADEQUATE_GEO is TRUE and either SHORTD is TRUE or RATIO < ETA1,
+            #    and then revise RHO to RHOEND if its new value is not more than GAMMA3*RHOEND; RHO remains
+            #    unchanged in all other cases; in particular, RHO is never increased.
+            # 2. Our implementation uses DELTA as the trust-region radius, while using RHO as a lower
+            #    bound for DELTA. DELTA is updated in a way that is typical for trust-region methods, and
+            #    it is revised to RHO if its new value is not more than GAMMA3*RHO. RHO reflects the current
+            #    resolution of the algorithm; its update is essentially the same as the update of RHO in
+            #    Powell's code (see the definition of REDUCE_RHO below). Our implementation aligns with
+            #    UOBYQA/NEWUOA/BOBYQA/LINCOA and improves the performance of COBYLA.
+            # 3. The same as Powell's code, we do not reduce RHO unless ADEQUATE_GEO is TRUE. This is
+            #    also how Powell updated RHO in UOBYQA/NEWUOA/BOBYQA/LINCOA. What about we also use
+            #    ADEQUATE_GEO == TRUE as a prerequisite for reducing DELTA? The argument would be that the
+            #    bad (small) value of RATIO may be because of a bad geometry (and hence a bad model) rather
+            #    than an improperly large DELTA, and it might be good to try improving the geometry first
+            #    without reducing DELTA.
+            #    However, according to a test on 20230206, it does not improve the
+            #    performance if we skip the update of DELTA when ADEQUATE_GEO is FALSE and RATIO < 0.1.
+            #    Therefore, we choose to update DELTA without checking ADEQUATE_GEO.
+            delta = trrad(delta, dnorm, eta1, eta2, gamma1, gamma2, ratio)
+            if delta <= gamma3*rho:
+                delta = rho  # Set DELTA to RHO when it is close to or below.
+
+            # Is the newly generated X better than the current best point?
+            ximproved = actrem > 0  # If ACTREM is NaN, then XIMPROVED should and will be False.
+
+            # Set JDROP_TR to the index of the vertex to be replaced with X. JDROP_TR = None means there
+            # is no good point to replace, and X will not be included into the simplex; in this case,
+            # the geometry of the simplex likely needs improvement, which will be handled below.
+            jdrop_tr = setdrop_tr(ximproved, d, delta, rho, sim, simi)
+
+            # Update SIM, SIMI, FVAL, CONMAT, and CVAL so that SIM[:, JDROP_TR] is replaced with D.
+            # UPDATEXFC does nothing if JDROP_TR = None, as the algorithm decides to discard X.
+            sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_tr, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
+            # Check whether to break due to damaging rounding in UPDATEXFC.
+            if subinfo == DAMAGING_ROUNDING:
+                info = subinfo
+                break  # Better action to take? Geometry step, or a RESCUE as in BOBYQA?
+
+            # Check whether to break due to MAXFUN, FTARGET, etc.
+            subinfo = checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x)
+            if subinfo != INFO_DEFAULT:
+                info = subinfo
+                break
+        # End of if SHORTD or TRFAIL. The normal trust-region calculation ends.
+
+        # Before the next trust-region iteration, we possibly improve the geometry of the simplex or
+        # reduce RHO according to IMPROVE_GEO and REDUCE_RHO. Now we decide these indicators.
+        # N.B.: We must ensure that the algorithm does not set IMPROVE_GEO = True at infinitely many
+        # consecutive iterations without moving SIM[:, NUM_VARS] or reducing RHO. Otherwise, the algorithm
+        # will get stuck in repetitive invocations of GEOSTEP. This is ensured by the following facts:
+        # 1. If an iteration sets IMPROVE_GEO to True, it must also reduce DELTA or set DELTA to RHO.
+        # 2. If SIM[:, NUM_VARS] and RHO remain unchanged, then ADEQUATE_GEO will become True after at
+        #    most NUM_VARS invocations of GEOSTEP.
+
+        # BAD_TRSTEP: Is the last trust-region step bad?
+        bad_trstep = shortd or trfail or ratio <= 0 or jdrop_tr is None
+        # IMPROVE_GEO: Should we take a geometry step to improve the geometry of the interpolation set?
+        improve_geo = bad_trstep and not adequate_geo
+        # REDUCE_RHO: Should we enhance the resolution by reducing RHO?
+        reduce_rho = bad_trstep and adequate_geo and max(delta, dnorm) <= rho
+
+        # COBYLA never sets IMPROVE_GEO and REDUCE_RHO to True simultaneously.
+        # assert not (improve_geo and reduce_rho), 'IMPROVE_GEO and REDUCE_RHO are not both TRUE, COBYLA'
+
+        # If SHORTD or TRFAIL is True, then either IMPROVE_GEO or REDUCE_RHO is True unless ADEQUATE_GEO
+        # is True and max(DELTA, DNORM) > RHO.
+        assert not (shortd or trfail) or (improve_geo or reduce_rho or (adequate_geo and max(delta, dnorm) > rho)), \
+            'If SHORTD or TRFAIL is TRUE, then either IMPROVE_GEO or REDUCE_RHO is TRUE unless ADEQUATE_GEO is TRUE and MAX(DELTA, DNORM) > RHO'
+
+        # Comments on BAD_TRSTEP:
+        # 1. Powell's definition of BAD_TRSTEP is as follows. The one used above seems to work better,
+        #    especially for linearly constrained problems due to the factor TENTH (= ETA1).
+        #    !bad_trstep = (shortd .or. actrem <= 0 .or. actrem < TENTH * prerem .or. jdrop_tr == 0)
+        #    Besides, Powell did not check PREREM > 0 in BAD_TRSTEP, which is reasonable to do but has
+        #    little impact upon the performance.
+        # 2. NEWUOA/BOBYQA/LINCOA would define BAD_TRSTEP, IMPROVE_GEO, and REDUCE_RHO as follows. Two
+        #    different thresholds are used in BAD_TRSTEP. It outperforms Powell's version.
+        #    !bad_trstep = (shortd .or. trfail .or. ratio <= eta1 .or. jdrop_tr == 0)
+        #    !improve_geo = bad_trstep .and. .not. adequate_geo
+        #    !bad_trstep = (shortd .or. trfail .or. ratio <= 0 .or. jdrop_tr == 0)
+        #    !reduce_rho = bad_trstep .and. adequate_geo .and. max(delta, dnorm) <= rho
+        # 3. Theoretically, JDROP_TR > 0 when ACTREM > 0 (guaranteed by RATIO > 0). However, in Powell's
+        #    implementation, JDROP_TR may be 0 even if RATIO > 0 due to NaN. The modernized code has
+        #    rectified this in the function SETDROP_TR. After this rectification, we can indeed simplify
+        #    the definition of BAD_TRSTEP by removing the condition JDROP_TR == 0. We retain it for robustness.
+
+        # Comments on REDUCE_RHO:
+        # When SHORTD is TRUE, UOBYQA/NEWUOA/BOBYQA/LINCOA all set REDUCE_RHO to TRUE if the recent
+        # models are sufficiently accurate according to certain criteria. See the paragraph around (37)
+        # in the UOBYQA paper and the discussions about Box 14 in the NEWUOA paper. This strategy is
+        # crucial for the performance of the solvers. However, as of 20221111, we have not managed to
+        # make it work in COBYLA. As in NEWUOA, we recorded the errors of the recent models, and set
+        # REDUCE_RHO to True if they are small (e.g., ALL(ABS(MODERR_REC) <= 0.1 * MAXVAL(ABS(A))*RHO) or
+        # ALL(ABS(MODERR_REC) <= RHO**2)) when SHORTD is TRUE. It made little impact on the performance.
+
+
+        # Since COBYLA never sets IMPROVE_GEO and REDUCE_RHO to TRUE simultaneously, the following
+        # two blocks are exchangeable: IF (IMPROVE_GEO) ... END IF and IF (REDUCE_RHO) ... END IF.
+
+        # Improve the geometry of the simplex by removing a point and adding a new one.
+        # If the current interpolation set has acceptable geometry, then we skip the geometry step.
+        # The code has a small difference from Powell's original code here: If the current geometry
+        # is acceptable, then we will continue with a new trust-region iteration; however, at the
+        # beginning of the iteration, CPEN may be updated, which may alter the pole point SIM[:, NUM_VARS]
+        # by UPDATEPOLE; the quality of the interpolation set depends on SIM[:, NUM_VARS], meaning
+        # that the same interpolation set may have good or bad geometry with respect to different
+        # "poles"; if the geometry turns out bad with the new pole, the original COBYLA code will
+        # take a geometry step, but our code here will NOT do it but continue to take a trust-region
+        # step. The argument is this: even if the geometry step is not skipped in the first place, the
+        # geometry may turn out bad again after the pole is altered due to an update to CPEN; should
+        # we take another geometry step in that case? If no, why should we do it here? Indeed, this
+        # distinction makes no practical difference for CUTEst problems with at most 100 variables
+        # and 5000 constraints, while the algorithm framework is simplified.
+        if improve_geo and not assess_geo(delta, factor_alpha, factor_beta, sim, simi):
+            # Before the geometry step, UPDATEPOLE has been called either implicitly by UPDATEXFC or
+            # explicitly after CPEN is updated, so that SIM[:, NUM_VARS] is the optimal vertex.
+
+            # Decide a vertex to drop from the simplex. It will be replaced with SIM[:, NUM_VARS] + D to
+            # improve the acceptability of the simplex. See equations (15) and (16) of the COBYLA paper.
+            # N.B.: COBYLA never sets JDROP_GEO = NUM_VARS.
+            jdrop_geo = setdrop_geo(delta, factor_alpha, factor_beta, sim, simi)
+
+            # The following JDROP_GEO comes from UOBYQA/NEWUOA/BOBYQA/LINCOA. It performs poorly!
+            # jdrop_geo = np.argmax(np.sum(sim[:, :num_vars]**2, axis=0), axis=0)
+
+            # JDROP_GEO is between 0 and NUM_VARS unless SIM and SIMI contain NaN, which should not happen
+            # at this point unless there is a bug. Nevertheless, for robustness, we include the
+            # following instruction to break when JDROP_GEO is None (if JDROP_GEO did become None, then a
+            # TypeError would occur if we continued, as JDROP_GEO will be used as an index of arrays).
+            if jdrop_geo is None:
+                info = DAMAGING_ROUNDING
+                break
+
+            # Calculate the geometry step D.
+            # In NEWUOA, GEOSTEP takes DELBAR = max(min(0.1 * sqrt(max(DISTSQ)), 0.5 * DELTA), RHO)
+            # rather than DELTA. This should not be done here, because D should improve the geometry of
+            # the simplex when SIM[:, JDROP] is replaced with D; the quality of the geometry is defined
+            # by DELTA instead of DELBAR as in (14) of the COBYLA paper. See GEOSTEP for more detail.
+            d = geostep(jdrop_geo, cpen, conmat, cval, delta, fval, factor_gamma, simi)
+
+            x = sim[:, num_vars] + d
+            # Evaluate the objective and constraints at X, taking care of possible inf/nan values.
+            f, constr, cstrv = evaluate(calcfc, x)
+            nf += 1
+
+            # Print a message about the function/constraint evaluation according to IPRINT.
+            fmsg(solver, 'Geometry', iprint, nf, delta, f, x, cstrv, constr)
+            # Save X, F, CONSTR, CSTRV into the history.
+            savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
+            # Save X, F, CONSTR, CSTRV into the filter.
+            nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt)
+
+            sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_geo, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
+            if subinfo == DAMAGING_ROUNDING:
+                info = subinfo
+                break
+
+            # Check whether to break due to MAXFUN, FTARGET, etc.
+            subinfo = checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x)
+            if subinfo != INFO_DEFAULT:
+                info = subinfo
+                break
+        # End of if IMPROVE_GEO. The procedure of improving the geometry ends.
+
+        # The calculations with the current RHO are complete. Enhance the resolution of the algorithm
+        # by reducing RHO; update DELTA and CPEN at the same time.
+        if reduce_rho:
+            if rho <= rhoend:
+                info = SMALL_TR_RADIUS
+                break
+            delta = max(0.5 * rho, redrho(rho, rhoend))
+            rho = redrho(rho, rhoend)
+            # The second (out of two) update of CPEN, where CPEN decreases or remains the same.
+            # Powell's code: cpen = min(cpen, fcratio(fval, conmat)), which may set CPEN to 0.
+            cpen = np.maximum(cpenmin, np.minimum(cpen, fcratio(conmat, fval)))
+            # Print a message about the reduction of RHO according to IPRINT.
+            rhomsg(solver, iprint, nf, fval[num_vars], rho, sim[:, num_vars], cval[num_vars], conmat[:, num_vars], cpen)
+            conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi)
+            # Check whether to break due to damaging rounding detected in UPDATEPOLE.
+            if subinfo == DAMAGING_ROUNDING:
+                info = subinfo
+                break  # Better action to take? Geometry step, or simply continue?
+        # End of if REDUCE_RHO. The procedure of reducing RHO ends.
+    # End of the for loop. The iterative procedure ends.
+
+    # Return from the calculation, after trying the last trust-region step if it has not been tried yet.
+    if info == SMALL_TR_RADIUS and shortd and nf < maxfun:
+        # Zaikun 20230615: UPDATEXFC or UPDATEPOLE is not called since the last trust-region step. Hence
+        # SIM[:, NUM_VARS] remains unchanged. Otherwise SIM[:, NUM_VARS] + D would not make sense.
+        x = sim[:, num_vars] + d
+        f, constr, cstrv = evaluate(calcfc, x)
+        nf += 1
+        fmsg(solver, 'Trust region', iprint, nf, rho, f, x, cstrv, constr)
+        savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
+        nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt)
+
+    # Return the best calculated values of the variables.
+    # N.B.: SELECTX and FINDPOLE choose X by different standards; one cannot replace the other.
+    kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], max(cpen, cweight), ctol)
+    x = xfilt[:, kopt]
+    f = ffilt[kopt]
+    constr = confilt[:, kopt]
+    cstrv = cfilt[kopt]
+
+    # Arrange CHIST, CONHIST, FHIST, and XHIST so that they are in chronological order.
+    # TODO: Implement me
+    # rangehist(nf, xhist, fhist, chist, conhist)
+
+    # Print a return message according to IPRINT.
+    retmsg(solver, info, iprint, nf, f, x, cstrv, constr)
+    return x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info
+
+
+def getcpen(conmat, cpen, cval, delta, fval, rho, sim, simi):
+    '''
+    This function gets the penalty parameter CPEN so that PREREM = PREREF + CPEN * PREREC > 0.
+    See the discussions around equation (9) of the COBYLA paper.
+    '''
+
+    # Intermediate variables
+    A = np.zeros((np.size(sim, 0), np.size(conmat, 0) + 1))
+    itol = 1
+
+    # Sizes
+    num_constraints = np.size(conmat, 0)
+    num_vars = np.size(sim, 0)
+
+    # Preconditions
+    if DEBUGGING:
+        assert num_constraints >= 0
+        assert num_vars >= 1
+        assert cpen > 0
+        assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
+        assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
+        assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
+        assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
+        assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
+        assert np.isfinite(sim).all()
+        assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
+        assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
+        assert np.isfinite(simi).all()
+        assert isinv(sim[:, :num_vars], simi, itol)
+        assert delta >= rho and rho > 0
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    # Initialize INFO, PREREF, and PREREC, which are needed in the postconditions.
+    info = INFO_DEFAULT
+    preref = 0
+    prerec = 0
+
+    # Increase CPEN if necessary to ensure PREREM > 0. Branch back for the next loop if this change
+    # alters the optimal vertex of the current simplex. Note the following:
+    # 1. In each loop, CPEN is changed only if PREREC > 0 > PREREF, in which case PREREM is guaranteed
+    #    positive after the update. Note that PREREC >= 0 and max(PREREC, PREREF) > 0 in theory. If this
+    #    holds numerically as well, then CPEN is not changed only if PREREC = 0 or PREREF >= 0, in which
+    #    case PREREM is currently positive, explaining why CPEN needs no update.
+    # 2. Even without an upper bound for the loop counter, the loop can occur at most NUM_VARS+1 times. This
+    #    is because the update of CPEN does not decrease CPEN, and hence it can make vertex J (J <= NUM_VARS)
+    #    become the new optimal vertex only if CVAL[J] is less than CVAL[NUM_VARS], which can happen at most
+    #    NUM_VARS times. See the paragraph below (9) in the COBYLA paper. After the "correct" optimal vertex
+    #    is found, one more loop is needed to calculate CPEN, and hence the loop can occur at most NUM_VARS+1 times.
+    for iter in range(num_vars + 1):
+        # Switch the best vertex of the current simplex to SIM[:, NUM_VARS].
+        conmat, cval, fval, sim, simi, info = updatepole(cpen, conmat, cval, fval, sim, simi)
+        # Check whether to exit due to damaging rounding in UPDATEPOLE.
+        if info == DAMAGING_ROUNDING:
+            break
+
+        # Calculate the linear approximations to the objective and constraint functions, placing minus
+        # the objective function gradient after the constraint gradients in the array A.
+        A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T
+        A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi
+        b = np.hstack((-conmat[:, num_vars], -fval[num_vars]))
+
+        # Calculate the trust-region trial step D. Note that D does NOT depend on CPEN.
+        d = trstlp(A, b, delta)
+
+        # Predict the change to F (PREREF) and to the constraint violation (PREREC) due to D.
+        prerec = cval[num_vars] - np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0))
+        preref = np.dot(d, A[:, num_constraints])  # Can be negative
+
+        if not (prerec > 0 and preref < 0):  # PREREC <= 0 or PREREF >= 0 or either is NaN
+            break
+
+        # Powell's code defines BARMU = -PREREF / PREREC, and CPEN is increased to 2*BARMU if and
+        # only if it is currently less than 1.5*BARMU, a very "Powellful" scheme. In our
+        # implementation, however, we set CPEN directly to the maximum between its current value and
+        # 2*BARMU while handling possible overflow. This simplifies the scheme without worsening the
+        # performance of COBYLA.
+        cpen = max(cpen, min(-2 * preref / prerec, REALMAX))
+
+        if findpole(cpen, cval, fval) == num_vars:
+            break
+
+    #==================#
+    # Calculation ends #
+    #==================#
+
+    # Postconditions
+    if DEBUGGING:
+        assert cpen > 0
+        assert preref + cpen * prerec > 0 or info == DAMAGING_ROUNDING or \
+            not (prerec >= 0 and np.maximum(prerec, preref) > 0) or not np.isfinite(preref)
+
+    return cpen
+
+
+def fcratio(conmat, fval):
+    '''
+    This function calculates the ratio between the "typical change" of F and that of CONSTR.
+    See equations (12)-(13) in Section 3 of the COBYLA paper for the definition of the ratio.
+    '''
+
+    # Preconditions
+    if DEBUGGING:
+        assert np.size(fval) >= 1
+        assert np.size(conmat, 1) == np.size(fval)
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    cmin = np.min(conmat, axis=1)
+    cmax = np.max(conmat, axis=1)
+    fmin = min(fval)
+    fmax = max(fval)
+    if any(cmin < 0.5 * cmax) and fmin < fmax:
+        denom = np.min(np.maximum(cmax, 0) - cmin, where=cmin < 0.5 * cmax, initial=np.inf)
+        # Powell mentioned the following alternative in section 4 of his COBYLA paper. According to a test
+        # on 20230610, it does not make much difference to the performance.
+ # denom = np.max(max(*cmax, 0) - cmin, mask=(cmin < 0.5 * cmax)) + r = (fmax - fmin) / denom + else: + r = 0 + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert r >= 0 + + return r \ No newline at end of file diff --git a/python/src/prima/cobyla/geometry.py b/python/src/prima/cobyla/geometry.py new file mode 100644 index 0000000000..03c80d248a --- /dev/null +++ b/python/src/prima/cobyla/geometry.py @@ -0,0 +1,316 @@ +from prima.common.consts import DEBUGGING +from prima.common.linalg import isinv +import numpy as np + + +def assess_geo(delta, factor_alpha, factor_beta, sim, simi): + ''' + This function checks if an interpolation set has acceptable geometry as (14) of the COBYLA paper + ''' + + # Local variables + itol = 0.1 + + # Sizes + num_vars = np.size(sim, 0) + + # Preconditions + if DEBUGGING: + assert num_vars >= 1 + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) + assert delta > 0 + assert factor_alpha > 0 and factor_alpha < 1 + assert factor_beta > 1 + + #====================# + # Calculation starts # + #====================# + + # Calculate the values of sigma and eta + # veta[j] (0 <= j < num_vars) is the distance between the vertices j and 0 (the best vertex) + # of the simplex. + # vsig[j] (0 <= j < num_vars) is the distance from vertex j to its opposite face of the simplex. + # Thus vsig <= veta. + # N.B.: What about the distance from vertex N+1 to its opposite face? Consider the simplex + # {V_{N+1}, V_{N+1} + L*e_1,... v_{N+1} + L*e_N}, where V_{N+1} is vertex N+1, + # namely the current "best" point, [e_1, ..., e_n] is an orthogonal matrix, and L is a + # constant in the order of delta. This simplex is optimal in the sense that the interpolation + # system has the minimal condition number, i.e. 1. For this simplex, the distance from + # V_{N+1} to its opposite face is L/sqrt(n). + vsig = 1/np.sqrt(np.sum(simi**2, axis=1)) + veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0)) + adequate_geo = all(vsig >= factor_alpha * delta) and all(veta <= factor_beta * delta) + + #==================# + # Calculation ends # + #==================# + + return adequate_geo + + +def setdrop_tr(ximproved, d, delta, rho, sim, simi): + ''' + This function finds (the index) of a current interpolation point to be replaced with the + trust-region trial point. See (19)-(22) of the COBYLA paper. + N.B.: + 1. If XIMPROVED == True, then JDROP >= 0 so that D is included into XPT. Otherwise, it is a bug. + 2. COBYLA never sets JDROP = NUM_VARS + TODO: Check whether it improves the performance if JDROP = NUM_VARS is allowed when XIMPROVED is True. + Note that UPDATEXFC should be revised accordingly. 
+ ''' + + # Local variables + itol = 0.1 + + # Sizes + num_vars = np.size(sim, 0) + + # Preconditions + if DEBUGGING: + assert num_vars >= 1 + assert np.size(d) == num_vars and all(np.isfinite(d)) + assert delta >= rho and rho > 0 + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) + + #====================# + # Calculation starts # + #====================# + + # -------------------------------------------------------------------------------------------------- # + # The following code is Powell's scheme for defining JDROP. + # -------------------------------------------------------------------------------------------------- # + # ! JDROP = 0 by default. It cannot be removed, as JDROP may not be set below in some cases (e.g., + # ! when XIMPROVED == FALSE, MAXVAL(ABS(SIMID)) <= 1, and MAXVAL(VETA) <= EDGMAX). + # jdrop = 0 + # + # ! SIMID(J) is the value of the J-th Lagrange function at D. It is the counterpart of VLAG in UOBYQA + # ! and DEN in NEWUOA/BOBYQA/LINCOA, but it excludes the value of the (N+1)-th Lagrange function. + # simid = matprod(simi, d) + # if (any(abs(simid) > 1) .or. (ximproved .and. any(.not. is_nan(simid)))) then + # jdrop = int(maxloc(abs(simid), mask=(.not. is_nan(simid)), dim=1), kind(jdrop)) + # !!MATLAB: [~, jdrop] = max(simid, [], 'omitnan'); + # end if + # + # ! VETA(J) is the distance from the J-th vertex of the simplex to the best vertex, taking the trial + # ! point SIM(:, N+1) + D into account. + # if (ximproved) then + # veta = sqrt(sum((sim(:, 1:n) - spread(d, dim=2, ncopies=n))**2, dim=1)) + # !!MATLAB: veta = sqrt(sum((sim(:, 1:n) - d).^2)); % d should be a column! Implicit expansion + # else + # veta = sqrt(sum(sim(:, 1:n)**2, dim=1)) + # end if + # + # ! VSIG(J) (J=1, .., N) is the Euclidean distance from vertex J to the opposite face of the simplex. + # vsig = ONE / sqrt(sum(simi**2, dim=2)) + # sigbar = abs(simid) * vsig + # + # ! The following JDROP will overwrite the previous one if its premise holds. + # mask = (veta > factor_delta * delta .and. (sigbar >= factor_alpha * delta .or. sigbar >= vsig)) + # if (any(mask)) then + # jdrop = int(maxloc(veta, mask=mask, dim=1), kind(jdrop)) + # !!MATLAB: etamax = max(veta(mask)); jdrop = find(mask & ~(veta < etamax), 1, 'first'); + # end if + # + # ! Powell's code does not include the following instructions. With Powell's code, if SIMID consists + # ! of only NaN, then JDROP can be 0 even when XIMPROVED == TRUE (i.e., D reduces the merit function). + # ! With the following code, JDROP cannot be 0 when XIMPROVED == TRUE, unless VETA is all NaN, which + # ! should not happen if X0 does not contain NaN, the trust-region/geometry steps never contain NaN, + # ! and we exit once encountering an iterate containing Inf (due to overflow). + # if (ximproved .and. jdrop <= 0) then ! Write JDROP <= 0 instead of JDROP == 0 for robustness. + # jdrop = int(maxloc(veta, mask=(.not. is_nan(veta)), dim=1), kind(jdrop)) + # !!MATLAB: [~, jdrop] = max(veta, [], 'omitnan'); + # end if + # -------------------------------------------------------------------------------------------------- # + # Powell's scheme ends here. 
+ # -------------------------------------------------------------------------------------------------- #
+
+ # The following definition of JDROP is inspired by SETDROP_TR in UOBYQA/NEWUOA/BOBYQA/LINCOA.
+ # It is simpler and works better than Powell's scheme. Note that we allow JDROP to be NUM_VARS
+ # (the pole position) if XIMPROVED is True, whereas Powell's code does not.
+ # See also (4.1) of Scheinberg-Toint-2010: Self-Correcting Geometry in Model-Based Algorithms for
+ # Derivative-Free Unconstrained Optimization, which refers to the strategy here as the "combined
+ # distance/poisedness criteria".
+
+ # DISTSQ[j] is the square of the distance from the jth vertex of the simplex to the "best" point so
+ # far, taking the trial point SIM[:, NUM_VARS] + D into account.
+ distsq = np.zeros(np.size(sim, 1))
+ if ximproved:
+ distsq[:num_vars] = np.sum((sim[:, :num_vars] - np.tile(d, (num_vars, 1)).T)**2, axis=0)
+ distsq[num_vars] = np.sum(d**2)
+ else:
+ distsq[:num_vars] = np.sum(sim[:, :num_vars]**2, axis=0)
+ distsq[num_vars] = 0
+
+ weight = np.maximum(1, distsq / np.maximum(rho, delta/10)**2) # Similar to Powell's NEWUOA code.
+
+ # Other possible definitions of weight. They work almost the same as the one above.
+ # weight = distsq # Similar to Powell's LINCOA code, but WRONG. See comments in LINCOA/geometry.f90.
+ # weight = max(1, max(25 * distsq / delta**2)) # Similar to Powell's BOBYQA code, works well.
+ # weight = max(1, max(10 * distsq / delta**2))
+ # weight = max(1, max(1e2 * distsq / delta**2))
+ # weight = max(1, max(distsq / rho**2)) # Similar to Powell's UOBYQA
+
+ # If 0 <= j < NUM_VARS, SIMID[j] is the value of the jth Lagrange function at D; the value of the
+ # (NUM_VARS+1)th Lagrange function is 1 - sum(SIMID). [SIMID, 1 - sum(SIMID)] is the counterpart of
+ # VLAG in UOBYQA and DEN in NEWUOA/BOBYQA/LINCOA.
+ simid = simi@d
+ score = weight * abs(np.array([*simid, 1 - np.sum(simid)]))
+
+ # If XIMPROVED = False (D does not render a better X), set SCORE[NUM_VARS] = -1 to avoid JDROP = NUM_VARS.
+ if not ximproved:
+ score[num_vars] = -1
+
+ # The following if statement works a bit better than `if any(score > 1) or (any(score > 0) and ximproved)`
+ # from Powell's UOBYQA and NEWUOA code.
+ if any(score > 0): # Powell's BOBYQA and LINCOA code.
+ jdrop = np.where(score == max(score[~np.isnan(score)]))[0][0]
+ elif ximproved:
+ jdrop = np.argmax(distsq)
+ else:
+ jdrop = None # We arrive here when XIMPROVED = False and no entry of score is positive.
+
+ #==================#
+ # Calculation ends #
+ #==================#
+
+ # Postconditions
+ if DEBUGGING:
+ # The guards against None below avoid comparing None with an int, which would raise a TypeError.
+ assert jdrop is None or (jdrop >= 0 and jdrop < num_vars + 1)
+ assert jdrop is None or jdrop < num_vars or ximproved # JDROP == NUM_VARS is possible only if XIMPROVED is True.
+ assert jdrop is not None or not ximproved # JDROP is None only if XIMPROVED is False.
+ # JDROP is not None when XIMPROVED = True unless NaN occurs in DISTSQ, which should not happen if the
+ # starting point does not contain NaN and the trust-region/geometry steps never contain NaN.
+
+ return jdrop
+
+
+def setdrop_geo(delta, factor_alpha, factor_beta, sim, simi):
+ '''
+ This function finds (the index) of a current interpolation point to be replaced with a
+ geometry-improving point. See (15)-(16) of the COBYLA paper.
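+ Roughly speaking (summarizing the code below rather than the paper's exact statement): if some
+ vertex is too far from the best vertex (VETA > FACTOR_BETA * DELTA), the farthest one is dropped;
+ otherwise, if some vertex is too close to its opposite face (VSIG < FACTOR_ALPHA * DELTA), the
+ one with the smallest VSIG is dropped.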
+ N.B.: COBYLA never sets jdrop to NUM_VARS + ''' + + # Local variables + itol = 0.1 + + # Sizes + num_vars = np.size(sim, 0) + + # Preconditions + if DEBUGGING: + assert num_vars >= 1 + assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1 + assert np.isfinite(sim).all() + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0) + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert isinv(sim[:, :num_vars], simi, itol) + assert factor_alpha > 0 and factor_alpha < 1 + assert factor_beta > 1 + assert not assess_geo(delta, factor_alpha, factor_beta, sim, simi) + + #====================# + # Calculation starts # + #====================# + + # Calculate the values of sigma and eta + # VSIG[j] for j = 0...NUM_VARS-1 is the Euclidean distance from vertex J to the opposite face of the simplex. + vsig = 1 / np.sqrt(np.sum(simi**2, axis=1)) + veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0)) + + # Decide which vertex to drop from the simplex. It will be replaced with a new point to improve the + # acceptability of the simplex. See equations (15) and (16) of the COBYLA paper. + if any(veta > factor_beta * delta): + jdrop = np.where(veta == max(veta[~np.isnan(veta)]))[0][0] + elif any(vsig < factor_alpha * delta): + jdrop = np.where(vsig == min(vsig[~np.isnan(vsig)]))[0][0] + else: + # We arrive here if vsig and veta are all nan, which can happen due to nan in sim and simi + # which should not happen unless there is a bug + jdrop = None + + # Zaikun 230202: What if we consider veta and vsig together? The following attempts do not work well. + # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)*np.sum(simi**2, axis=1)) # Condition number + # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)**2 * np.sum(simi**2, axis=1)) # Condition number times distance + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert jdrop >= 0 and jdrop < num_vars + return jdrop + + +def geostep(jdrop, cpen, conmat, cval, delta, fval, factor_gamma, simi): + ''' + This function calculates a geometry step so that the geometry of the interpolation set is improved + when SIM[: JDROP_GEO] is replaced with SIM[:NUM_VARS] + D. See (15)--(17) of the COBYLA paper. + ''' + + # Sizes + num_constraints = np.size(conmat, 0) + num_vars = np.size(simi, 0) + + # Preconditions + if DEBUGGING: + assert num_constraints >= 0 + assert num_vars >= 1 + assert delta > 0 + assert cpen > 0 + assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars + assert np.isfinite(simi).all() + assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval)) + assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1 + assert not (np.isnan(conmat) | np.isneginf(conmat)).any() + assert np.size(cval) == num_vars + 1 and not any(cval < 0 | np.isnan(cval) | np.isposinf(cval)) + assert jdrop >= 0 and jdrop < num_vars + assert factor_gamma > 0 and factor_gamma < 1 + + #====================# + # Calculation starts # + #====================# + + # SIMI[JDROP, :] is a vector perpendicular to the face of the simplex to the opposite of vertex + # JDROP. Thus VSIGJ * SIMI[JDROP, :] is the unit vector in this direction + vsigj = 1 / np.sqrt(np.sum(simi[jdrop, :]**2)) + + # Set D to the vector in the above-mentioned direction and with length FACTOR_GAMMA * DELTA. 
Since + # FACTOR_ALPHA < FACTOR_GAMMA < FACTOR_BETA, D improves the geometry of the simplex as per (14) of + # the COBYLA paper. This also explains why this subroutine does not replace DELTA with + # DELBAR = max(min(0.1 * np.sqrt(max(DISTSQ)), 0.5 * DELTA), RHO) as in NEWUOA. + d = factor_gamma * delta * (vsigj * simi[jdrop, :]) + + # Calculate the coefficients of the linear approximations to the objective and constraint functions, + # placing minus the objective function gradient after the constraint gradients in the array A + A = np.zeros((num_vars, num_constraints + 1)) + A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T + A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi + cvmaxp = np.max(np.append(0, -d@A[:, :num_constraints] - conmat[:, num_vars])) + cvmaxn = np.max(np.append(0, d@A[:, :num_constraints] - conmat[:, num_vars])) + if 2 * np.dot(d, A[:, num_constraints]) < cpen * (cvmaxp - cvmaxn): + d *= -1 + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert np.size(d) == num_vars and all(np.isfinite(d)) + # In theory, ||S|| == FACTOR_GAMMA*DELTA, which may be false due to rounding, but not too far. + # It is crucial to ensure that the geometry step is nonzero, which holds in theory. + assert np.linalg.norm(d) > 0.9 * factor_gamma * delta and np.linalg.norm(d) <= 1.1 * factor_gamma * delta + return d \ No newline at end of file diff --git a/python/src/prima/cobyla/initialize.py b/python/src/prima/cobyla/initialize.py new file mode 100644 index 0000000000..75b7646381 --- /dev/null +++ b/python/src/prima/cobyla/initialize.py @@ -0,0 +1,196 @@ +from prima.common.checkbreak import checkbreak_con +from prima.common.consts import DEBUGGING, REALMAX +from prima.common.infos import INFO_DEFAULT +from prima.common.evaluate import evaluate, moderatec, moderatef +from prima.common.history import savehist +from prima.common.message import fmsg +from prima.common.selectx import savefilt + +import numpy as np + +def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, xhist, fhist, chist, conhist, maxhist, srname): + ''' + This subroutine does the initialization concerning X, function values, and constraints. + ''' + + # Local variables + solver = 'COBYLA' + + # Sizes + num_constraints = np.size(constr0) + num_vars = np.size(x0) + + # Preconditions + if DEBUGGING: + assert num_constraints >= 0, f'M >= 0 {srname}' + assert num_vars >= 1, f'N >= 1 {srname}' + assert abs(iprint) <= 3, f'IPRINT is 0, 1, -1, 2, -2, 3, or -3 {srname}' + # assert conmat.shape == (num_constraints , num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' + # assert cval.size == num_vars + 1, f'CVAL.size == N+1 {srname}' + # assert maxchist * (maxchist - maxhist) == 0, f'CHIST.shape == 0 or MAXHIST {srname}' + # assert conhist.shape[0] == num_constraints and maxconhist * (maxconhist - maxhist) == 0, 'CONHIST.shape[0] == num_constraints, SIZE(CONHIST, 2) == 0 or MAXHIST {srname)}' + # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' + # assert xhist.shape[0] == num_vars and maxxhist * (maxxhist - maxhist) == 0, 'XHIST.shape[0] == N, SIZE(XHIST, 2) == 0 or MAXHIST {srname)}' + assert all(np.isfinite(x0)), f'X0 is finite {srname}' + assert rhobeg > 0, f'RHOBEG > 0 {srname}' + + #====================# + # Calculation starts # + #====================# + + # Initialize info to the default value. 
At return, a value different from this value will + # indicate an abnormal return + info = INFO_DEFAULT + + # Initialize the simplex. It will be revised during the initialization. + sim = np.eye(num_vars, num_vars+1) * rhobeg + sim[:, num_vars] = x0 + + # evaluated[j] = True iff the function/constraint of SIM[:, j] has been evaluated. + evaluated = np.zeros(num_vars+1, dtype=bool) + + # Initialize fval + fval = np.zeros(num_vars+1) + REALMAX + conmat = np.zeros((num_constraints, num_vars+1)) - REALMAX + cval = np.zeros(num_vars+1) + REALMAX + + + for k in range(num_vars + 1): + x = sim[:, num_vars].copy() + # We will evaluate F corresponding to SIM(:, J). + if k == 0: + j = num_vars + f = moderatef(f0) + constr = moderatec(constr0) + cstrv = np.max(np.append(-constr, 0)) + else: + j = k - 1 + x[j] += rhobeg + f, constr, cstrv = evaluate(calcfc, x) + + # Print a message about the function/constraint evaluation according to IPRINT. + fmsg(solver, 'Initialization', iprint, k, rhobeg, f, x, cstrv, constr) + + # Save X, F, CONSTR, CSTRV into the history. + savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist) + + # Save F, CONSTR, and CSTRV to FVAL, CONMAT, and CVAL respectively. + evaluated[j] = True + fval[j] = f + conmat[:, j] = constr + cval[j] = cstrv + + # Check whether to exit. + subinfo = checkbreak_con(maxfun, k, cstrv, ctol, f, ftarget, x) + if subinfo != INFO_DEFAULT: + info = subinfo + break + + # Exchange the new vertex of the initial simplex with the optimal vertex if necessary. + # This is the ONLY part that is essentially non-parallel. + if j < num_vars and fval[j] < fval[num_vars]: + fval[j], fval[num_vars] = fval[num_vars], fval[j] + cval[j], cval[num_vars] = cval[num_vars], cval[j] + conmat[:, [j, num_vars]] = conmat[:, [num_vars, j]] + sim[:, num_vars] = x + sim[j, :j+1] = -rhobeg # SIM[:, :j+1] is lower triangular + + nf = np.count_nonzero(evaluated) + + if evaluated.all(): + # Initialize SIMI to the inverse of SIM[:, :num_vars] + simi = np.linalg.inv(sim[:, :num_vars]) + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert nf <= maxfun, f'NF <= MAXFUN {srname}' + assert evaluated.size == num_vars + 1, f'EVALUATED.size == Num_vars + 1 {srname}' + # assert chist.size == maxchist, f'CHIST.size == MAXCHIST {srname}' + # assert conhist.shape== (num_constraints, maxconhist), f'CONHIST.shape == [M, MAXCONHIST] {srname}' + assert conmat.shape == (num_constraints, num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}' + assert not (np.isnan(conmat).any() or np.isneginf(conmat).any()), f'CONMAT does not contain NaN/-Inf {srname}' + assert cval.size == num_vars + 1 and not (any(cval < 0) or any(np.isnan(cval)) or any(np.isposinf(cval))), f'CVAL.shape == Num_vars+1 and CVAL does not contain negative values or NaN/+Inf {srname}' + # assert fhist.shape == maxfhist, f'FHIST.shape == MAXFHIST {srname}' + # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}' + assert fval.size == num_vars + 1 and not (any(np.isnan(fval)) or any(np.isposinf(fval))), f'FVAL.shape == Num_vars+1 and FVAL is not NaN/+Inf {srname}' + # assert xhist.shape == (num_vars, maxxhist), f'XHIST.shape == [N, MAXXHIST] {srname}' + assert sim.shape == (num_vars, num_vars + 1), f'SIM.shape == [N, N+1] {srname}' + assert np.isfinite(sim).all(), f'SIM is finite {srname}' + assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0), f'SIM(:, 1:N) has no zero column {srname}' + assert simi.shape == (num_vars, num_vars), 
f'SIMI.shape == [N, N] {srname}'
+ assert np.isfinite(simi).all(), f'SIMI is finite {srname}'
+ assert np.allclose(sim[:, :num_vars] @ simi, np.eye(num_vars), rtol=0.1, atol=0.1) or not all(evaluated), f'SIMI = SIM(:, 1:N)^{-1} {srname}'
+
+ return evaluated, conmat, cval, sim, simi, fval, nf, info
+
+
+def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt):
+ '''
+ This function initializes the filter (XFILT, etc) that will be used when selecting
+ x at the end of the solver.
+ N.B.:
+ 1. Why not initialize the filters using XHIST, etc? Because the history is empty if
+ the user chooses not to output it.
+ 2. We decouple INITXFC and INITFILT so that it is easier to parallelize the former
+ if needed.
+ '''
+
+ # Sizes
+ num_constraints = conmat.shape[0]
+ num_vars = sim.shape[0]
+ maxfilt = len(ffilt)
+
+ # Preconditions
+ if DEBUGGING:
+ assert num_constraints >= 0
+ assert num_vars >= 1
+ assert maxfilt >= 1
+ assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt
+ assert np.size(cfilt) == maxfilt
+ assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt
+ assert np.size(ffilt) == maxfilt
+ assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
+ assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
+ assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
+ assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
+ assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
+ assert np.isfinite(sim).all()
+ assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
+ assert np.size(evaluated) == num_vars + 1
+
+ #====================#
+ # Calculation starts #
+ #====================#
+
+
+ nfilt = 0
+ for i in range(num_vars+1):
+ if evaluated[i]:
+ if i < num_vars:
+ x = sim[:, i] + sim[:, num_vars]
+ else:
+ x = sim[:, i] # i == num_vars, i.e. the last column
+ nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cval[i], ctol, cweight, fval[i], x, nfilt, cfilt, ffilt, xfilt, conmat[:, i], confilt)
+
+ #==================#
+ # Calculation ends #
+ #==================#
+
+ # Postconditions
+ if DEBUGGING:
+ assert nfilt <= maxfilt
+ assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt
+ assert not (np.isnan(confilt[:, :nfilt]) | np.isneginf(confilt[:, :nfilt])).any()
+ assert np.size(cfilt) == maxfilt
+ assert not any((cfilt[:nfilt] < 0) | np.isnan(cfilt[:nfilt]) | np.isposinf(cfilt[:nfilt]))
+ assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt
+ assert not (np.isnan(xfilt[:, :nfilt])).any()
+ # The last calculated X can be Inf (finite + finite can be Inf numerically).
+ assert np.size(ffilt) == maxfilt
+ assert not any(np.isnan(ffilt[:nfilt]) | np.isposinf(ffilt[:nfilt]))
+
+ return nfilt
\ No newline at end of file
diff --git a/python/src/prima/cobyla/trustregion.py b/python/src/prima/cobyla/trustregion.py
new file mode 100644
index 0000000000..86bed4331a
--- /dev/null
+++ b/python/src/prima/cobyla/trustregion.py
@@ -0,0 +1,502 @@
+'''
+This module provides subroutines concerning the trust-region calculations of COBYLA.
+
+Coded by Zaikun ZHANG (www.zhangzk.net) based on Powell's code and the COBYLA paper.
+
+Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
+
+Python implementation by Nickolai Belakovski
+'''
+
+import numpy as np
+import numpy.typing as npt
+from prima.common.consts import DEBUGGING, REALMIN, REALMAX, EPS
+from prima.common.powalg import qradd_Rdiag, qrexc_Rdiag
+from prima.common.linalg import isminor
+
+
+def trstlp(A_in, b_in, delta):
+ '''
+ This function calculates an n-component vector d by the following two stages. In the first
+ stage, d is set to the shortest vector that minimizes the greatest violation of the constraints
+ np.dot(A[:n, k], d) >= b[k], k = 0, 1, 2, ..., m-1
+ subject to the Euclidean length of d being at most delta. If its length is strictly less than
+ delta, then the second stage uses the resultant freedom in d to minimize the objective function
+ np.dot(-A[:n, m], d)
+ subject to no increase in any greatest constraint violation. This notation allows the gradient of
+ the objective function to be regarded as the gradient of a constraint. Therefore the two stages
+ are distinguished by mcon == m and mcon > m respectively.
+
+ It is possible but rare that a degeneracy may prevent d from attaining the target length delta.
+
+ cviol is the largest constraint violation of the current d: np.max(np.append(b[:m] - A[:,:m].T@d, 0)).
+ icon is the index of a most violated constraint if cviol is positive.
+
+ nact is the number of constraints in the active set and iact[0], ..., iact[nact-1] are their indices,
+ while the remainder of iact contains a permutation of the remaining constraint indices.
+ N.B.: nact <= min(num_constraints, num_vars). Obviously nact <= num_constraints. In addition, the constraints
+ in iact[0, ..., nact-1] have linearly independent gradients (see the comments above the instruction
+ that deletes a constraint from the active set to make room for the new active constraint with index iact[icon]);
+ it can also be seen from the update of nact: starting from 0, nact is incremented only if nact < n.
+
+ Further, Z is an orthogonal matrix whose first nact columns can be regarded as the result of
+ Gram-Schmidt applied to the active constraint gradients. For j = 0, 1, ..., nact-1, the number
+ zdota[j] is the scalar product of the jth column of Z with the gradient of the jth active
+ constraint. d is the current vector of variables and here the residuals of the active constraints
+ should be zero. Further, the active constraints have nonnegative Lagrange multipliers that are
+ held at the beginning of vmultc. The remainder of this vector holds the residuals of the inactive
+ constraints at d, the ordering of the components of vmultc being in agreement with the permutation
+ of the indices of the constraints that is in iact. All these residuals are nonnegative, which is
+ achieved by the shift cviol that makes the least residual zero.
+
+ N.B.:
+ 1. The algorithm was NOT documented in the COBYLA paper. A note should be written to introduce it!
+ 2. As a major part of the algorithm (see trstlp_sub), the code maintains and updates the QR
+ factorization of A[:, iact[:nact]], i.e. the gradients of all the active (linear) constraints. The
+ matrix Z is indeed Q, and the vector zdota is the diagonal of R. The factorization is updated by
+ Givens rotations when an index is added in or removed from iact.
+ 3. There are probably better algorithms available for this trust-region linear programming problem.
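+ 4. A small illustration (in exact arithmetic; the computed d may differ slightly due to rounding
+ and early termination): take n = 2, m = 1, A = np.array([[1, 0], [0, 1]]), b = np.array([0.5, 0])
+ and delta = 1, i.e. one constraint d[0] >= 0.5 and the objective of maximizing d[1]. Stage 1 can
+ attain zero violation with the shortest such vector, d = [0.5, 0]; stage 2 then moves d towards
+ [0.5, np.sqrt(3)/2], the maximizer of d[1] on the trust-region boundary subject to d[0] >= 0.5.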
+ '''
+
+ A = A_in.copy()
+ b = b_in.copy()
+
+ # Sizes
+ num_constraints = A.shape[1] - 1
+
+ # Preconditions
+ assert num_constraints >= 0
+ assert A.shape[0] >= 1 and A.shape[1] >= 1
+ assert b.shape[0] == A.shape[1]
+ assert delta > 0
+
+
+ vmultc = np.zeros(len(b))
+ iact = np.zeros(len(b), dtype=int)
+ nact = 0
+ d = np.zeros(A_in.shape[0])
+ z = np.zeros((len(d), len(d)))
+
+
+
+ # Scale the problem if A contains large values. Otherwise floating point exceptions may occur.
+ # Note that the trust-region step is scale invariant.
+ for i in range(num_constraints+1): # Note that A.shape[1] == len(b) == num_constraints+1 != num_constraints
+ if (maxval := np.max(np.abs(A_in[:, i]))) > 1e12:
+ modscal = max(2*REALMIN, 1/maxval)
+ A[:, i] *= modscal
+ b[i] *= modscal
+
+ # Stage 1: minimize the L-infinity constraint violation of the linearized constraints.
+ iact[:num_constraints], nact, d, vmultc[:num_constraints], z = trstlp_sub(iact[:num_constraints], nact, 1, A[:, :num_constraints], b[:num_constraints], delta, d, vmultc[:num_constraints], z)
+
+ # Stage 2: minimize the linearized objective without increasing the L-infinity constraint violation.
+ iact, nact, d, vmultc, z = trstlp_sub(iact, nact, 2, A, b, delta, d, vmultc, z)
+
+ # ================
+ # Calculation ends
+ # ================
+
+ # Postconditions
+ assert d.shape[0] == A.shape[0]
+ assert all(np.isfinite(d))
+ # Due to rounding, it may happen that ||D|| > DELTA, but ||D|| > 2*DELTA is highly improbable.
+ assert np.linalg.norm(d) <= 2 * delta
+
+ return d
+
+def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z):
+ '''
+ This subroutine does the real calculations for trstlp, both stage 1 and stage 2.
+ Major differences between stage 1 and stage 2:
+ 1. Initialization. Stage 2 inherits the values of some variables from stage 1, so they are
+ initialized in stage 1 but not in stage 2.
+ 2. cviol. cviol is updated at each iteration in stage 1, while it remains a constant in stage 2.
+ 3. sdirn. See the definition of sdirn in the code for details.
+ 4. optnew. The two stages have different objectives, so optnew is updated differently.
+ 5. step. step <= cviol in stage 1.
+ '''
+ # zdasav = np.zeros(z.shape[1])
+ vmultd = np.zeros(np.size(vmultc))
+ zdota = np.zeros(np.size(z, 1))
+
+ # Sizes
+ num_vars = np.size(A, 0)
+ mcon = np.size(A, 1)
+
+ # Preconditions
+ if DEBUGGING:
+ assert num_vars >= 1
+ assert stage == 1 or stage == 2
+ assert (mcon >= 0 and stage == 1) or (mcon >= 1 and stage == 2)
+ assert np.size(b) == mcon
+ assert np.size(iact) == mcon
+ assert np.size(vmultc) == mcon
+ assert np.size(d) == num_vars
+ assert np.size(z, 0) == num_vars and np.size(z, 1) == num_vars
+ assert delta > 0
+ if stage == 2:
+ assert all(np.isfinite(d)) and np.linalg.norm(d) <= 2 * delta
+ assert nact >= 0 and nact <= np.minimum(mcon, num_vars)
+ assert all(vmultc[:mcon] >= 0) # Note that the comparison must be inside `all`.
+ # N.B.: Stage 1 defines only VMULTC(1:M); VMULTC(M+1) is undefined!
+
+
+ # Initialize according to stage
+ if stage == 1:
+ iact = np.arange(mcon)
+ nact = 0
+ d = np.zeros(num_vars)
+ cviol = np.max(np.append(b, 0))
+ vmultc = cviol - b
+ z = np.eye(num_vars)
+ if mcon == 0 or cviol <= 0:
+ # Check whether a quick return is possible. Make sure the in-outputs have been initialized.
+ return iact, nact, d, vmultc, z + + if all(np.isnan(b)): + return iact, nact, d, vmultc, z + else: + icon = np.where(b == max(b[~np.isnan(b)]))[0][0] + num_constraints = mcon + sdirn = np.zeros(len(d)) + else: + if np.dot(d, d) >= delta**2: + # Check whether a quick return is possible. + return iact, nact, d, vmultc, z + + iact[mcon-1] = mcon-1 + vmultc[mcon-1] = 0 + num_constraints = mcon - 1 + icon = mcon - 1 + + # In Powell's code, stage 2 uses the zdota and cviol calculated by stage1. Here we recalculate + # them so that they need not be passed from stage 1 to 2, and hence the coupling is reduced. + cviol = np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0)) + zdota[:nact] = [np.dot(z[:, k], A[:, iact[k]]) for k in range(nact)] + + # More initialization + optold = REALMAX + nactold = nact + nfail = 0 + + # Zaikun 20211011: vmultd is computed from scratch at each iteration, but vmultc is inherited + + # Powell's code can encounter infinite cycling, which did happen when testing the following CUTEst + # problems: DANWOODLS, GAUSS1LS, GAUSS2LS, GAUSS3LS, KOEBHELB, TAX13322, TAXR13322. Indeed, in all + # these cases, Inf/NaN appear in d due to extremely large values in A (up to 10^219). To resolve + # this, we set the maximal number of iterations to maxiter, and terminate if Inf/NaN occurs in d. + maxiter = np.minimum(10000, 100*np.maximum(num_constraints, num_vars)) + for iter in range(maxiter): + if DEBUGGING: + assert all(vmultc >= 0) + if stage == 1: + optnew = cviol + else: + optnew = -np.dot(d, A[:, mcon-1]) + + # End the current stage of the calculation if 3 consecutive iterations have either failed to + # reduce the best calculated value of the objective function or to increase the number of active + # constraints since the best value was calculated. This strategy prevents cycling, but there is + # a remote possibility that it will cause premature termination. + if optnew < optold or nact > nactold: + nactold = nact + nfail = 0 + else: + nfail += 1 + optold = np.minimum(optold, optnew) + if nfail == 3: + break + + # If icon exceeds nact, then we add the constraint with index iact[icon] to the active set. + # Apply Givens rotations so that the last num_vars-nact-1 columns of Z are orthogonal to the gradient + # of the new constraint, a scalar product being set to 0 if its nonzero value could be due to + # computer rounding errors, which is tested by ISMINOR. + if icon >= nact: # In Python this needs to be >= since Python is 0-indexed (in Fortran we have 1 > 0, in Python we need 0 >= 0) + # zdasav[:nact] = zdota[:nact] + nactsav = nact + z, zdota, nact = qradd_Rdiag(A[:, iact[icon]], z, zdota, nact) # May update nact to nact+1 + # Indeed it suffices to pass zdota[:min(num_vars, nact+1)] to qradd as follows: + # qradd(A[:, iact[icon]], z, zdota[:min(num_vars, nact+1)], nact) + + if nact == nactsav + 1: + # N.B.: It is possible to index arrays using [nact, icon] when nact == icon. + # Zaikun 20211012: Why should vmultc[nact] = 0? + if nact != (icon + 1): # Need to add 1 to Python for 0 indexing + vmultc[[icon, nact-1]] = vmultc[nact-1], 0 + iact[[icon, nact-1]] = iact[[nact-1, icon]] + else: + vmultc[nact-1] = 0 + else: + # Zaikun 20211011: + # 1. VMULTD is calculated from scratch for the first time (out of 2) in one iteration. + # 2. Note that IACT has not been updated to replace IACT[NACT] with IACT[ICON]. Thus + # A[:, IACT[:NACT]] is the UNUPDATED version before QRADD (note Z[:, :NACT] remains the + # same before and after QRADD). 
Therefore if we supply ZDOTA to LSQR (as Rdiag) as + # Powell did, we should use the UNUPDATED version, namely ZDASAV. + # vmultd[:nact] = lsqr(A[:, iact[:nact]], A[:, iact[icon]], z[:, :nact], zdasav[:nact]) + vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], A[:, iact[icon]], rcond=None)[0] + if not any(np.logical_and(vmultd[:nact] > 0, iact[:nact] <= num_constraints)): + # N.B.: This can be triggered by NACT == 0 (among other possibilities)! This is + # important, because NACT will be used as an index in the sequel. + break + # vmultd[NACT+1:mcon] is not used, but we have to initialize it in Fortran, or compilers + # complain about the where construct below (another solution: restrict where to 1:NACT). + # TODO: How does this apply to Python? + vmultd[nact:mcon] = -1 # len(vmultd) == mcon + + # Revise the Lagrange multipliers. The revision is not applicable to vmultc[nact:num_constraints]. + fracmult = [vmultc[i]/vmultd[i] if vmultd[i] > 0 and iact[i] <= num_constraints else REALMAX for i in range(nact)] + # Only the places with vmultd > 0 and iact <= m is relevant below, if any. + frac = min(fracmult[:nact]) # fracmult[nact:mcon] may contain garbage + vmultc[:nact] = np.maximum(np.zeros(len(vmultc[:nact])), vmultc[:nact] - frac*vmultd[:nact]) + + # Zaikun 20210811: Powell's code includes the following, which is IMPOSSIBLE TO REACH + # !if (icon < nact) then + # ! do k = icon, nact-1 + # ! hypt = sqrt(zdota(k+1)**2+inprod(z(:, k), A(:, iact(k+1)))**2) + # ! grot = planerot([zdota(k+1), inprod(z(:, k), A(:, iact(k+1)))]) + # ! z(:, [k, k+1]) = matprod(z(:, [k+1, k]), transpose(grot)) + # ! zdota([k, k+1]) = [hypt, (zdota(k+1) / hypt) * zdota(k)] + # ! end do + # ! iact(icon:nact) = [iact(icon+1:nact), iact(icon)] + # ! vmultc(icon:nact) = [vmultc(icon+1:nact), vmultc(icon)] + # !end if + + # Reorder the active constraints so that the one to be replaced is at the end of the list. + # Exit if the new value of zdota[nact] is not acceptable. Powell's condition for the + # following If: non abs(zdota[nact]) > 0. Note that it is different from + # 'abs(zdota[nact]) <=0)' as zdota[nact] can be NaN. TODO: Does that make sense with Python? + # It seems Python returns false in both cases, whereas Fortran returns true for abs(NaN) > 0 + # and false for the other one. + # N.B.: We cannot arrive here with nact == 0, which should have triggered a break above + if np.isnan(zdota[nact - 1]) or abs(zdota[nact - 1]) <= EPS**2: + break + vmultc[[icon, nact - 1]] = 0, frac # vmultc[[icon, nact]] is valid as icon > nact + iact[[icon, nact - 1]] = iact[[nact - 1, icon]] + # end if nact == nactsav + 1 + + # In stage 2, ensure that the objective continues to be treated as the last active constraint. + # Zaikun 20211011, 20211111: Is it guaranteed for stage 2 that iact[nact-1] = mcon when + # iact[nact] != mcon??? If not, then how does the following procedure ensure that mcon is + # the last of iact[:nact]? 
+ if stage == 2 and iact[nact - 1] != (mcon - 1): + assert nact > 0, "nact > 0 is required" + z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], nact - 2) # We pass nact-2 in Python instead of nact-1 + # Indeed, it suffices to pass Z[:, :nact] to qrexc as follows: + # z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], nact - 1) + iact[[nact-2, nact-1]] = iact[[nact-1, nact-2]] + vmultc[[nact-2, nact-1]] = vmultc[[nact-1, nact-2]] + # Zaikun 20211117: It turns out that the last few lines do not guarantee iact[nact] == num_vars in + # stage 2; the following test cannot be passed. IS THIS A BUG?! + # assert iact[nact] == mcon or stage == 1, 'iact[nact] must == mcon in stage 2' + + # Powell's code does not have the following. It avoids subsequent floating points exceptions. + if np.isnan(zdota[nact-1]) or abs(zdota[nact-1]) <= EPS**2: + break + + # Set sdirn to the direction of the next change to the current vector of variables + # Usually during stage 1 the vector sdirn gives a search direction that reduces all the + # active constraint violations by one simultaneously. + if stage == 1: + sdirn -= ((np.dot(sdirn, A[:, iact[nact-1]]) - 1)/zdota[nact-1])*z[:, nact-1] + else: + sdirn = 1/zdota[nact-1]*z[:, nact-1] + else: # icon < nact + # Delete the constraint with the index iact[icon] from the active set, which is done by + # reordering iact[icont:nact] into [iact[icon+1:nact], iact[icon]] and then reduce nact to + # nact - 1. In theory, icon > 0. + # assert icon > 0, "icon > 0 is required" # For Python I think this is irrelevant + z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], icon) # qrexc does nothing if icon == nact + # Indeed, it suffices to pass Z[:, :nact] to qrexc as follows: + # z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], icon) + iact[icon:nact] = [*iact[icon+1:nact], iact[icon]] + vmultc[icon:nact] = [*vmultc[icon+1:nact], vmultc[icon]] + nact -= 1 + + # Powell's code does not have the following. It avoids subsequent exceptions. + # Zaikun 20221212: In theory, nact > 0 in stage 2, as the objective function should always + # be considered as an "active constraint" --- more precisely, iact[nact] = mcon. However, + # looking at the copde, I cannot see why in stage 2 nact must be positive after the reduction + # above. It did happen in stage 1 that nact became 0 after the reduction --- this is + # extremely rare, and it was never observed until 20221212, after almost one year of + # random tests. Maybe nact is theoretically positive even in stage 1? + assert stage == 1 or nact > 0, "nact > 0 is required in stage 2" + if stage == 2 and nact < 0: + break # If this case ever occurs, we have to break, as nact is used as an index below. + if nact > 0: + if np.isnan(zdota[nact-1]) or abs(zdota[nact-1]) <= EPS**2: + break + + # Set sdirn to the direction of the next change to the current vector of variables. + if stage == 1: + sdirn -= np.dot(sdirn, z[:, nact]) * z[:, nact] + # sdirn is orthogonal to z[:, nact+1] + else: + sdirn = 1/zdota[nact-1] * z[:, nact-1] + # end if icon > nact + + # Calculate the step to the trust region boundary or take the step that reduces cviol to 0. + # ----------------------------------------------------------------------------------------- # + # The following calculation of step is adopted from NEWUOA/BOBYQA/LINCOA. It seems to improve + # the performance of COBYLA. 
We also found that removing the precaution about underflows is
+ # beneficial to the overall performance of COBYLA --- the underflows are harmless anyway.
+ dd = delta**2 - np.dot(d, d)
+ ss = np.dot(sdirn, sdirn)
+ sd = np.dot(sdirn, d)
+ if dd <= 0 or ss <= EPS * delta**2 or np.isnan(sd):
+ break
+ # sqrtd: square root of a discriminant. The max avoids sqrtd < abs(sd) due to underflow.
+ sqrtd = max(np.sqrt(ss*dd + sd**2), abs(sd), np.sqrt(ss * dd))
+ if sd > 0:
+ step = dd / (sqrtd + sd)
+ else:
+ step = (sqrtd - sd) / ss
+ # step < 0 should not happen. Step can be 0 or NaN when, e.g., sd or ss becomes inf.
+ if step <= 0 or not np.isfinite(step):
+ break
+
+ # Powell's approach and comments are as follows.
+ # -------------------------------------------------- #
+ # The two statements below that include the factor eps prevent
+ # some harmless underflows that occurred in a test calculation
+ # (Zaikun: here, eps is the machine epsilon; Powell's original
+ # code used 1.0e-6, and Powell's code was written in single
+ # precision). Further, we skip the step if it could be 0 within
+ # a reasonable tolerance for computer rounding errors.
+
+ # !dd = delta**2 - sum(d**2, mask=(abs(d) >= EPS * delta))
+ # !ss = inprod(sdirn, sdirn)
+ # !if (dd <= 0) then
+ # ! exit
+ # !end if
+ # !sd = inprod(sdirn, d)
+ # !if (abs(sd) >= EPS * sqrt(ss * dd)) then
+ # ! step = dd / (sqrt(ss * dd + sd**2) + sd)
+ # !else
+ # ! step = dd / (sqrt(ss * dd) + sd)
+ # !end if
+ # -------------------------------------------------- #
+
+ if stage == 1:
+ if isminor(cviol, step):
+ break
+ step = min(step, cviol)
+
+ # Set dnew to the new variables if step is the steplength, and reduce cviol to the corresponding
+ # maximum residual if stage 1 is being done.
+ dnew = d + step * sdirn
+ if stage == 1:
+ cviol = np.max(np.append(b[iact[:nact]] - dnew@A[:, iact[:nact]], 0))
+ # N.B.: cviol will be used when calculating vmultd[nact+1:mcon].
+
+ # Zaikun 20211011:
+ # 1. vmultd is computed from scratch for the second (out of 2) time in one iteration.
+ # 2. vmultd[:nact] and vmultd[nact:mcon] are calculated separately with no coupling.
+ # 3. vmultd will be calculated from scratch again in the next iteration.
+ # Set vmultd to the vmultc vector that would occur if d became dnew. A device is included to
+ # force vmultd[k] = 0 if deviations from this value can be attributed to computer rounding
+ # errors. First calculate the new Lagrange multipliers.
+ # vmultd[:nact] = lsqr(A[:, iact[:nact]], dnew, z[:, :nact], zdota[:nact])
+ vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], dnew, rcond=None)[0]
+ if stage == 2:
+ vmultd[nact-1] = max(0, vmultd[nact-1]) # This seems never activated.
+ # Complete vmultd by finding the new constraint residuals. (Powell wrote "Complete vmultc ...")
+ cvshift = dnew@A[:, iact] - b[iact] + cviol # Only cvshift[nact+1:mcon] is needed.
+ cvsabs = abs(dnew)@abs(A[:, iact]) + abs(b[iact]) + cviol
+ cvshift[isminor(cvshift, cvsabs)] = 0
+ vmultd[nact:mcon] = cvshift[nact:mcon]
+
+ # Calculate the fraction of the step from d to dnew that will be taken.
+ fracmult = [vmultc[i]/(vmultc[i] - vmultd[i]) if vmultd[i] < 0 else REALMAX for i in range(len(vmultd))]
+ # Only the places with vmultd < 0 are relevant below, if any.
+ icon = np.where((temp_arr := np.array([1, *fracmult])) == min(temp_arr))[0][0] - 1
+ frac = min(temp_arr)
+
+ # Update d, vmultc, and cviol.
+ dold = d
+ d = (1 - frac)*d + frac * dnew
+ # Break in the case of inf/nan in d.
+ if not np.isfinite(d).all(): + d = dold # Should we restore also iact, nact, vmultc, and z? + break + + vmultc = np.maximum(np.zeros(len(vmultc)), (1 - frac)*vmultc + frac*vmultd) + if stage == 1: + # cviol = (1 - frac) * cvold + frac * cviol # Powell's version + # In theory, cviol = np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0)), yet the + # cviol updated as above can be quite different from this value if A has huge entries (e.g., > 1e20) + cviol = np.max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0)) + + if icon < 0 or icon >= mcon: + # In Powell's code, the condition is icon == 0. Indeed, icon < 0 cannot hold unless + # fracmult contains only nan, which should not happen; icon >= mcon should never occur. + break + + #==================# + # Calculation ends # + #==================# + + # Postconditions + if DEBUGGING: + assert np.size(iact) == mcon + assert np.size(vmultc) == mcon + assert all(vmultc >= 0) + assert np.size(d) == num_vars + assert all(np.isfinite(d)) + assert np.linalg.norm(d) <= 2 * delta + assert np.size(z, 0) == num_vars and np.size(z, 1) == num_vars + assert nact >= 0 and nact <= np.minimum(mcon, num_vars) + + return iact, nact, d, vmultc, z + + +def trrad(delta_in, dnorm, eta1, eta2, gamma1, gamma2, ratio): + ''' + This function updates the trust region radius according to RATIO and DNORM. + ''' + + # Preconditions + if DEBUGGING: + assert delta_in >= dnorm and dnorm > 0 + assert eta1 >= 0 and eta1 <= eta2 and eta2 < 1 + assert eta1 >= 0 and eta1 <= eta2 and eta2 < 1 + assert gamma1 > 0 and gamma1 < 1 and gamma2 > 1 + # By the definition of RATIO in ratio.f90, RATIO cannot be NaN unless the actual reduction is + # NaN, which should NOT happen due to the moderated extreme barrier. + assert not np.isnan(ratio) + + #====================# + # Calculation starts # + #====================# + + if ratio <= eta1: + delta = gamma1 * dnorm # Powell's UOBYQA/NEWOUA + # delta = gamma1 * delta_in # Powell's COBYLA/LINCOA + # delta = min(gamma1 * delta_in, dnorm) # Powell's BOBYQA + elif ratio <= eta2: + delta = max(gamma1 * delta_in, dnorm) # Powell's UOBYQA/NEWOUA/BOBYQA/LINCOA + else: + delta = max(gamma1 * delta_in, gamma2 * dnorm) # Powell's NEWUOA/BOBYQA + # delta = max(delta_in, gamma2 * dnorm) # Modified version. Works well for UOBYQA + # For noise-free CUTEst problems of <= 100 variables, Powell's version works slightly better + # than the modified one. + # delta = max(delta_in, 1.25*dnorm, dnorm + rho) # Powell's UOBYQA + # delta = min(max(gamma1 * delta_in, gamma2 * dnorm), gamma3 * delta_in) # Powell's LINCOA, gamma3 = np.sqrt(2) + + # For noisy problems, the following may work better. 
+ # if ratio <= eta1:
+ # delta = gamma1 * dnorm
+ # elif ratio <= eta2: # Ensure DELTA >= DELTA_IN
+ # delta = delta_in
+ # else: # Ensure DELTA > DELTA_IN with a constant factor
+ # delta = max(delta_in * (1 + gamma2) / 2, gamma2 * dnorm)
+
+ #==================#
+ # Calculation ends #
+ #==================#
+
+ # Postconditions
+ if DEBUGGING:
+ assert delta > 0
+ return delta
diff --git a/python/src/prima/cobyla/update.py b/python/src/prima/cobyla/update.py
new file mode 100644
index 0000000000..b1cf0a1e68
--- /dev/null
+++ b/python/src/prima/cobyla/update.py
@@ -0,0 +1,272 @@
+from prima.common.consts import DEBUGGING
+from prima.common.infos import DAMAGING_ROUNDING, INFO_DEFAULT
+from prima.common.linalg import isinv
+import numpy as np
+
+
+def updatexfc(jdrop, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi):
+ '''
+ This function revises the simplex by updating the elements of SIM, SIMI, FVAL, CONMAT, and CVAL
+ '''
+
+ # Local variables
+ itol = 1
+
+ # Sizes
+ num_constraints = np.size(constr)
+ num_vars = np.size(sim, 0)
+
+ # Preconditions
+ if DEBUGGING:
+ assert num_constraints >= 0
+ assert num_vars >= 1
+ assert jdrop is None or (jdrop >= 0 and jdrop <= num_vars) # JDROP may be None (see setdrop_tr).
+ assert not any(np.isnan(constr) | np.isneginf(constr))
+ assert not (np.isnan(cstrv) | np.isposinf(cstrv))
+ assert np.size(d) == num_vars and all(np.isfinite(d))
+ assert not (np.isnan(f) | np.isposinf(f))
+ assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
+ assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
+ assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
+ assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
+ assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
+ assert np.isfinite(sim).all()
+ assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
+ assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
+ assert np.isfinite(simi).all()
+ assert isinv(sim[:, :num_vars], simi, itol)
+
+ #====================#
+ # Calculation starts #
+ #====================#
+
+
+ # Do nothing when JDROP is None. This can only happen after a trust-region step.
+ if jdrop is None: # JDROP is None is impossible if the input is correct.
+ return conmat, cval, fval, sim, simi, INFO_DEFAULT
+
+ # Copy SIM and SIMI so that they can be restored if the update turns out to be damaging.
+ sim_old = sim.copy()
+ simi_old = simi.copy()
+ if jdrop < num_vars:
+ sim[:, jdrop] = d
+ simi_jdrop = simi[jdrop, :] / np.dot(simi[jdrop, :], d)
+ simi -= np.outer(simi@d, simi_jdrop)
+ simi[jdrop, :] = simi_jdrop
+ else: # jdrop == num_vars
+ sim[:, num_vars] += d
+ sim[:, :num_vars] -= np.tile(d, (num_vars, 1)).T
+ simid = simi@d
+ sum_simi = np.sum(simi, axis=0)
+ simi += np.outer(simid, sum_simi / (1 - np.sum(simid)))
+
+ # Check whether SIMI is a poor approximation to the inverse of SIM[:, :NUM_VARS].
+ # Calculate SIMI from scratch if the current one is damaged by rounding errors.
+ erri = np.max(abs(simi@sim[:, :num_vars] - np.eye(num_vars))) # np.max returns NaN if any input is NaN
+ if erri > 0.1 * itol or np.isnan(erri):
+ simi_test = np.linalg.inv(sim[:, :num_vars])
+ erri_test = np.max(abs(simi_test@sim[:, :num_vars] - np.eye(num_vars)))
+ if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)):
+ simi = simi_test
+ erri = erri_test
+
+ # If SIMI is satisfactory, then update FVAL, CONMAT, CVAL, and the pole position. Otherwise restore
+ # SIM and SIMI, and return with INFO = DAMAGING_ROUNDING.
+ if erri <= itol:
+ fval[jdrop] = f
+ conmat[:, jdrop] = constr
+ cval[jdrop] = cstrv
+ # Switch the best vertex to the pole position SIM[:, NUM_VARS] if it is not there already
+ conmat, cval, fval, sim, simi, info = updatepole(cpen, conmat, cval, fval, sim, simi)
+ else:
+ info = DAMAGING_ROUNDING
+ sim = sim_old
+ simi = simi_old
+
+ #==================#
+ # Calculation ends #
+ #==================#
+
+ # Postconditions
+ if DEBUGGING:
+ assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
+ assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
+ assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
+ assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
+ assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
+ assert np.isfinite(sim).all()
+ assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
+ assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
+ assert np.isfinite(simi).all()
+ assert isinv(sim[:, :num_vars], simi, itol) or info == DAMAGING_ROUNDING
+
+ # Return in the same order as the early return above and as updatepole.
+ return conmat, cval, fval, sim, simi, info
+
+def findpole(cpen, cval, fval):
+ '''
+ This subroutine identifies the best vertex of the current simplex with respect to the merit
+ function PHI = F + CPEN * CSTRV.
+ '''
+
+ # Size
+ num_vars = np.size(fval) - 1
+
+ # Preconditions
+ if DEBUGGING:
+ assert cpen > 0
+ assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
+ assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
+
+ #====================#
+ # Calculation starts #
+ #====================#
+
+ # Identify the optimal vertex of the current simplex
+ jopt = np.size(fval) - 1
+ phi = fval + cpen * cval
+ phimin = np.min(phi)
+ joptcandidate = np.argmin(phi)
+ if phi[joptcandidate] < phi[jopt]:
+ jopt = joptcandidate
+ if cpen <= 0 and any((cval < cval[jopt]) & (phi <= phimin)):
+ # jopt is the index of the minimum value of cval
+ # on the set of cval values where phi <= phimin
+ jopt = np.where(cval == np.min(cval[phi <= phimin]))[0][0]
+
+ #==================#
+ # Calculation ends #
+ #==================#
+
+ # Postconditions
+ if DEBUGGING:
+ assert jopt >= 0 and jopt < num_vars + 1
+ assert jopt == num_vars or phi[jopt] < phi[num_vars] or (phi[jopt] <= phi[num_vars] and cval[jopt] < cval[num_vars])
+ return jopt
+
+
+def updatepole(cpen, conmat, cval, fval, sim, simi):
+ #--------------------------------------------------------------------------------------------------#
+ # This subroutine identifies the best vertex of the current simplex with respect to the merit
+ # function PHI = F + CPEN * CSTRV, and then switches this vertex to SIM[:, NUM_VARS], which Powell
+ # called the "pole position" in his comments. CONMAT, CVAL, FVAL, and SIMI are updated accordingly.
+ #
+ # N.B. 1: In precise arithmetic, the following two procedures produce the same results:
+ # 1) apply UPDATEPOLE to SIM twice, first with CPEN = CPEN1 and then with CPEN = CPEN2;
+ # 2) apply UPDATEPOLE to SIM with CPEN = CPEN2.
+ # In finite-precision arithmetic, however, they may produce different results unless CPEN1 = CPEN2.
+ #
+ # N.B. 2: When JOPT == N+1, the best vertex is already at the pole position, so there is nothing to
+ # switch. However, as in Powell's code, the code below will check whether SIMI is good enough to
+ # work as the inverse of SIM(:, 1:N) or not.
If not, Powell's code would invoke an error return of
+ # COBYLB; our implementation, however, will try calculating SIMI from scratch; if the recalculated
+ # SIMI is still of poor quality, then UPDATEPOLE will return with INFO = DAMAGING_ROUNDING,
+ # informing COBYLB that SIMI is poor due to damaging rounding errors.
+ #
+ # N.B. 3: UPDATEPOLE should be called when and only when FINDPOLE can potentially return a value
+ # other than N+1. The value of FINDPOLE is determined by CPEN, CVAL, and FVAL, the latter two being
+ # decided by SIM. Thus UPDATEPOLE should be called after CPEN or SIM changes. COBYLA updates CPEN at
+ # only two places: the beginning of each trust-region iteration, and when REDRHO is called;
+ # SIM is updated only by UPDATEXFC, which itself calls UPDATEPOLE internally. Therefore, we only
+ # need to call UPDATEPOLE after updating CPEN at the beginning of each trust-region iteration and
+ # after each invocation of REDRHO.
+
+ # Local variables
+ itol = 1
+
+ # Sizes
+ num_constraints = conmat.shape[0]
+ num_vars = sim.shape[0]
+
+ # Preconditions
+ if DEBUGGING:
+ assert num_constraints >= 0
+ assert num_vars >= 1
+ assert cpen > 0
+ assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
+ assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
+ assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
+ assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
+ assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
+ assert np.isfinite(sim).all()
+ assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
+ assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
+ assert np.isfinite(simi).all()
+ assert isinv(sim[:, :num_vars], simi, itol)
+
+ #====================#
+ # Calculation starts #
+ #====================#
+
+ # INFO must be set, as it is an output.
+ info = INFO_DEFAULT
+
+ # Identify the optimal vertex of the current simplex.
+ jopt = findpole(cpen, cval, fval)
+
+ # Switch the best vertex to the pole position SIM[:, NUM_VARS] if it is not there already and update
+ # SIMI. Before the update, save a copy of SIM and SIMI. If the update is unsuccessful due to
+ # damaging rounding errors, we restore them and return with INFO = DAMAGING_ROUNDING.
+ sim_old = sim.copy()
+ simi_old = simi.copy()
+ if 0 <= jopt < num_vars:
+ # Unless there is a bug in FINDPOLE it is guaranteed that JOPT >= 0
+ # When JOPT == NUM_VARS, there is nothing to switch; in addition SIMI[JOPT, :] will be illegal.
+ # fval[[jopt, -1]] = fval[[-1, jopt]]
+ # conmat[:, [jopt, -1]] = conmat[:, [-1, jopt]] # Exchange CONMAT[:, JOPT] AND CONMAT[:, -1]
+ # cval[[jopt, -1]] = cval[[-1, jopt]]
+ sim[:, num_vars] += sim[:, jopt]
+ sim_jopt = sim[:, jopt].copy()
+ sim[:, jopt] = 0 # i.e. np.zeros(num_vars)
+ sim[:, :num_vars] -= np.tile(sim_jopt, (num_vars, 1)).T
+ # The above update is equivalent to multiplying SIM[:, :NUM_VARS] from the right side by a matrix whose
+ # JOPT-th row is [-1, -1, ..., -1], while all the other rows are the same as those of the
+ # identity matrix. It is easy to check that the inverse of this matrix is itself. Therefore,
+ # SIMI should be updated by a multiplication with this matrix (i.e. its inverse) from the left
+ # side, as is done in the following line. The JOPT-th row of the updated SIMI is minus the sum
+ # of all rows of the original SIMI, whereas all the other rows remain unchanged.
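+ # For instance (a quick sanity check, not part of the algorithm): with NUM_VARS = 2 and JOPT = 0,
+ # the matrix is M = np.array([[-1, -1], [0, 1]]), and M@M is the identity, i.e. M is its own inverse.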
+ simi[jopt, :] = -np.sum(simi, axis=0)
+
+ # Check whether SIMI is a poor approximation to the inverse of SIM[:, :NUM_VARS].
+ # Calculate SIMI from scratch if the current one is damaged by rounding errors.
+ erri = np.max(abs(simi@sim[:, :num_vars] - np.eye(num_vars))) # np.max returns NaN if any input is NaN
+ if erri > 0.1 * itol or np.isnan(erri):
+ simi_test = np.linalg.inv(sim[:, :num_vars])
+ erri_test = np.max(abs(simi_test@sim[:, :num_vars] - np.eye(num_vars)))
+ if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)):
+ simi = simi_test
+ erri = erri_test
+
+
+ # If SIMI is satisfactory, then update FVAL, CONMAT, and CVAL. Otherwise restore SIM and SIMI, and
+ # return with INFO = DAMAGING_ROUNDING.
+ if erri <= itol:
+ if 0 <= jopt < num_vars:
+ fval[[jopt, num_vars]] = fval[[num_vars, jopt]]
+ conmat[:, [jopt, num_vars]] = conmat[:, [num_vars, jopt]]
+ cval[[jopt, num_vars]] = cval[[num_vars, jopt]]
+ else: # erri > itol or erri is NaN
+ info = DAMAGING_ROUNDING
+ sim = sim_old
+ simi = simi_old
+
+ #==================#
+ # Calculation ends #
+ #==================#
+
+ # Postconditions
+ if DEBUGGING:
+ assert findpole(cpen, cval, fval) == num_vars or info == DAMAGING_ROUNDING
+ assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
+ assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
+ assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
+ assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
+ assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
+ assert np.isfinite(sim).all()
+ assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
+ assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
+ assert np.isfinite(simi).all()
+ # Do not check SIMI = SIM[:, :num_vars]^{-1}, as it may not be true due to damaging rounding.
+ assert isinv(sim[:, :num_vars], simi, itol) or info == DAMAGING_ROUNDING
+
+ return conmat, cval, fval, sim, simi, info
\ No newline at end of file
diff --git a/python/src/prima/common/__init__.py b/python/src/prima/common/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/src/prima/common/checkbreak.py b/python/src/prima/common/checkbreak.py
new file mode 100644
index 0000000000..1329ca5169
--- /dev/null
+++ b/python/src/prima/common/checkbreak.py
@@ -0,0 +1,83 @@
+from prima.common.infos import INFO_DEFAULT, NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED
+
+import numpy as np
+
+def checkbreak_unc(maxfun, nf, f, ftarget, x):
+ '''
+ This function checks whether to break out of the solver loop in the unconstrained case.
+ '''
+
+ # Outputs
+ info = INFO_DEFAULT
+
+ # Local variables
+ srname = "CHECKbreak_UNC"
+
+ # Preconditions
+ assert INFO_DEFAULT not in [NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED], f'NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, and MAXFUN_REACHED differ from INFO_DFT {srname}'
+ # X does not contain NaN if the initial X does not contain NaN and the subroutines generating
+ # trust-region/geometry steps work properly so that they never produce a step containing NaN/Inf.
+ assert not any(np.isnan(x)), f'X does not contain NaN {srname}'
+ # With the moderated extreme barrier, F cannot be NaN/+Inf.
+ assert not (np.isnan(f) or np.isposinf(f)), f'F is not NaN/+Inf {srname}' # F is a scalar here.
+
+ #====================#
+ # Calculation starts #
+ #====================#
+
+ # Although X should not contain NaN unless there is a bug, we include the following for security.
+ # X can be Inf, as finite + finite can be Inf numerically.
+ if any(np.isnan(x)) or any(np.isinf(x)):
+ info = NAN_INF_X
+
+ # Although NAN_INF_F should not happen unless there is a bug, we include the following for security.
+ if np.isnan(f) or np.isposinf(f):
+ info = NAN_INF_F
+
+ if f <= ftarget:
+ info = FTARGET_ACHIEVED
+
+ if nf >= maxfun:
+ info = MAXFUN_REACHED
+
+ return info
+
+def checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x):
+ '''
+ This function checks whether to break out of the solver loop in the constrained case.
+ '''
+
+ # Outputs
+ info = INFO_DEFAULT
+
+ # Local variables
+ srname = "CHECKbreak_CON"
+
+ # Preconditions
+ assert INFO_DEFAULT not in [NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED], f'NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, and MAXFUN_REACHED differ from INFO_DFT {srname}'
+ # X does not contain NaN if the initial X does not contain NaN and the subroutines generating
+ # trust-region/geometry steps work properly so that they never produce a step containing NaN/Inf.
+ assert not any(np.isnan(x)), f'X does not contain NaN {srname}'
+ # With the moderated extreme barrier, F or CSTRV cannot be NaN/+Inf.
+ assert not (np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv)), f'F or CSTRV is not NaN/+Inf {srname}'
+
+ #====================#
+ # Calculation starts #
+ #====================#
+
+ # Although X should not contain NaN unless there is a bug, we include the following for security.
+ # X can be Inf, as finite + finite can be Inf numerically.
+ if any(np.isnan(x)) or any(np.isinf(x)):
+ info = NAN_INF_X
+
+ # Although NAN_INF_F should not happen unless there is a bug, we include the following for security.
+ if np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv):
+ info = NAN_INF_F
+
+ if cstrv <= ctol and f <= ftarget:
+ info = FTARGET_ACHIEVED
+
+ if nf >= maxfun:
+ info = MAXFUN_REACHED
+
+ return info
\ No newline at end of file
diff --git a/python/src/prima/common/consts.py b/python/src/prima/common/consts.py
new file mode 100644
index 0000000000..3c8901e17c
--- /dev/null
+++ b/python/src/prima/common/consts.py
@@ -0,0 +1,36 @@
+import numpy as np
+import os
+
+# TODO: Reset this to False when making PR
+DEBUGGING = True or bool(os.getenv('PRIMA_DEBUGGING'))
+
+
+REALMIN = np.finfo(float).tiny
+REALMAX = np.finfo(float).max
+FUNCMAX = 2.0**100
+CONSTRMAX = FUNCMAX
+EPS = np.finfo(float).eps
+
+# Some default values
+RHOBEG_DEFAULT = 1
+RHOEND_DEFAULT = 1e-6
+FTARGET_DEFAULT = -REALMAX
+CTOL_DEFAULT = np.sqrt(EPS)
+CWEIGHT_DEFAULT = 1e8
+ETA1_DEFAULT = 0.1
+ETA2_DEFAULT = 0.7
+GAMMA1_DEFAULT = 0.5
+GAMMA2_DEFAULT = 2
+IPRINT_DEFAULT = 0
+MAXFUN_DIM_DEFAULT = 500
+
+PRIMA_MAX_HIST_MEM_MB = 300 # 1MB > 10^5*REAL64. 100 can be too small.
+
+# Maximal amount of memory (Byte) allowed for XHIST, FHIST, CONHIST, CHIST, and the filters.
+MHM = PRIMA_MAX_HIST_MEM_MB * 10**6
+# Make sure that MAXHISTMEM does not exceed HUGE(0) to avoid overflow and memory errors.
+MAXHISTMEM = min(MHM, np.iinfo(np.int32).max)
+
+# MIN_MAXFILT is the minimal value of MAXFILT, the maximal length of the filter used in
+# constrained solvers.
+MIN_MAXFILT = 200 # Should be positive; < 200 is not recommended.
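+# As a rough illustration of the memory budget above (assuming 8-byte floats): with N = 1000
+# variables, one column of XFILT occupies 8000 bytes, so MAXHISTMEM = 3*10**8 bytes accommodates
+# roughly 37500 such columns.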
+MAXFILT_DEFAULT = 10 * MIN_MAXFILT
diff --git a/python/src/prima/common/evaluate.py b/python/src/prima/common/evaluate.py
new file mode 100644
index 0000000000..4e0e81e4d5
--- /dev/null
+++ b/python/src/prima/common/evaluate.py
@@ -0,0 +1,71 @@
+import numpy as np
+from prima.common.consts import FUNCMAX, CONSTRMAX, REALMAX, DEBUGGING
+
+# This is a module evaluating the objective/constraint function with
+# NaN/Inf handling.
+
+
+def moderatex(x):
+    '''
+    This function moderates a decision variable. It replaces NaN by 0 and Inf/-Inf by REALMAX/-REALMAX.
+    '''
+    y = np.copy(x)  # Copy so that the caller's array is not mutated.
+    y[np.isnan(x)] = 0
+    y = np.maximum(-REALMAX, np.minimum(REALMAX, y))
+    return y
+
+def moderatef(f):
+    f = FUNCMAX if np.isnan(f) else f
+    return min(FUNCMAX, f)
+
+def moderatec(c):
+    c = np.nan_to_num(c, nan=-CONSTRMAX)  # Copy so that the caller's array is not mutated.
+    c = np.clip(c, -CONSTRMAX, CONSTRMAX)
+    return c
+
+
+def evaluate(calcfc, x):
+    """
+    This function evaluates CALCFC at X, setting F to the objective function value, CONSTR to the
+    constraint value, and CSTRV to the constraint violation. NaN/Inf are handled by a moderated
+    extreme barrier.
+    """
+
+    # Preconditions
+    if DEBUGGING:
+        # X should not contain NaN if the initial X does not contain NaN and the subroutines generating
+        # trust-region/geometry steps work properly so that they never produce a step containing NaN/Inf.
+        assert not any(np.isnan(x))
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    if any(np.isnan(x)):
+        f = np.sum(x)
+        constr = f  # TODO: This is supposed to be an array, but I don't know how long it is here.
+        cstrv = f
+    else:
+        f, constr = calcfc(moderatex(x))  # Evaluate F and CONSTR; we moderate X before doing so.
+
+        # Moderated extreme barrier: replace NaN/huge objective or constraint values with a large but
+        # finite value. This is naive, and better approaches surely exist.
+        f = moderatef(f)
+        constr = moderatec(constr)
+
+        # Evaluate the constraint violation for constraints CONSTR(X) >= 0.
+        cstrv = np.max(np.append(-constr, 0))
+
+    #==================#
+    # Calculation ends #
+    #==================#
+
+    # Postconditions
+    if DEBUGGING:
+        # With X not containing NaN, and with the moderated extreme barrier, F cannot be NaN/+Inf, and
+        # CONSTR cannot be NaN/-Inf.
+        assert not (np.isnan(f) or np.isposinf(f))
+        assert not any(np.isnan(constr) | np.isneginf(constr))
+        assert not (cstrv < 0 or np.isnan(cstrv) or np.isposinf(cstrv))
+
+    return f, constr, cstrv
\ No newline at end of file
diff --git a/python/src/prima/common/history.py b/python/src/prima/common/history.py
new file mode 100644
index 0000000000..743536bbe7
--- /dev/null
+++ b/python/src/prima/common/history.py
@@ -0,0 +1,17 @@
+def savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist):
+    '''
+    Save the data values to the history lists.
+
+    The implementation of this function is vastly different from the Fortran implementation.
+    This is mostly due to the ease of creating and appending to lists in Python.
+
+    However, just like the Fortran version, we should be concerned about both performance
+    and memory constraints. It will probably be better to initialize an array of NaN for
+    each of the histories and keep track of how many indices we have stored. Not needed for
+    the moment.
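+
+    For example (hypothetical values): with maxhist = 2 and empty lists, the first two calls
+    append x, f, cstrv, and constr to their respective lists; a third call is a no-op, since
+    len(xhist) is no longer less than maxhist.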
+    '''
+    if len(xhist) < maxhist:
+        xhist.append(x)
+        fhist.append(f)
+        chist.append(cstrv)
+        conhist.append(constr)
\ No newline at end of file
diff --git a/python/src/prima/common/infos.py b/python/src/prima/common/infos.py
new file mode 100644
index 0000000000..f80f4d4925
--- /dev/null
+++ b/python/src/prima/common/infos.py
@@ -0,0 +1,26 @@
+
+'''
+This is a module defining exit flags.
+'''
+
+INFO_DEFAULT = 0
+SMALL_TR_RADIUS = 0
+FTARGET_ACHIEVED = 1
+TRSUBP_FAILED = 2
+MAXFUN_REACHED = 3
+MAXTR_REACHED = 20
+NAN_INF_X = -1
+NAN_INF_F = -2
+NAN_INF_MODEL = -3
+NO_SPACE_BETWEEN_BOUNDS = 6
+DAMAGING_ROUNDING = 7
+ZERO_LINEAR_CONSTRAINT = 8
+
+# Stop-codes.
+# The following codes are used by ERROR STOP as stop-codes, which should be default integers.
+INVALID_INPUT = 100
+ASSERTION_FAILS = 101
+VALIDATION_FAILS = 102
+MEMORY_ALLOCATION_FAILS = 103
+
+
diff --git a/python/src/prima/common/linalg.py b/python/src/prima/common/linalg.py
new file mode 100644
index 0000000000..4580d2e3a3
--- /dev/null
+++ b/python/src/prima/common/linalg.py
@@ -0,0 +1,177 @@
+import numpy as np
+from prima.common.consts import DEBUGGING, EPS, REALMAX, REALMIN
+from prima.common.present import present
+
+def planerot(x):
+    '''
+    As in MATLAB, planerot(x) returns a 2x2 Givens matrix G for x in R2 so that Y=G@x has Y[1] = 0.
+    Roughly speaking, G = np.array([[x[0]/R, x[1]/R], [-x[1]/R, x[0]/R]]), where R = np.linalg.norm(x).
+    0. We need to take care of the possibilities of R=0, Inf, NaN, and over/underflow.
+    1. The G defined above is continuous with respect to X except at 0. Following this definition,
+    G = np.array([[np.sign(x[0]), 0], [0, np.sign(x[0])]]) if x[1] == 0,
+    G = np.array([[0, np.sign(x[1])], [np.sign(x[1]), 0]]) if x[0] == 0.
+    Yet some implementations ignore the signs, leading to discontinuity and numerical instability.
+    2. Difference from MATLAB: if x contains NaN or consists of only Inf, MATLAB returns a NaN matrix,
+    but we return an identity matrix or a matrix of +/-np.sqrt(2). We intend to keep G always orthogonal.
+    '''
+
+    # Preconditions
+    if DEBUGGING:
+        assert len(x) == 2, "x must be a 2-vector"
+
+    # ==================
+    # Calculation starts
+    # ==================
+
+    # Define C = X(1) / R and S = X(2) / R with R = HYPOT(X(1), X(2)). Handle Inf/NaN, over/underflow.
+    if (any(np.isnan(x))):
+        # In this case, MATLAB sets G to NaN(2, 2). We refrain from doing so to keep G orthogonal.
+        c = 1
+        s = 0
+    elif (all(np.isinf(x))):
+        # In this case, MATLAB sets G to NaN(2, 2). We refrain from doing so to keep G orthogonal.
+        c = 1 / np.sqrt(2) * np.sign(x[0])
+        s = 1 / np.sqrt(2) * np.sign(x[1])
+    elif (abs(x[0]) <= 0 and abs(x[1]) <= 0):  # X(1) == 0 == X(2).
+        c = 1
+        s = 0
+    elif (abs(x[1]) <= EPS * abs(x[0])):
+        # N.B.:
+        # 0. With <= instead of <, this case covers X(1) == 0 == X(2), which is treated above separately
+        # to avoid the confusing SIGN(., 0) (see 1).
+        # 1. SIGN(A, 0) = ABS(A) in Fortran, but sign(0) = 0 in MATLAB, Python, Julia, and R!
+        # 2. Taking SIGN(X(1)) into account ensures the continuity of G with respect to X except at 0.
+        c = np.sign(x[0])
+        s = 0
+    elif (abs(x[0]) <= EPS * abs(x[1])):
+        # N.B.: SIGN(A, X) = ABS(A) * (sign of X), which is NOT the same as A * (sign of X)!
+        # Therefore, it is WRONG to define G as SIGN(RESHAPE([ZERO, -ONE, ONE, ZERO], [2, 2]), X(2)).
+        # This mistake was committed on 20211206 and took a whole day to debug! NEVER use SIGN on
+        # arrays unless you are really sure.
+        c = 0
+        s = np.sign(x[1])
+    else:
+        # Here is the normal case.
+        # It implements the Givens rotation in a stable & continuous way as in:
+        # Bindel, D., Demmel, J., Kahan, W., and Marques, O. (2002). On computing Givens rotations
+        # reliably and efficiently. ACM Transactions on Mathematical Software (TOMS), 28(2), 206-238.
+        # N.B.: 1. Modern compilers compute SQRT(REALMIN) and SQRT(REALMAX/2.1) at compilation time.
+        # 2. The direct calculation without involving T and U seems to work better; use it if possible.
+        if (all(np.logical_and(np.sqrt(REALMIN) < np.abs(x), np.abs(x) < np.sqrt(REALMAX / 2.1)))):
+            # Do NOT use HYPOTENUSE here; the best implementation for one may be suboptimal for the other.
+            r = np.linalg.norm(x)
+            c = x[0] / r
+            s = x[1] / r
+        elif (abs(x[0]) > abs(x[1])):
+            t = x[1] / x[0]
+            u = max(1, abs(t), np.sqrt(1 + t**2))  # MAXVAL: precaution against rounding error.
+            u *= np.sign(x[0])  ##MATLAB: u = sign(x(1))*sqrt(1 + t**2)
+            c = 1 / u
+            s = t / u
+        else:
+            t = x[0] / x[1]
+            u = max([1, abs(t), np.sqrt(1 + t**2)])  # MAXVAL: precaution against rounding error.
+            u *= np.sign(x[1])  ##MATLAB: u = sign(x(2))*sqrt(1 + t**2)
+            c = t / u
+            s = 1 / u
+
+    G = np.array([[c, s], [-s, c]])  # MATLAB: G = [c, s; -s, c]
+
+    #====================#
+    # Calculation ends #
+    #====================#
+
+    # Postconditions
+    if DEBUGGING:
+        assert G.shape == (2, 2)
+        assert np.all(np.isfinite(G))
+        assert abs(G[0, 0] - G[1, 1]) + abs(G[0, 1] + G[1, 0]) <= 0
+        tol = np.maximum(1.0E-10, np.minimum(1.0E-1, 1.0E6 * EPS))
+        assert isorth(G, tol)
+        if all(np.logical_and(np.isfinite(x), np.abs(x) < np.sqrt(REALMAX / 2.1))):
+            r = np.linalg.norm(x)
+            assert max(abs(G@x - [r, 0])) <= max(tol, tol * r), 'G @ X = [||X||, 0]'
+
+    return G
+
+
+def isminor(x, ref):
+    '''
+    This function tests whether x is minor compared to ref. It is used by Powell, e.g., in COBYLA.
+    In precise arithmetic, isminor(x, ref) is true if and only if x == 0; in floating point
+    arithmetic, isminor(x, ref) is true if x is 0 or its nonzero value can be attributed to
+    computer rounding errors according to ref.
+    Larger sensitivity means the function is more strict/precise, the value 0.1 being due to Powell.
+
+    For example:
+    isminor(1e-20, 1e300) -> True, because in floating point arithmetic 1e-20 cannot be added to
+    1e300 without being rounded to 1e300.
+    isminor(1e300, 1e-20) -> False, because in floating point arithmetic adding 1e300 to 1e-20
+    dominates the latter number.
+    isminor(3, 4) -> False, because 3 can be added to 4 without being rounded off.
+    '''
+
+    sensitivity = 0.1
+    refa = abs(ref) + sensitivity * abs(x)
+    refb = abs(ref) + 2 * sensitivity * abs(x)
+    return np.logical_or(abs(ref) >= refa, refa >= refb)
+
+
+def isinv(A, B, tol=None):
+    '''
+    This procedure tests whether A = B^{-1} up to the tolerance TOL.
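+    For example (hypothetical): isinv(np.eye(2), np.eye(2)) is True, while
+    isinv(np.eye(2), 2 * np.eye(2)) is False under the default tolerance.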
+    '''
+
+    # Sizes
+    n = np.size(A, 0)
+
+    # Preconditions
+    if DEBUGGING:
+        assert np.size(A, 0) == np.size(A, 1)
+        assert np.size(B, 0) == np.size(B, 1)
+        assert np.size(A, 0) == np.size(B, 0)
+        if present(tol):
+            assert tol >= 0
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    tol = tol if present(tol) else np.minimum(1e-3, 1e2 * EPS * np.maximum(np.size(A, 0), np.size(A, 1)))
+    tol = np.max([tol, tol * np.max(abs(A)), tol * np.max(abs(B))])
+    # N.B.: take abs AFTER subtracting the identity, i.e. abs(A@B - I), not abs(A@B) - I.
+    is_inv = (abs(A@B - np.eye(n)) <= tol).all() or (abs(B@A - np.eye(n)) <= tol).all()
+
+    #====================#
+    # Calculation ends #
+    #====================#
+    return is_inv
+
+
+def isorth(A, tol=None):
+    '''
+    This function tests whether the matrix A has orthonormal columns up to the tolerance TOL.
+    '''
+
+    # Preconditions
+    if DEBUGGING:
+        if present(tol):
+            assert tol >= 0
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    num_vars = np.size(A, 1)
+
+    if num_vars > np.size(A, 0):
+        is_orth = False
+    elif (np.isnan(np.sum(abs(A)))):
+        is_orth = False
+    else:
+        if present(tol):
+            is_orth = (abs(A.T@A - np.eye(num_vars)) <= np.maximum(tol, tol * np.max(abs(A)))).all()
+        else:
+            is_orth = (abs(A.T@A - np.eye(num_vars)) <= 0).all()
+
+    #====================#
+    # Calculation ends #
+    #====================#
+    return is_orth
diff --git a/python/src/prima/common/message.py b/python/src/prima/common/message.py
new file mode 100644
index 0000000000..02cd8236b6
--- /dev/null
+++ b/python/src/prima/common/message.py
@@ -0,0 +1,276 @@
+from prima.common.consts import DEBUGGING
+from prima.common.infos import FTARGET_ACHIEVED, MAXFUN_REACHED, MAXTR_REACHED, \
+    SMALL_TR_RADIUS, TRSUBP_FAILED, NAN_INF_F, NAN_INF_X, NAN_INF_MODEL, DAMAGING_ROUNDING, \
+    NO_SPACE_BETWEEN_BOUNDS, ZERO_LINEAR_CONSTRAINT
+from prima.common.present import present
+import numpy as np
+
+'''
+This module provides some functions that print messages to terminal/files.
+
+N.B.:
+1. In case parallelism is desirable (especially during initialization), the functions may
+have to be modified or disabled due to the IO operations.
+2. IPRINT indicates the level of verbosity, which increases with the absolute value of IPRINT.
+IPRINT = +/-3 can be expensive due to high IO operations.
+'''
+
+spaces = ' '
+
+def retmsg(solver, info, iprint, nf, f, x, cstrv=None, constr=None):
+    '''
+    This function prints messages at return.
+    '''
+    # Local variables
+    valid_exit_codes = [FTARGET_ACHIEVED, MAXFUN_REACHED, MAXTR_REACHED,
+                        SMALL_TR_RADIUS, TRSUBP_FAILED, NAN_INF_F, NAN_INF_X, NAN_INF_MODEL,
+                        DAMAGING_ROUNDING, NO_SPACE_BETWEEN_BOUNDS, ZERO_LINEAR_CONSTRAINT]
+
+    # Preconditions
+    if DEBUGGING:
+        assert info in valid_exit_codes
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    if abs(iprint) < 1:  # No printing (iprint == 0)
+        return
+    elif iprint > 0:  # Print the message to the standard out.
+        fname = ''
+    else:  # Print the message to a file named FNAME.
+        fname = f'{solver}_output.txt'
+
+    # Decide whether the problem is truly constrained.
+    if present(constr):
+        is_constrained = (np.size(constr) > 0)
+    else:
+        is_constrained = present(cstrv)
+
+    # Decide the constraint violation.
+    if present(cstrv):
+        cstrv_loc = cstrv
+    elif present(constr):
+        cstrv_loc = np.max(np.append(0, -constr))  # N.B.: We assume that the constraint is CONSTR >= 0.
+    else:
+        cstrv_loc = 0
+
+    # Decide the return message.
+    if info == FTARGET_ACHIEVED:
+        reason = 'the target function value is achieved.'
+    elif info == MAXFUN_REACHED:
+        reason = 'the objective function has been evaluated MAXFUN times.'
+    elif info == MAXTR_REACHED:
+        reason = 'the maximal number of trust region iterations has been reached.'
+    elif info == SMALL_TR_RADIUS:
+        reason = 'the trust region radius reaches its lower bound.'
+    elif info == TRSUBP_FAILED:
+        reason = 'a trust region step has failed to reduce the quadratic model.'
+    elif info == NAN_INF_X:
+        reason = 'NaN or Inf occurs in x.'
+    elif info == NAN_INF_F:
+        reason = 'the objective function returns NaN/+Inf.'
+    elif info == NAN_INF_MODEL:
+        reason = 'NaN or Inf occurs in the models.'
+    elif info == DAMAGING_ROUNDING:
+        reason = 'rounding errors are becoming damaging.'
+    elif info == NO_SPACE_BETWEEN_BOUNDS:
+        reason = 'there is no space between the lower and upper bounds of some variable.'
+    elif info == ZERO_LINEAR_CONSTRAINT:
+        reason = 'one of the linear constraints has a zero gradient.'
+    else:
+        reason = 'UNKNOWN EXIT FLAG'
+    ret_message = f'\nReturn from {solver} because {reason.strip()}'
+
+    if np.size(x) <= 2:
+        x_message = f'\nThe corresponding X is: {x}'  # Printed in one line
+    else:
+        x_message = f'\nThe corresponding X is:\n{x}'
+
+    if is_constrained:
+        nf_message = (f'\nNumber of function values = {nf}{spaces}'
+                      f'Least value of F = {f}{spaces}Constraint violation = {cstrv_loc}')
+    else:
+        nf_message = f'\nNumber of function values = {nf}{spaces}Least value of F = {f}'
+
+    if is_constrained and present(constr):
+        if np.size(constr) <= 2:
+            constr_message = f'\nThe constraint value is: {constr}'  # Printed in one line
+        else:
+            constr_message = f'\nThe constraint value is:\n{constr}'
+    else:
+        constr_message = ''
+
+    # Print the message.
+    if abs(iprint) >= 2:
+        message = f'\n{ret_message}{nf_message}{x_message}{constr_message}\n'
+    else:
+        message = f'{ret_message}{nf_message}{x_message}{constr_message}\n'
+    if len(fname) > 0:
+        with open(fname, 'a') as f: f.write(message)
+    else:
+        print(message)
+
+
+def rhomsg(solver, iprint, nf, delta, f, rho, x, cstrv=None, constr=None, cpen=None):
+    '''
+    This function prints messages when RHO is updated.
+    '''
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    if abs(iprint) < 2:  # No printing
+        return
+    elif iprint > 0:  # Print the message to the standard out.
+        fname = ''
+    else:  # Print the message to a file named FNAME.
+        fname = f'{solver.strip()}_output.txt'
+
+    # Decide whether the problem is truly constrained.
+    if present(constr):
+        is_constrained = (np.size(constr) > 0)
+    else:
+        is_constrained = present(cstrv)
+
+    # Decide the constraint violation.
+    if present(cstrv):
+        cstrv_loc = cstrv
+    elif present(constr):
+        cstrv_loc = np.max(np.append(0, -constr))  # N.B.: We assume that the constraint is CONSTR >= 0.
+ else: + cstrv_loc = 0 + + if present(cpen): + rho_message = (f'\nNew RHO = {rho}{spaces}Delta = {delta}{spaces}' + f'CPEN = {cpen}') + else: + rho_message = f'\nNew RHO = {rho}{spaces}Delta = {delta}' + + if np.size(x) <= 2: + x_message = f'\nThe corresponding X is: {x}' # Printed in one line + else: + x_message = f'\nThe corresponding X is:\n{x}' + + if is_constrained: + nf_message = (f'\nNumber of function values = {nf}{spaces}' + f'Least value of F = {f}{spaces}Constraint violation = {cstrv_loc}') + else: + nf_message = f'\nNumber of function values = {nf}{spaces}Least value of F = {f}' + + if is_constrained and present(constr): + if np.size(constr) <= 2: + constr_message = f'\nThe constraint value is: {constr}' # Printed in one line + else: + constr_message = f'\nThe constraint value is:\n{constr}' + else: + constr_message = '' + + # Print the message. + if abs(iprint) >= 3: + message = f'\n{rho_message}{nf_message}{x_message}{constr_message}' + else: + message = f'{rho_message}{nf_message}{x_message}{constr_message}' + if len(fname) > 0: + with open(fname, 'a') as f: f.write(message) + else: + print(message) + + #====================# + # Calculation ends # + #====================# + + +def cpenmsg(solver, iprint, cpen): + ''' + This function prints a message when CPEN is updated. + ''' + + #====================# + # Calculation starts # + #====================# + + if abs(iprint) < 2: # No printing + return + elif iprint > 0: # Print the message to the standard out. + fname = '' + else: # Print the message to a file named FNAME. + fname = f'{solver.strip()}_output.txt' + + # Print the message. + if abs(iprint) >= 3: + message = f'\nSet CPEN to {cpen}' + else: + message = f'\n\nSet CPEN to {cpen}' + if len(fname) > 0: + with open(fname, 'a') as f: f.write(message) + else: + print(message) + + #====================# + # Calculation ends # + #====================# + + +def fmsg(solver, state, iprint, nf, delta, f, x, cstrv=None, constr=None): + ''' + This subroutine prints messages for each evaluation of the objective function. + ''' + + #====================# + # Calculation starts # + #====================# + + if abs(iprint) < 2: # No printing + return + elif iprint > 0: # Print the message to the standard out. + fname = '' + else: # Print the message to a file named FNAME. + fname = f'{solver.strip()}_output.txt' + + # Decide whether the problem is truly constrained. + if present(constr): + is_constrained = (np.size(constr) > 0) + else: + is_constrained = present(cstrv) + + # Decide the constraint violation. + if present(cstrv): + cstrv_loc = cstrv + elif present(constr): + cstrv_loc = np.max(np.append(0, -constr)) # N.B.: We assume that the constraint is CONSTR >= 0. + else: + cstrv_loc = 0 + + delta_message = f'\n{state} step with radius = {delta}' + + if is_constrained: + nf_message = (f'\nNumber of function values = {nf}{spaces}' + f'Least value of F = {f}{spaces}Constraint violation = {cstrv_loc}') + else: + nf_message = f'\nNumber of function values = {nf}{spaces}Least value of F = {f}' + + if np.size(x) <= 2: + x_message = f'\nThe corresponding X is: {x}' # Printed in one line + else: + x_message = f'\nThe corresponding X is:\n{x}' + + if is_constrained and present(constr): + if np.size(constr) <= 2: + constr_message = f'\nThe constraint value is: {constr}' # Printed in one line + else: + constr_message = f'\nThe constraint value is:\n{constr}' + else: + constr_message = '' + + # Print the message. 
+    message = f'{delta_message}{nf_message}{x_message}{constr_message}'
+    if len(fname) > 0:
+        with open(fname, 'a') as f: f.write(message)
+    else:
+        print(message)
+
+    #====================#
+    # Calculation ends #
+    #====================#
diff --git a/python/src/prima/common/powalg.py b/python/src/prima/common/powalg.py
new file mode 100644
index 0000000000..a44b37bc6e
--- /dev/null
+++ b/python/src/prima/common/powalg.py
@@ -0,0 +1,118 @@
+import numpy as np
+from prima.common.linalg import isminor, planerot
+from prima.common.consts import EPS
+
+
+def qradd_Rdiag(c, Q, Rdiag, n):
+    '''
+    This function updates the QR factorization of an MxN matrix A of full column rank, attempting to
+    add a new column C to this matrix as the LAST column while maintaining the full-rankness.
+    Case 1. If C is not in range(A) (theoretically, it implies N < M), then the new matrix is np.hstack([A, C])
+    Case 2. If C is in range(A), then the new matrix is np.hstack([A[:, :n-1], C])
+    N.B.:
+    0. Instead of R, this subroutine updates Rdiag, which is np.diag(R), with a size at most M and at
+    least min(m, n+1). The number is min(m, n+1) rather than min(m, n) as n may be augmented by 1 in
+    the function.
+    1. With the two cases specified as above, this function does not need A as an input.
+    2. The function changes only Q[:, nsave:m] (nsave is the original value of n) and
+    R[:, n-1] (n takes the updated value).
+    3. Indeed, when C is in range(A), Powell wrote in comments that "set IOUT to the index of the
+    constraint (here, column of A --- Zaikun) to be deleted, but branch if no suitable index can be
+    found". The idea is to replace a column of A by C so that the new matrix still has full rank
+    (such a column must exist unless C = 0). But his code essentially sets iout=n always. Maybe he
+    found this worked well enough in practice. Meanwhile, Powell's code includes a snippet that can
+    never be reached, which was probably intended to deal with the case that IOUT != n.
+    '''
+    m = Q.shape[1]
+    nsave = n  # Needed for debugging (only)
+
+    # As in Powell's COBYLA, CQ is set to 0 at the positions with CQ being negligible as per ISMINOR.
+    # This may not be the best choice if the subroutine is used in other contexts, e.g. LINCOA.
+    cq = c @ Q
+    cqa = abs(c) @ abs(Q)
+    # The line below basically makes an element of cq 0 if adding it to the corresponding element of
+    # cqa does not change the latter.
+    cq = np.array([0 if isminor(cqi, cqai) else cqi for cqi, cqai in zip(cq, cqa)])
+
+    # Update Q so that the columns of Q[:, n+1:m] are orthogonal to C. This is done by applying a 2D
+    # Givens rotation to Q[:, [k, k+1]] from the right to zero C' @ Q[:, k+1] out for K=n+1, ... m-1.
+    # Nothing will be done if n >= m-1.
+    for k in range(m-2, n-1, -1):
+        if abs(cq[k+1]) > 0:
+            # Powell wrote cq[k+1] != 0 instead of abs. The two differ if cq[k+1] is NaN.
+            # If we apply the rotation below when cq[k+1] = 0, then cq[k] will get updated to |cq[k]|.
+            G = planerot(cq[k:k+2])
+            Q[:, [k, k+1]] = Q[:, [k, k+1]] @ G.T
+            cq[k] = np.linalg.norm(cq[k:k+2])
+
+    # Augment n by 1 if C is not in range(A).
+    # The two ifs cannot be merged as Fortran may evaluate cq[n+1] even if n >= m, leading to a segfault.
+    # TODO: Reword the above as appropriate for Python; an IndexError, as opposed to a segfault, perhaps.
+    if n < m:
+        # Powell's condition for the following if: cq[n+1] != 0
+        if abs(cq[n]) > EPS*2 and not isminor(cq[n], cqa[n]):
+            n += 1
+
+    # Update Rdiag so that Rdiag[n] = cq[n] = np.dot(c, Q[:, n]). Note that N may have been augmented.
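+    # For example (hypothetical): with Q = np.eye(3), n = 1, and c = np.array([0.0, 1.0, 0.0]),
+    # c is not in the range of A = Q[:, :1], so n is augmented to 2 and Rdiag[1] becomes
+    # c @ Q[:, 1] = 1.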
+    if n - 1 >= 0 and n - 1 < m:  # n >= m should not happen unless the input is wrong
+        Rdiag[n - 1] = cq[n - 1]
+    return Q, Rdiag, n
+
+
+def qrexc_Rdiag(A, Q, Rdiag, i):  # Used in COBYLA
+    '''
+    This function updates the QR factorization for an MxN matrix A=Q@R so that the updated Q and
+    R form a QR factorization of [A_0, ..., A_{I-1}, A_{I+1}, ..., A_{N-1}, A_I], which is the matrix
+    obtained by rearranging columns [I, I+1, ..., N-1] of A to [I+1, ..., N-1, I]. Here A is ASSUMED TO
+    BE OF FULL COLUMN RANK, Q is a matrix whose columns are orthonormal, and R, which is not present,
+    is an upper triangular matrix whose diagonal entries are nonzero. Q and R need not be square.
+    N.B.:
+    0. Instead of R, this function updates Rdiag, which is np.diag(R), the size being n.
+    1. With L = Q.shape[1] = R.shape[0], we have M >= L >= N. Most often L = M or N.
+    2. This function changes only Q[:, i:] and Rdiag[i:].
+    3. (NDB 20230919) In Python, i is either icon or nact - 2, whereas in FORTRAN it is either icon or nact - 1.
+    '''
+
+    # Sizes
+    m, n = A.shape
+
+    # Preconditions
+    assert n >= 1 and n <= m
+    assert i >= 0 and i < n
+    assert len(Rdiag) == n
+    assert Q.shape[0] == m and Q.shape[1] >= n and Q.shape[1] <= m
+    # tol = max(1.0E-8, min(1.0E-1, 1.0E8 * EPS * m + 1))
+    # assert isorth(Q, tol)  # Costly!
+
+
+    if i < 0 or i >= n:
+        return Q, Rdiag
+
+    # Let R be the upper triangular matrix in the QR factorization, namely R = Q.T@A.
+    # For each k, find the Givens rotation G with G@(R[k:k+2, :]) = [hypt, 0], and update Q[:, k:k+2]
+    # to Q[:, k:k+2]@(G.T). Then R = Q.T@A is an upper triangular matrix as long as A[:, [k, k+1]] is
+    # updated to A[:, [k+1, k]]. Indeed, this new upper triangular matrix can be obtained by first
+    # updating R[[k, k+1], :] to G@(R[[k, k+1], :]) and then exchanging its columns K and K+1; at the same
+    # time, entries k and k+1 of R's diagonal Rdiag become [hypt, -(Rdiag[k+1]/hypt)*Rdiag[k]].
+    # After this is done for each k = 0, ..., n-2, we obtain the QR factorization of the matrix that
+    # rearranges columns [i, i+1, ..., n-1] of A as [i+1, ..., n-1, i].
+    # Powell's code, however, is slightly different: before everything, he first exchanged columns k and
+    # k+1 of Q (as well as rows k and k+1 of R). This makes sure that the entries of the updated Rdiag
+    # are all positive if it is the case for the original Rdiag.
+    for k in range(i, n-1):
+        G = planerot([Rdiag[k+1], np.dot(Q[:, k], A[:, k+1])])
+        Q[:, [k, k+1]] = Q[:, [k+1, k]]@(G.T)
+        # Powell's code updates Rdiag in the following way:
+        # hypt = np.sqrt(Rdiag[k+1]**2 + np.dot(Q[:, k], A[:, k+1])**2)
+        # Rdiag[[k, k+1]] = [hypt, (Rdiag[k+1]/hypt)*Rdiag[k]]
+        # Note that Rdiag[n-1] inherits all rounding in Rdiag[i:n-1] and Q[:, i:n-1] and hence contains
+        # significant errors. Thus we may modify Powell's code to set only Rdiag[k] = hypt here and then
+        # calculate Rdiag[n] by an inner product after the loop. Nevertheless, we simply calculate Rdiag
+        # from scratch below.
+ + # Calculate Rdiag(i:n) from scratch + Rdiag[i:n-1] = [np.dot(Q[:, k], A[:, k+1]) for k in range(i, n-1)] + Rdiag[n-1] = np.dot(Q[:, n-1], A[:, i]) + # Rdiag = np.array(Rdiag) + + return Q, Rdiag \ No newline at end of file diff --git a/python/src/prima/common/preproc.py b/python/src/prima/common/preproc.py new file mode 100644 index 0000000000..c92844b554 --- /dev/null +++ b/python/src/prima/common/preproc.py @@ -0,0 +1,267 @@ +from prima.common.consts import DEBUGGING, EPS, IPRINT_DEFAULT, FTARGET_DEFAULT, \ + MIN_MAXFILT, MAXFILT_DEFAULT, MAXHISTMEM, ETA1_DEFAULT, ETA2_DEFAULT, \ + GAMMA1_DEFAULT, GAMMA2_DEFAULT, RHOBEG_DEFAULT, RHOEND_DEFAULT, \ + CTOL_DEFAULT, CWEIGHT_DEFAULT +from prima.common.present import present +from warnings import warn +import numpy as np + + +def preproc(solver, num_vars, iprint, maxfun, maxhist, ftarget, rhobeg, rhoend, + num_constraints=None, npt=None, maxfilt=None, ctol=None, cweight=None, + eta1=None, eta2=None, gamma1=None, gamma2=None, is_constrained=None, has_rhobeg=None, + honour_x0=None, xl=None, xu=None, x0=None): + ''' + This subroutine preprocesses the inputs. It does nothing to the inputs that are valid. + ''' + # Preconditions + if DEBUGGING: + assert num_vars >= 1 + if present(num_constraints): + assert num_constraints >= 0 + assert num_constraints == 0 or solver.lower() == 'cobyla' + if solver.lower() == 'cobyla' and present(num_constraints) and present(is_constrained): + assert num_constraints == 0 or is_constrained + if solver.lower() == 'bobyqa': + assert present(xl) and present(xu) + assert all(xu - xl >= 2 * EPS) + if present(honour_x0): + assert solver.lower() == 'bobyqa' and present(has_rhobeg) and present(xl) and present(xu) and present(x0) + + #====================# + # Calculation starts # + #====================# + + # Read num_constraints, if necessary + num_constraints = num_constraints if (present(num_constraints) and solver.lower() == 'cobyla') else 0 + + # Decide whether the problem is truly constrained + is_constrained = is_constrained if (present(is_constrained)) else num_constraints > 0 + + # Validate IPRINT + if np.abs(iprint) > 3: + iprint = IPRINT_DEFAULT + warn(f'{solver}: Invalid IPRINT; it should be 0, 1, -1, 2, -2, 3, or -3; it is set to {iprint}') + + # Validate MAXFUN + if solver.lower() == 'uobyqa': + min_maxfun = (num_vars + 1) * (num_vars + 2) / 2 + 1 + min_maxfun_str = '(N+1)(N+2)/2 + 1' + elif solver.lower() == 'cobyla': + min_maxfun = num_vars + 2 + min_maxfun_str = 'num_vars + 2' + else: # CASE ('NEWUOA', 'BOBYQA', 'LINCOA') + min_maxfun = num_vars + 3 + min_maxfun_str = 'num_vars + 3' + if maxfun < min_maxfun: + maxfun = min_maxfun + warn(f'{solver}: Invalid MAXFUN; it should be at least {min_maxfun_str}; it is set to {maxfun}') + + # Validate MAXHIST + if maxhist < 0: + maxhist = maxfun + warn(f'{solver}: Invalid MAXHIST; it should be a nonnegative integer; it is set to {maxhist}') + maxhist = min(maxhist, maxfun) # MAXHIST > MAXFUN is never needed. 
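+    # For example (hypothetical): maxfun = 100 with an input maxhist = -1 triggers the warning
+    # above and yields maxhist = 100.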
+ + # Validate FTARGET + if np.isnan(ftarget): + ftarget = FTARGET_DEFAULT + warn(f'{solver}: Invalid FTARGET; it should be a real number; it is set to {ftarget}') + + # Validate NPT + if (solver.lower() == 'newuoa' or solver.lower() == 'bobyqa' or solver.lower() == 'lincoa') and present(npt): + if (npt < num_vars + 2 or npt > min(maxfun - 1, ((num_vars + 2) * (num_vars + 1)) / 2)): + npt = int(min(maxfun - 1, 2 * num_vars + 1)) + warn(f'{solver}: Invalid NPT; it should be an integer in the interval [N+2, (N+1)(N+2)/2] and less than MAXFUN; it is set to {npt}') + + # Validate MAXFILT + if present(maxfilt) and (solver.lower() == 'lincoa' or solver.lower() == 'cobyla'): + maxfilt_in = maxfilt + if maxfilt < 1: + maxfilt = MAXFILT_DEFAULT # The inputted MAXFILT is obviously wrong. + else: + maxfilt = max(MIN_MAXFILT, maxfilt) # The inputted MAXFILT is too small. + # Further revise MAXFILT according to MAXHISTMEM. + if solver.lower() == 'lincoa': + unit_memo = (num_vars + 2) * np.dtype(float).itemsize + elif solver.lower() == 'cobyla': + unit_memo = (num_constraints + num_vars + 2) * np.dtype(float).itemsize + else: + unit_memo = 1 + # We cannot simply set MAXFILT = MIN(MAXFILT, MAXHISTMEM/...), as they may not have + # the same kind, and compilers may complain. We may convert them, but overflow may occur. + if maxfilt > MAXHISTMEM / unit_memo: + maxfilt = int(MAXHISTMEM / unit_memo) # Integer division. + maxfilt = min(maxfun, max(MIN_MAXFILT, maxfilt)) + if is_constrained: + if maxfilt_in < 1: + warn(f'{solver}: Invalid MAXFILT; it should be a positive integer; it is set to {maxfilt}') + elif maxfilt_in < min(maxfun, MIN_MAXFILT): + warn(f'{solver}: MAXFILT is too small; it is set to {maxfilt}') + elif maxfilt < min(maxfilt_in, maxfun): + warn(f'{solver}: MAXFILT is set to {maxfilt} due to memory limit') + + # Validate ETA1 and ETA2 + eta1_local = eta1 if present(eta1) else ETA1_DEFAULT + eta2_local = eta2 if present(eta2) else ETA2_DEFAULT + + # When the difference between ETA1 and ETA2 is tiny, we force them to equal. + # See the explanation around RHOBEG and RHOEND for the reason. + if present(eta1) and present(eta2): + if np.abs(eta1 - eta2) < 1.0E2 * EPS * max(np.abs(eta1), 1): + eta2 = eta1 + + if present(eta1): + if np.isnan(eta1): + # In this case, we take the value hard coded in Powell's original code + # without any warning. It is useful when interfacing with MATLAB/Python. + eta1 = ETA1_DEFAULT + elif eta1 < 0 or eta1 >= 1: + # Take ETA2 into account if it has a valid value. + if present(eta2) and eta2_local > 0 and eta2_local <= 1: + eta1 = max(EPS, eta2 / 7.0) + else: + eta1 = ETA1_DEFAULT + warn(f'{solver}: Invalid ETA1; it should be in the interval [0, 1) and not more than ETA2; it is set to {eta1}') + + if present(eta2): + if np.isnan(eta2): + # In this case, we take the value hard coded in Powell's original code + # without any warning. It is useful when interfacing with MATLAB/Python. + eta2 = ETA2_DEFAULT + elif present(eta1) and (eta2 < eta1_local or eta2 > 1): + eta2 = (eta1 + 2) / 3.0 + warn(f'{solver}: Invalid ETA2; it should be in the interval [0, 1) and not less than ETA1; it is set to {eta2}') + + # Validate GAMMA1 and GAMMA2 + if present(gamma1): + if np.isnan(gamma1): + # In this case, we take the value hard coded in Powell's original code + # without any warning. It is useful when interfacing with MATLAB/Python. 
+            gamma1 = GAMMA1_DEFAULT
+        elif gamma1 <= 0 or gamma1 >= 1:
+            gamma1 = GAMMA1_DEFAULT
+            warn(f'{solver}: Invalid GAMMA1; it should be in the interval (0, 1); it is set to {gamma1}')
+
+    if present(gamma2):
+        if np.isnan(gamma2):
+            # In this case, we take the value hard coded in Powell's original code
+            # without any warning. It is useful when interfacing with MATLAB/Python.
+            gamma2 = GAMMA2_DEFAULT
+        elif gamma2 < 1 or np.isinf(gamma2):
+            gamma2 = GAMMA2_DEFAULT
+            warn(f'{solver}: Invalid GAMMA2; it should be a real number not less than 1; it is set to {gamma2}')
+
+    # Validate RHOBEG and RHOEND
+
+    if np.abs(rhobeg - rhoend) < 1.0e2 * EPS * np.maximum(np.abs(rhobeg), 1):
+        # When the data is passed from the interfaces (e.g., MEX) to the Fortran code, RHOBEG, and RHOEND
+        # may change a bit. It was observed in a MATLAB test that MEX passed 1 to Fortran as
+        # 0.99999999999999978. Therefore, if we set RHOEND = RHOBEG in the interfaces, then it may happen
+        # that RHOEND > RHOBEG, which is considered as an invalid input. To avoid this situation, we
+        # force RHOBEG and RHOEND to equal when the difference is tiny.
+        rhoend = rhobeg
+
+    # Revise the default values for RHOBEG/RHOEND according to the solver.
+    if solver.lower() == 'bobyqa':
+        # N.B.: use np.minimum (elementwise) rather than np.min, whose second argument is an axis.
+        rhobeg_default = np.maximum(EPS, np.minimum(RHOBEG_DEFAULT, np.min(xu - xl) / 4.0))
+        rhoend_default = np.maximum(EPS, np.minimum(0.1 * rhobeg_default, RHOEND_DEFAULT))
+    else:
+        rhobeg_default = RHOBEG_DEFAULT
+        rhoend_default = RHOEND_DEFAULT
+
+    if solver.lower() == 'bobyqa':
+        # Do NOT merge the IF below into the ELIF above! Otherwise, XU and XL may be accessed even if
+        # the solver is not BOBYQA, because the logical evaluation is not short-circuit.
+        if rhobeg > np.min(xu - xl) / 2:
+            # Do NOT make this revision if RHOBEG not positive or not finite, because otherwise RHOBEG
+            # will get a huge value when XU or XL contains huge values that indicate unbounded variables.
+            rhobeg = np.min(xu - xl) / 4.0  # Here, we do not take RHOBEG_DEFAULT.
+            warn(f'{solver}: Invalid RHOBEG; {solver} requires 0 < RHOBEG <= np.min(XU-XL)/2; it is set to np.min(XU-XL)/4')
+    if rhobeg <= 0 or np.isnan(rhobeg) or np.isinf(rhobeg):
+        # Take RHOEND into account if it has a valid value. We do not do this if the solver is BOBYQA,
+        # which requires that RHOBEG <= (XU-XL)/2.
+        if np.isfinite(rhoend) and rhoend > 0 and solver.lower() != 'bobyqa':
+            rhobeg = max(10 * rhoend, rhobeg_default)
+        else:
+            rhobeg = rhobeg_default
+        warn(f'{solver}: Invalid RHOBEG; it should be a positive number; it is set to {rhobeg}')
+
+    if rhoend <= 0 or rhobeg < rhoend or np.isnan(rhoend) or np.isinf(rhoend):
+        rhoend = max(EPS, min(0.1 * rhobeg, rhoend_default))
+        warn(f'{solver}: Invalid RHOEND; it should be a positive number and RHOEND <= RHOBEG; it is set to {rhoend}')
+
+    # For BOBYQA, revise X0 or RHOBEG so that the distance between X0 and the inactive bounds is at
+    # least RHOBEG. If HONOUR_X0 == TRUE, revise RHOBEG if needed; otherwise, revise HONOUR_X0 if needed.
+    if present(honour_x0):
+        if honour_x0:
+            rhobeg_old = rhobeg
+            lbx = np.isfinite(xl) & (x0 - xl <= EPS * np.maximum(1, np.abs(xl)))  # X0 essentially equals XL
+            ubx = np.isfinite(xu) & (x0 - xu >= -EPS * np.maximum(1, np.abs(xu)))  # X0 essentially equals XU
+            x0[lbx] = xl[lbx]
+            x0[ubx] = xu[ubx]
+            # N.B.: concatenate the pieces before taking the min; they may have different lengths.
+            rhobeg = max(EPS, np.min(np.concatenate(([rhobeg], x0[~lbx] - xl[~lbx], xu[~ubx] - x0[~ubx]))))
+            if rhobeg_old - rhobeg > EPS * max(1, rhobeg_old):
+                rhoend = max(EPS, min(0.1 * rhobeg, rhoend))  # We do not revise RHOEND unless RHOBEG is truly revised.
+                if has_rhobeg:
+                    warn(f'{solver}: RHOBEG is revised to {rhobeg} and RHOEND to at most 0.1*RHOBEG so that the distance between X0 and the inactive bounds is at least RHOBEG')
+                else:
+                    rhoend = np.minimum(rhoend, rhobeg)  # This may update RHOEND slightly.
+        else:
+            x0_old = np.copy(x0)  # Copy, so that we can see below whether X0 is really revised.
+            # N.B.: The following revision is valid only if XL <= X0 <= XU and RHOBEG <= MINVAL(XU-XL)/2,
+            # which should hold at this point due to the revision of RHOBEG and moderation of X0.
+            # The cases below are mutually exclusive in precise arithmetic as MINVAL(XU-XL) >= 2*RHOBEG.
+            lbx = x0 <= xl + 0.5 * rhobeg
+            lbx_plus = (x0 > xl + 0.5 * rhobeg) & (x0 < xl + rhobeg)
+            ubx = x0 >= xu - 0.5 * rhobeg
+            ubx_minus = (x0 < xu - 0.5 * rhobeg) & (x0 > xu - rhobeg)
+            x0[lbx] = xl[lbx]
+            x0[lbx_plus] = xl[lbx_plus] + rhobeg
+            x0[ubx] = xu[ubx]
+            x0[ubx_minus] = xu[ubx_minus] - rhobeg
+
+            if (any(np.abs(x0_old - x0) > 0)):
+                warn(f'{solver}: X0 is revised so that the distance between X0 and the inactive bounds is at least RHOBEG; set HONOUR_X0 to True if you prefer to keep X0 unchanged')
+
+    # Validate CTOL (it can be 0)
+    if (present(ctol)):
+        if (np.isnan(ctol) or ctol < 0):
+            ctol = CTOL_DEFAULT
+            if (is_constrained):
+                warn(f'{solver}: Invalid CTOL; it should be a nonnegative number; it is set to {ctol}')
+
+    # Validate CWEIGHT (it can be +Inf)
+    if (present(cweight)):
+        if (np.isnan(cweight) or cweight < 0):
+            cweight = CWEIGHT_DEFAULT
+            if (is_constrained):
+                warn(f'{solver}: Invalid CWEIGHT; it should be a nonnegative number; it is set to {cweight}')
+
+    #====================#
+    # Calculation ends #
+    #====================#
+
+    # Postconditions
+    if DEBUGGING:
+        assert abs(iprint) <= 3
+        assert maxhist >= 0 and maxhist <= maxfun
+        if present(npt):
+            assert maxfun >= npt + 1
+            assert npt >= 3
+        if present(maxfilt):
+            assert maxfilt >= np.minimum(MIN_MAXFILT, maxfun) and maxfilt <= maxfun
+        if present(eta1) and present(eta2):
+            assert eta1 >= 0 and eta1 <= eta2 and eta2 < 1
+        if present(gamma1) and present(gamma2):
+            assert gamma1 > 0 and gamma1 < 1 and gamma2 > 1
+        assert rhobeg >= rhoend and rhoend > 0
+        if solver.lower() == 'bobyqa':
+            assert all(rhobeg <= (xu - xl) / 2)
+            assert all(np.isfinite(x0))
+            # N.B.: use elementwise `&`/`|`; Python's `and`/`or` are ambiguous on arrays.
+            assert all((x0 >= xl) & ((x0 <= xl) | (x0 >= xl + rhobeg)))
+            assert all((x0 <= xu) & ((x0 >= xu) | (x0 <= xu - rhobeg)))
+        if present(ctol):
+            assert ctol >= 0
+
+    return iprint, maxfun, maxhist, ftarget, rhobeg, rhoend, npt, maxfilt, ctol, cweight, eta1, eta2, gamma1, gamma2, x0
\ No newline at end of file
diff --git a/python/src/prima/common/present.py b/python/src/prima/common/present.py
new file mode 100644
index 0000000000..15f8b47b67
--- /dev/null
+++ b/python/src/prima/common/present.py
@@ -0,0 +1,5 @@
+def present(x):
+    '''
+    This is a Python equivalent of the Fortran 'present' function for optional arguments.
+    '''
+    return x is not None
\ No newline at end of file
diff --git a/python/src/prima/common/ratio.py b/python/src/prima/common/ratio.py
new file mode 100644
index 0000000000..80712f5104
--- /dev/null
+++ b/python/src/prima/common/ratio.py
@@ -0,0 +1,44 @@
+from prima.common.consts import DEBUGGING, REALMAX
+import numpy as np
+
+def redrat(ared, pred, rshrink):
+    '''
+    This function evaluates the reduction ratio of a trust-region step, handling Inf/NaN properly.
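+    For example (hypothetical): redrat(1.0, 2.0, 0.5) gives ared/pred = 0.5, while
+    redrat(1.0, -1.0, 0.5) gives rshrink/2 = 0.25, since pred <= 0 while ared > 0.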
+    '''
+
+    # Preconditions
+    if DEBUGGING:
+        assert rshrink >= 0
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    if np.isnan(ared):
+        # This should not happen in unconstrained problems due to the moderated extreme barrier.
+        ratio = -REALMAX
+    elif np.isnan(pred) or pred <= 0:
+        # The trust-region subproblem solver fails in this rare case. Instead of terminating as Powell's
+        # original code does, we set ratio as follows so that the solver may continue to progress.
+        if ared > 0:
+            # The trial point will be accepted, but the trust-region radius will be shrunk if rshrink > 0.
+            ratio = rshrink/2
+        else:
+            # Set the ratio to a large negative number to signify a bad trust-region step, so that the
+            # solver will check whether to take a geometry step or reduce rho.
+            ratio = -REALMAX
+    elif np.isposinf(pred) and np.isposinf(ared):
+        ratio = 1  # ared/pred = NaN if calculated directly
+    elif np.isposinf(pred) and np.isneginf(ared):
+        ratio = -REALMAX  # ared/pred = NaN if calculated directly
+    else:
+        ratio = ared/pred
+
+    #==================#
+    # Calculation ends #
+    #==================#
+
+    # Postconditions
+    if DEBUGGING:
+        assert not np.isnan(ratio)
+    return ratio
\ No newline at end of file
diff --git a/python/src/prima/common/redrho.py b/python/src/prima/common/redrho.py
new file mode 100644
index 0000000000..fd11baf92d
--- /dev/null
+++ b/python/src/prima/common/redrho.py
@@ -0,0 +1,37 @@
+from prima.common.consts import DEBUGGING
+import numpy as np
+
+def redrho(rho_in, rhoend):
+    '''
+    This function calculates RHO when it needs to be reduced.
+    The scheme is shared by UOBYQA, NEWUOA, BOBYQA, LINCOA. For COBYLA, Powell's code reduces RHO by
+    'RHO *= 0.5; if RHO <= 1.5 * RHOEND: RHO = RHOEND' as specified in (11) of the COBYLA
+    paper. However, this scheme seems to work better, especially after we introduce DELTA.
+    '''
+
+    # Preconditions
+    if DEBUGGING:
+        assert rho_in > rhoend > 0
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    rho_ratio = rho_in / rhoend
+
+    if rho_ratio > 250:
+        rho = 0.1 * rho_in
+    elif rho_ratio <= 16:
+        rho = rhoend
+    else:
+        rho = np.sqrt(rho_ratio) * rhoend  # rho = np.sqrt(rho * rhoend)
+
+    #==================#
+    # Calculation ends #
+    #==================#
+
+    # Postconditions
+    if DEBUGGING:
+        assert rho_in > rho >= rhoend
+
+    return rho
\ No newline at end of file
diff --git a/python/src/prima/common/selectx.py b/python/src/prima/common/selectx.py
new file mode 100644
index 0000000000..8e33f15837
--- /dev/null
+++ b/python/src/prima/common/selectx.py
@@ -0,0 +1,293 @@
+'''
+This module provides subroutines that ensure the returned X is optimal among all the calculated
+points in the sense that no other point achieves both lower function value and lower constraint
+violation at the same time. This module is needed only in the constrained case.
+
+Coded by Zaikun ZHANG (www.zhangzk.net).
+
+Python implementation by Nickolai Belakovski
+'''
+
+import numpy as np
+import numpy.typing as npt
+from prima.common.consts import DEBUGGING, EPS, CONSTRMAX, REALMAX, FUNCMAX
+from prima.common.present import present
+
+def isbetter(f1: float, c1: float, f2: float, c2: float, ctol: float) -> bool:
+    '''
+    This function compares whether FC1 = (F1, C1) is (strictly) better than FC2 = (F2, C2), which
+    basically means that (F1 < F2 and C1 <= C2) or (F1 <= F2 and C1 < C2).
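+    For example (hypothetical): isbetter(1.0, 0.0, 2.0, 0.0, 0.0) is True since F1 < F2 and
+    C1 <= C2, while isbetter(1.0, 1.0, 2.0, 0.0, 0.0) is False since C1 > C2.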
+    It takes care of the cases where some of these values are NaN or Inf, even though some cases
+    should never happen due to the moderated extreme barrier.
+    At return, BETTER = TRUE if and only if (F1, C1) is better than (F2, C2).
+    Here, C means constraint violation, which is a nonnegative number.
+    '''
+
+    # Preconditions
+    if DEBUGGING:
+        assert not any(np.isnan([f1, c1]) | np.isposinf([f1, c1]))
+        assert not any(np.isnan([f2, c2]) | np.isposinf([f2, c2]))
+        assert c1 >= 0 and c2 >= 0
+        assert ctol >= 0
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    is_better = False
+    # Even though NaN/+Inf should not occur in FC1 or FC2 due to the moderated extreme barrier, for
+    # security and robustness, the code below does not make this assumption.
+    is_better = is_better or (any(np.isnan([f1, c1])) and not any(np.isnan([f2, c2])))
+
+    is_better = is_better or (f1 < f2 and c1 <= c2)
+    is_better = is_better or (f1 <= f2 and c1 < c2)
+
+    # If C1 <= CTOL and C2 is significantly larger/worse than CTOL, i.e., C2 > MAX(CTOL, CREF),
+    # then FC1 is better than FC2 as long as F1 < REALMAX. Normally CREF >= CTOL so MAX(CTOL, CREF)
+    # is indeed CREF. However, this may not be true if CTOL > 1E-1*CONSTRMAX.
+    cref = 10 * max(EPS, min(ctol, 1.0E-2 * CONSTRMAX))  # The MIN avoids overflow.
+    is_better = is_better or (f1 < REALMAX and c1 <= ctol and (c2 > max(ctol, cref) or np.isnan(c2)))
+
+    #==================#
+    # Calculation ends #
+    #==================#
+
+    # Postconditions
+    if DEBUGGING:
+        assert not (is_better and f1 >= f2 and c1 >= c2)
+        assert is_better or not (f1 <= f2 and c1 < c2)
+        assert is_better or not (f1 < f2 and c1 <= c2)
+
+    return is_better
+
+
+def savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr=None, confilt=None):
+    '''
+    This subroutine saves X, F, and CSTRV in XFILT, FFILT, and CFILT (and CONSTR in CONFILT
+    if they are present), unless a vector in XFILT[:, :NFILT] is better than X.
+    If X is better than some vectors in XFILT[:, :NFILT], then these vectors will be
+    removed. If X is not better than any of XFILT[:, :NFILT], but NFILT == MAXFILT,
+    then we remove a column from XFILT according to the merit function
+    PHI = FFILT + CWEIGHT * max(CFILT - CTOL, 0)
+    N.B.:
+    1. Only XFILT[:, :NFILT] and FFILT[:NFILT] etc contain valid information, while
+    XFILT[:, NFILT:] and FFILT[NFILT:] etc are not initialized yet.
+    2. We decide whether one X is better than another by the ISBETTER function.
+    '''
+
+    # Sizes
+    if present(constr):
+        num_constraints = len(constr)
+    else:
+        num_constraints = 0
+    num_vars = len(x)
+    maxfilt = len(ffilt)
+
+    # Preconditions
+    if DEBUGGING:
+        # Check the size of X.
+        assert num_vars >= 1
+        # Check CWEIGHT and CTOL
+        assert cweight >= 0
+        assert ctol >= 0
+        # Check NFILT
+        assert nfilt >= 0 and nfilt <= maxfilt
+        # Check the sizes of XFILT, FFILT, CFILT.
+        assert maxfilt >= 1
+        assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt
+        assert np.size(cfilt) == maxfilt
+        # Check the values of XFILT, FFILT, CFILT.
+        assert not (np.isnan(xfilt[:, :nfilt])).any()
+        assert not any(np.isnan(ffilt[:nfilt]) | np.isposinf(ffilt[:nfilt]))
+        assert not any((cfilt[:nfilt] < 0) | np.isnan(cfilt[:nfilt]) | np.isposinf(cfilt[:nfilt]))
+        # Check the values of X, F, CSTRV.
+        # X does not contain NaN if X0 does not and the trust-region/geometry steps are proper.
+        assert not any(np.isnan(x))
+        # F cannot be NaN/+Inf due to the moderated extreme barrier.
+        assert not (np.isnan(f) or np.isposinf(f))
+        # CSTRV cannot be NaN/+Inf due to the moderated extreme barrier.
+        assert not (cstrv < 0 or np.isnan(cstrv) or np.isposinf(cstrv))
+        # Check CONSTR and CONFILT.
+        assert present(constr) == present(confilt)
+        if present(constr):
+            # CONSTR cannot contain NaN/-Inf due to the moderated extreme barrier.
+            assert not any(np.isnan(constr) | np.isneginf(constr))
+            assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt
+            assert not (np.isnan(confilt[:, :nfilt]) | np.isneginf(confilt[:, :nfilt])).any()
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    # Return immediately if any column of XFILT is better than X. TODO: Lazy evaluation
+    if any([isbetter(ffilt_i, cfilt_i, f, cstrv, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])]):
+        return nfilt, cfilt, ffilt, xfilt, confilt
+
+    # Decide which columns of XFILT to keep.
+    keep = np.logical_not([isbetter(f, cstrv, ffilt_i, cfilt_i, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])])
+
+    # If NFILT == MAXFILT and X is not better than any column of XFILT, then we remove the worst column
+    # of XFILT according to the merit function PHI = FFILT + CWEIGHT * MAX(CFILT - CTOL, ZERO).
+    if sum(keep) == maxfilt:  # In this case, NFILT = SIZE(KEEP) = COUNT(KEEP) = MAXFILT > 0.
+        # TODO: This code path has not been vetted
+        cfilt_shifted = np.maximum(cfilt - ctol, 0)
+        if cweight <= 0:
+            phi = ffilt
+        elif np.isposinf(cweight):
+            phi = cfilt_shifted
+            # We should not use CFILT here; if MAX(CFILT_SHIFTED) is attained at multiple indices, then
+            # we will check FFILT to exhaust the remaining degree of freedom.
+        else:
+            phi = np.maximum(ffilt, -REALMAX)
+            phi = np.nan_to_num(phi, nan=-REALMAX)  # Replace NaN with -REALMAX and +/- Inf with large numbers
+            phi += cweight * cfilt_shifted
+        # We select X to maximize PHI. In case there are multiple maximizers, we take the one with the
+        # largest CSTRV_SHIFTED; if there are more than one choices, we take the one with the largest F;
+        # if there are several candidates, we take the one with the largest CSTRV; if the last comparison
+        # still leads to more than one possibilities, then they are equally bad and we choose the first.
+        # N.B.:
+        # 1. This process is the opposite of selecting KOPT in SELECTX.
+        # 2. In finite-precision arithmetic, PHI_1 == PHI_2 and CSTRV_SHIFTED_1 == CSTRV_SHIFTED_2 do
+        # not ensure that F_1 == F_2!
+        phimax = max(phi)
+        cref = max(cfilt_shifted[phi >= phimax])
+        fref = max(ffilt[cfilt_shifted >= cref])
+        kworst = np.where(cfilt == max(cfilt[ffilt <= fref]))[0][0]
+        if kworst < 0 or kworst >= len(keep):  # For security. Should not happen.
+            kworst = 0
+        keep[kworst] = False
+
+    # Keep the good xfilt values and remove all the ones that are strictly worse than the new x.
+    nfilt = sum(keep)
+    index_to_keep = np.where(keep)[0]
+    xfilt[:, :nfilt] = xfilt[:, index_to_keep]
+    ffilt[:nfilt] = ffilt[index_to_keep]
+    cfilt[:nfilt] = cfilt[index_to_keep]
+    if confilt is not None and constr is not None:
+        confilt[:, :nfilt] = confilt[:, index_to_keep]
+
+    # Once we have removed all the vectors that are strictly worse than x,
+    # we add x to the filter.
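+    # For example (hypothetical): if the filter holds (f, cstrv) pairs (1, 1) and (2, 0), a new
+    # point with (0, 2) is appended and both old pairs are kept, since no pair is better than
+    # another in both f and cstrv simultaneously.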
+    xfilt[:, nfilt] = x
+    ffilt[nfilt] = f
+    cfilt[nfilt] = cstrv
+    if confilt is not None and constr is not None:
+        confilt[:, nfilt] = constr
+    nfilt += 1  # In Python we need to increment the index afterwards
+
+    #==================#
+    # Calculation ends #
+    #==================#
+
+    # Postconditions
+    if DEBUGGING:
+        # Check NFILT and the sizes of XFILT, FFILT, CFILT.
+        assert nfilt >= 1 and nfilt <= maxfilt
+        assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt
+        assert np.size(ffilt) == maxfilt
+        assert np.size(cfilt) == maxfilt
+        # Check the values of XFILT, FFILT, CFILT.
+        assert not (np.isnan(xfilt[:, :nfilt])).any()
+        assert not any(np.isnan(ffilt[:nfilt]) | np.isposinf(ffilt[:nfilt]))
+        assert not any((cfilt[:nfilt] < 0) | np.isnan(cfilt[:nfilt]) | np.isposinf(cfilt[:nfilt]))
+        # Check that no point in the filter is better than X, and X is better than no point.
+        assert not any([isbetter(ffilt_i, cfilt_i, f, cstrv, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])])
+        assert not any([isbetter(f, cstrv, ffilt_i, cfilt_i, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])])
+        # Check CONFILT.
+        if present(confilt):
+            assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt
+            assert not (np.isnan(confilt[:, :nfilt]) | np.isneginf(confilt[:, :nfilt])).any()
+
+
+    return nfilt, cfilt, ffilt, xfilt, confilt
+
+
+def selectx(fhist: npt.NDArray, chist: npt.NDArray, cweight: float, ctol: float):
+    '''
+    This subroutine selects X according to FHIST and CHIST, which represent (a part of) the history
+    of F and CSTRV. Normally, FHIST and CHIST are not the full history but only a filter, e.g., FFILT
+    and CFILT generated by SAVEFILT. However, we name them FHIST and CHIST because the [F, CSTRV]
+    in a filter should not dominate each other, but this subroutine does NOT assume such a property.
+    N.B.: CTOL is the tolerance of the constraint violation (CSTRV). A point is considered feasible if
+    its constraint violation is at most CTOL. Note that CTOL is absolute, not relative.
+    '''
+
+    # Sizes
+    nhist = len(fhist)
+
+    # Preconditions
+    if DEBUGGING:
+        assert nhist >= 1
+        assert np.size(chist) == nhist
+        assert not any(np.isnan(fhist) | np.isposinf(fhist))
+        assert not any((chist < 0) | np.isnan(chist) | np.isposinf(chist))
+        assert cweight >= 0
+        assert ctol >= 0
+
+    #====================#
+    # Calculation starts #
+    #====================#
+
+    # We select X among the points with F < FREF and CSTRV < CREF.
+    # Do NOT use F <= FREF, because F == FREF (FUNCMAX or REALMAX) may mean F == INF in practice!
+    if any(np.logical_and(fhist < FUNCMAX, chist < CONSTRMAX)):
+        fref = FUNCMAX
+        cref = CONSTRMAX
+    elif any(np.logical_and(fhist < REALMAX, chist < CONSTRMAX)):
+        fref = REALMAX
+        cref = CONSTRMAX
+    elif any(np.logical_and(fhist < FUNCMAX, chist < REALMAX)):
+        fref = FUNCMAX
+        cref = REALMAX
+    else:
+        fref = REALMAX
+        cref = REALMAX
+
+    if not any(np.logical_and(fhist < fref, chist < cref)):
+        kopt = nhist - 1
+    else:
+        # Shift the constraint violations by ctol, so that cstrv <= ctol is regarded as no violation.
+        chist_shifted = np.maximum(chist - ctol, 0)
+        # cmin is the minimal shifted constraint violation attained in the history.
+        cmin = np.min(chist_shifted[fhist < fref])
+        # We consider only the points whose shifted constraint violations are at most the cref below.
+        # N.B.: Without taking np.maximum(EPS, .), cref would be 0 if cmin = 0. In that case, asking for
+        # cstrv_shifted < cref would be WRONG!
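+        # For example (hypothetical): if cmin = 0, i.e. some point is feasible up to ctol, then
+        # cref = EPS below and only the (nearly) feasible points compete in the merit function.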
+        cref = np.maximum(EPS, 2*cmin)
+        # We use the following phi as our merit function to select X.
+        if cweight <= 0:
+            phi = fhist
+        elif np.isposinf(cweight):
+            phi = chist_shifted
+            # We should not use chist here; if np.min(chist_shifted) is attained at multiple indices, then
+            # we will check fhist to exhaust the remaining degree of freedom.
+        else:
+            phi = np.maximum(fhist, -REALMAX) + cweight * chist_shifted
+            # np.maximum(fhist, -REALMAX) makes sure that phi will not contain NaN (unless there is a bug).
+
+        # We select X to minimize phi subject to f < fref and cstrv_shifted <= cref (see the comments
+        # above for the reason of taking "<" and "<=" in these two constraints). In case there are
+        # multiple minimizers, we take the one with the least cstrv_shifted; if there is more than one
+        # choice, we take the one with the least f; if there are several candidates, we take the one
+        # with the least cstrv; if the last comparison still leads to more than one possibility, then
+        # they are equally good and we choose the first.
+        # N.B.:
+        # 1. This process is the opposite of selecting kworst in savefilt.
+        # 2. In finite-precision arithmetic, phi_1 == phi_2 and cstrv_shifted_1 == cstrv_shifted_2 do
+        # not ensure that f_1 == f_2!
+        phimin = np.min(phi[np.logical_and(fhist < fref, chist_shifted <= cref)])
+        cref = np.min(chist_shifted[np.logical_and(fhist < fref, phi <= phimin)])
+        fref = np.min(fhist[chist_shifted <= cref])
+        kopt = np.where(chist == np.min(chist[fhist <= fref]))[0][0]
+
+    #==================#
+    # Calculation ends #
+    #==================#
+
+    # Postconditions
+    if DEBUGGING:
+        assert kopt >= 0 and kopt < nhist
+        assert not any([isbetter(fhisti, chisti, fhist[kopt], chist[kopt], ctol) for fhisti, chisti in zip(fhist[:nhist], chist[:nhist])])
+
+    return kopt
\ No newline at end of file
diff --git a/python/src/prima/examples/cobyla/cobyla_example.py b/python/src/prima/examples/cobyla/cobyla_example.py
new file mode 100644
index 0000000000..7e4d6d4282
--- /dev/null
+++ b/python/src/prima/examples/cobyla/cobyla_example.py
@@ -0,0 +1,100 @@
+import numpy as np
+from prima.cobyla.cobyla import cobyla
+
+
+def test_chebyquad():
+    f, _ = calcfc_chebyquad([1, 2])
+    assert np.isclose(f, 91+1/9, atol=1e-6)
+
+
+def calcfc_chebyquad(x):
+    x = np.array(x)
+    n = len(x)  # or shape?
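+    # Chebyquad (a classic test problem due to Fletcher, 1965): f is the sum of squared deviations
+    # between the averages of the shifted Chebyshev polynomials at the points in x and their exact
+    # integrals over [0, 1].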
+    y = np.zeros((n + 1, n + 1))
+    y[:n, 0] = 1
+    y[:n, 1] = 2*x - 1
+    for i in range(1, n):
+        y[:n, i+1] = 2*y[:n, 1] * y[:n, i] - y[:n, i - 1]
+
+    f = 0
+    for i in range(n+1):
+        tmp = sum(y[0:n, i]) / n
+        if i % 2 == 0:
+            tmp += 1/((i)**2 - 1)
+        f += tmp**2
+    constr = np.zeros(0)
+    return f, constr
+
+def calcfc_hexagon(x):
+    # Test problem 10 in Powell's original algorithm
+
+    assert len(x) == 9
+
+    f = -0.5 * (x[0] * x[3] - x[1] * x[2] + x[2] * x[8] - x[4] * x[8] + x[4] * x[7] - x[5] * x[6])
+    constr = np.zeros(14)
+    constr[0] = 1 - x[2]**2 - x[3]**2
+    constr[1] = 1 - x[8]**2
+    constr[2] = 1 - x[4]**2 - x[5]**2
+    constr[3] = 1 - x[0]**2 - (x[1] - x[8])**2
+    constr[4] = 1 - (x[0] - x[4])**2 - (x[1] - x[5])**2
+    constr[5] = 1 - (x[0] - x[6])**2 - (x[1] - x[7])**2
+    constr[6] = 1 - (x[2] - x[4])**2 - (x[3] - x[5])**2
+    constr[7] = 1 - (x[2] - x[6])**2 - (x[3] - x[7])**2
+    constr[8] = 1 - x[6]**2 - (x[7] - x[8])**2
+    constr[9] = x[0] * x[3] - x[1] * x[2]
+    constr[10] = x[2] * x[8]
+    constr[11] = -x[4] * x[8]
+    constr[12] = x[4] * x[7] - x[5] * x[6]
+    constr[13] = x[8]
+
+    return f, constr
+
+
+if __name__ == "__main__":
+    n_chebyquad = 6
+
+    # The following lines illustrate how to call the solver to solve the Chebyquad problem.
+    x_chebyquad = np.array([i/(n_chebyquad+1) for i in range(1, n_chebyquad+1)])  # Starting point
+    m = 0  # Dimension of constraints. M must be specified correctly, or the program will crash!
+    result = cobyla(calcfc_chebyquad, m, x_chebyquad)  # This call will not print anything
+
+    # In addition to the compulsory arguments, the following illustration specifies also CONSTR, RHOBEG,
+    # and IPRINT, which are optional. All the unspecified optional arguments (RHOEND, MAXFUN, etc.) will
+    # take their default values coded in the solver.
+    x_chebyquad = np.array([i/(n_chebyquad+1) for i in range(1, n_chebyquad+1)])  # Starting point
+    result = cobyla(calcfc_chebyquad, m, x_chebyquad, rhobeg=0.2 * x_chebyquad[0], iprint=1)
+
+    # The following lines illustrate how to call the solver to solve the Hexagon problem.
+    x_hexagon = np.zeros(9) + 2
+    m_hexagon = 14  # Dimension of constraints. M must be specified correctly, or the program will crash!
+    result = cobyla(calcfc_hexagon, m_hexagon, x_hexagon)  # This call will not print anything.
+
+    # In addition to the compulsory arguments, the following illustration specifies also CONSTR, RHOBEG,
+    # and IPRINT, which are optional. All the unspecified optional arguments (RHOEND, MAXFUN, etc.) will
+    # take their default values coded in the solver. Note that CONSTR is an output, which will be set to
+    # the value of CONSTR(X_HEXAGON) when the solver returns.
+    result = cobyla(calcfc_hexagon, m_hexagon, x_hexagon, rhobeg=1.0, rhoend=1.0e-3, iprint=1)
+
+    '''
+    ! The following lines illustrates how to call the solver to solve the Chebyquad problem.
+    x_chebyquad = [(real(i, RP) / real(n_chebyquad + 1, RP), i=1, n_chebyquad)] ! Starting point
+    m = 0 ! Dimension of constraints. M must the specified correctly, or the program will crash!
+    call cobyla(calcfc_chebyquad, m, x_chebyquad, f, cstrv) ! This call will not print anything.
+
+    ! In addition to the compulsory arguments, the following illustration specifies also CONSTR, RHOBEG,
+    ! and IPRINT, which are optional. All the unspecified optional arguments (RHOEND, MAXFUN, etc.) will
+    ! take their default values coded in the solver.
+    x_chebyquad = [(real(i, RP) / real(n_chebyquad + 1, RP), i=1, n_chebyquad)] !
Starting point + call cobyla(calcfc_chebyquad, m, x_chebyquad, f, cstrv, rhobeg=0.2_RP * x_chebyquad(1), iprint=1, nf=nf, info=info) + + ! The following lines illustrates how to call the solver to solve the Hexagon problem. + x_hexagon = 2.0_RP ! Starting point. + m = 14 ! + call cobyla(calcfc_hexagon, m, x_hexagon, f, cstrv) ! This call will not print anything. + + ! In addition to the compulsory arguments, the following illustration specifies also CONSTR, RHOBEG, + ! and IPRINT, which are optional. All the unspecified optional arguments (RHOEND, MAXFUN, etc.) will + ! take their default values coded in the solver. Note that CONSTR is an output, which will be set to + ! the value of CONSTR(X_HEXAGON) when the solver returns. + x_hexagon = 2.0_RP ! Starting point. + call cobyla(calcfc_hexagon, m, x_hexagon, f, cstrv, constr=constr, rhobeg=1.0_RP, rhoend=1.0D-3, iprint=1, nf=nf, info=info)''' \ No newline at end of file