Merge da32aa6 into e5cb995

gideonite committed May 3, 2021
2 parents e5cb995 + da32aa6 commit 4ff21dc
Showing 6 changed files with 534 additions and 2 deletions.
214 changes: 214 additions & 0 deletions copt/constraint.py
100644 → 100755
@@ -324,3 +324,217 @@ def lmo(self, u, x, active_set=None):
ut, _, vt = splinalg.svds(u_mat, k=1)
vertex = self.alpha * np.outer(ut, vt).ravel()
return vertex - x, None, None, 1.


class TraceSpectrahedron:
"""Projection of a square, symmetric matrix onto the set of
positive-semidefinite matrices with bounded trace norm.
Args:
alpha: float
radius of the spectrahedron.
dim: int
The ambient space is of dimension dim x dim
"""
def __init__(self, alpha, dim):
assert type(dim) == int
self.alpha = alpha
self.dim = dim
self._shape = (dim,dim)

def __call__(self, x):
X = x.reshape(self._shape)
eigvals = linalg.eigvalsh(X)
# eigenvalues that are not numerically zero must be strictly positive
is_psd = np.all(eigvals[~np.isclose(0, eigvals)] > 0)
is_in_ball = eigvals.sum() <= self.alpha + np.finfo(np.float32).eps
if is_psd and is_in_ball:
return 0
else:
return np.inf

def prox(self, x, step_size):
X = x.reshape(self._shape)
X = .5*(X + X.T)
s, U = linalg.eigh(X)
s_psd = np.maximum(s, 0)
s_psd_threshold = euclidean_proj_l1ball(s_psd, self.alpha)
ret = (U * s_psd_threshold).dot(U.T)
assert np.allclose(ret, ret.T)
return ret.ravel()

def lmo(self, u, x, active_set=None):
r"""Linear Maximization Oracle.
Returns s - x with s solving the following
max_{s\in D} <u,s>
where D := {X | X is p.s.d.; ||X||_nuc <= alpha}
Args:
u: usually -gradient
x: usually the iterate of the considered algorithm
active_set: ignored
Returns:
update_direction: s - x, where s is the vertex of the constraint most correlated with u
None: not used here
None: not used here
max_step_size: 1. for a Frank-Wolfe step.
"""
u_mat = u.reshape(self._shape)
u_mat = .5*(u_mat + u_mat.T)
s, ut = splinalg.eigsh(u_mat, k=1, which='LA')

if s < 0:
vertex = np.zeros(self._shape).ravel()
else:
vertex = self.alpha * np.outer(ut,ut).ravel()
return vertex - x, None, None, 1.
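
A minimal usage sketch (not part of the diff), assuming the class is importable as copt.constraint.TraceSpectrahedron (the file shown above):

import numpy as np
from copt.constraint import TraceSpectrahedron

dim = 5
constraint = TraceSpectrahedron(alpha=1., dim=dim)

rng = np.random.default_rng(0)
A = rng.standard_normal((dim, dim))
x = (A + A.T).ravel()                       # symmetric, generally not PSD

print(constraint(x))                        # np.inf unless x is already feasible
x_proj = constraint.prox(x, step_size=0.)   # Euclidean projection onto the set
print(constraint(x_proj))                   # 0 (up to numerical tolerance)

direction, _, _, max_step = constraint.lmo(-x_proj, x_proj)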


class RowEqualityConstraint:
"""Row equality constraint for a matrix-valued decision variable.
Homotopy smoothing is also implemented.
Row equality constraints are of the form
.. math::
Xv = b
where we call :math:`v` an operator since it maps the decision variable
:math:`X` to a vector.
Homotopy smoothing replaces this constraint by the squared distance of
:math:`Xv` to the `offset` value :math:`b`:
.. math::
\|Xv - b\|^2
Args:
shape: tuple of ints (n,m)
Shape of the underlying matrix-valued decision variable x.
operator: vector of size (m,)
offset: vector of size (n,)
beta_scaling: ignored
name: string, optional
Used by other code (e.g. trace objects) to identify this particular
constraint.
"""
def __init__(self, shape, operator, offset, beta_scaling=1.0, name='row_equality_constraint'):
# TODO incorporate beta_scaling. Currently not done because it is only
# used by element wise constraints.
assert len(shape) == 2
assert len(offset.shape) == 1
assert len(operator.shape) == 1
assert operator.shape[0] == shape[1]
assert offset.shape[0] == shape[0]
self.shape = shape
self.operator = operator
self.offset = offset
self.name = name
self.offset_norm = np.linalg.norm(self.offset)

def __call__(self, x):
X = x.reshape(self.shape)
z = np.matmul(X, self.operator)
if np.all(z == self.offset):
return 0
else:
return np.inf

def apply_operator(self, x):
"""Evaluates A(x). In this case, `x * operator` where '*' denotes
matrix multiplication.
"""
X = x.reshape(self.shape)
return X.dot(self.operator)

def smoothed_grad(self, x):
"""Returns the value and the gradient of the homotopy smoothed
constraint.
Args:
x: decision variable
"""
X = x.reshape(self.shape)
err = X.dot(self.operator) - self.offset
val = np.linalg.norm(err) ** 2
grad = 2*np.outer(err, self.operator)
return val, grad.ravel()

def feasibility(self, x):
"""Returns a normalized distance of the current iterate, x, to the
constraint specified by the offset. Intended usage is for tracking
the progress of the algorithm (e.g. by a trace object)
Args:
x: decision variable
"""
X = x.reshape(self.shape)
err = X.dot(self.operator) - self.offset
val = np.linalg.norm(err)
return val / self.offset_norm
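
A minimal usage sketch (not part of the diff): a 3 x 4 decision variable whose row sums must equal 1, i.e. X @ ones(4) == ones(3). The import path copt.constraint is assumed from the file shown above.

import numpy as np
from copt.constraint import RowEqualityConstraint

shape = (3, 4)
constraint = RowEqualityConstraint(shape, np.ones(4), np.ones(3))

x = np.full(shape, 1. / 4).ravel()       # every row sums to 1 -> feasible
print(constraint(x))                     # 0
print(constraint.feasibility(x))         # 0.0

x_bad = np.zeros(shape).ravel()
val, grad = constraint.smoothed_grad(x_bad)
print(val, grad.shape)                   # 3.0 (12,): penalty ||0 - b||^2 and its flattened gradient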


class ElementWiseInequalityConstraint:
"""Element-wise inequality constraint for vector or matrix-valued
decision variables. Homotopy smoothing is also implemented.
.. math::
X_{ij} \geq c
where :math:`c\in\mathbb R`
Homotopy smoothing replaces this constraint by the squared norm of the
constraint violation
.. math::
\|\min(X - c, 0)\|^2
which is used to compute gradients and feasibility.
Args:
shape: 2-tuple of ints.
Shape of the underlying matrix.
operator: ignored
offset: ignored
beta_scaling: float
The relative scaling of this constraint with respect to other constraints.
"""
def __init__(self, shape, operator, offset, beta_scaling=1000.,
name='elementwise_inequality_constraint', eps=np.finfo(np.float32).eps):
assert len(shape) == 2
assert operator.shape == shape
self.offset = offset
self.beta_scaling = beta_scaling
self.name = name
self.eps = eps

def __call__(self, x):
if np.all(x + self.eps >= 0):
return 0
else:
return np.inf

def smoothed_grad(self, x):
"""Returns the value and gradient (flattened) of the smoothed
constraint, i.e. (np.float-like, np.array-like).
Args:
x: decision variable
"""
mask = (x + self.eps >= 0)  # entries already satisfying the constraint
grad = x.copy()
grad[mask] = 0              # only violating entries contribute
val = linalg.norm(grad)**2
return val, self.beta_scaling*grad

def feasibility(self, x):
"""Returns the norm of the elements of x which do not satisfy the
constraint. Elements which satisfy the constraint are not included.
"""
mask = (x+self.eps>=0)
grad = x.copy()
grad[mask] = 0
return linalg.norm(grad)
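
A minimal usage sketch (not part of the diff): element-wise non-negativity of a 2 x 2 variable. The `operator` argument must have the same shape as the variable; the `offset` is currently unused by the implementation.

import numpy as np
from copt.constraint import ElementWiseInequalityConstraint

shape = (2, 2)
constraint = ElementWiseInequalityConstraint(shape, np.zeros(shape), 0.)

x = np.array([1., 2., -3., 4.])
print(constraint(x))                      # np.inf, one entry is negative
val, grad = constraint.smoothed_grad(x)
print(val)                                # 9.0, only the violating entry contributes
print(constraint.feasibility(x))          # 3.0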
40 changes: 40 additions & 0 deletions copt/datasets.py
@@ -465,3 +465,43 @@ def load_criteo(md5_check=True):
X = sparse.csr_matrix((X_data, X_indices, X_indptr))
y = np.load(data_target)
return X, y

def load_sdp_mnist(md5_check=True, subset='reduced', data_dir=DATA_DIR):
"""Download (TODO) if necessary and return the preprocessed MNIST dataset as described in [MVW16].
Args:
md5_check: bool (TODO)
Whether to do an md5 check on the downloaded files.
subset: string, either 'reduced' or 'full'
Which preprocessed subset of the data to load.
data_dir: string
Directory in which the data files are stored.
Returns:
C : np.array-like
n_labels: int
the number of labels in this subset of the data
opt_val: float
the optimal objective value
References:
[MVW16] D. G. Mixon, S. Villar, and R. Ward, “Clustering subgaussian
mixtures by semidefinite programming,” May 2016.
<http://arxiv.org/abs/1602.06612>,
GitHub Repo: <https://github.com/solevillar/kmeans_sdp>
"""
import scipy.io as sio # lazy import

if subset == 'reduced':
file_path = os.path.join(data_dir,
"sdp_mnist/reduced_clustering_mnist_digits.mat")
elif subset == 'full':
file_path = os.path.join(data_dir,
"sdp_mnist/full_clustering_mnist_digits.mat")
else:
raise ValueError("invalid dataset choice <" + subset + ">")

mat = sio.loadmat(file_path)

C = mat['Problem']['C'][0][0]
opt_val = mat['Problem']['opt_val'][0][0][0][0]
labels = mat['Problem']['labels'][0][0][0]
n_labels = len(np.unique(labels))
return C, n_labels, opt_val
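
A usage sketch (not part of the diff); it assumes the .mat files from the kmeans_sdp repository have already been placed under DATA_DIR/sdp_mnist/, since downloading is still a TODO:

from copt.datasets import load_sdp_mnist

C, n_labels, opt_val = load_sdp_mnist(subset='reduced')
print(C.shape, n_labels, opt_val)   # cost matrix, number of clusters, reference optimum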
102 changes: 102 additions & 0 deletions copt/homotopy.py
@@ -0,0 +1,102 @@
"""Homotopy Methods."""
import json
import os
import sys
import warnings
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
from scipy import io as sio
from scipy import linalg, optimize

import copt
from copt import datasets, utils

EPS = np.finfo(np.float32).eps

# TODO :ref:`hcgm`
def minimize_homotopy_cgm(objective_fun, smoothed_constraints, x0, shape,
lmo, beta0, max_iter, tol, callback):
r"""(H)omotopy CGM
Implements HCGM. See :ref:`hcgm` for more details
Args:
objective_fun: callable
Takes an x0-like and return a tuple (value, gradient) corresponding
to the value and gradient of the objective function at x,
respectively.
smoothed_constraints: [list of obj]
Each object should support a `smoothed_grad` method. This method
takes an x0-like and returns a tuple (value, gradient)
corresponding to the value of the smoothed constraint at x and the
smoothed gradient, respectively.
x0: array-like
Initial guess for solution.
shape: tuple
The underlying shape of each iterate, e.g. (n,m) for an n x m matrix.
beta0: float
Initial value for the smoothing parameter beta.
max_iter: integer, optional
Maximum number of iterations.
tol: float
Tolerance for the objective and homotopy smoothed constraints.
callback: callable, optional
Callback to execute at each iteration. If the callable returns False
then the algorithm will immediately return.
lmo: callable
Linear minimization oracle of the constraint set. Called as
lmo(-gradient, x, active_set) and must return the update direction
(s - x) as the first element of its output tuple.
Returns:
scipy.optimize.OptimizeResult
The optimization result represented as a
``scipy.optimize.OptimizeResult`` object. Important attributes are:
``x`` the solution array, ``success`` a Boolean flag indicating if
the optimizer exited successfully and ``message`` which describes
the cause of the termination. See `scipy.optimize.OptimizeResult`
for a description of other attributes.
References:
.. [YURT2018] A. Yurtsever, O. Fercoq, F. Locatello, and V. Cevher. "A
Conditional Gradient Framework for Composite Convex Minimization with
Applications to Semidefinite Programming." ICML 2018.
<http://arxiv.org/abs/1804.08544>
"""

x0 = np.asanyarray(x0, dtype=float)  # builtin float; np.float is deprecated
beta0 = np.asanyarray(beta0, dtype=np.float128)
if tol < 0:
raise ValueError("'tol' must be non-negative")
x = x0.copy()

for it in range(max_iter):
step_size = 2. / (it+2.)
beta_k = beta0 / np.sqrt(it+2)

total_constraint_grad = sum(c.smoothed_grad(x)[1] for c in smoothed_constraints)
f_t, f_grad = objective_fun(x)
grad = beta_k*f_grad + total_constraint_grad

active_set = None # vanilla FW
update_direction, _, _, _ = lmo(-grad, x, active_set)

feasibilities = [c(x) < tol for c in smoothed_constraints]
if f_t < tol and np.all(feasibilities):
break

x += step_size*update_direction

if callback is not None:
if callback(locals()) is False: # pylint: disable=g-bool-id-comparison
break

if callback is not None:
callback(locals())

return optimize.OptimizeResult(x=x, nit=it)
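
An end-to-end sketch (not part of the diff) wiring the pieces from this commit together on a toy instance of the SDP-relaxed k-means problem of [MVW16]: minimize <C, X> over PSD X with trace at most k, rows summing to one, and non-negative entries. The toy data, import paths, and parameter choices (beta0, max_iter, tol) are illustrative assumptions, not values from the repository.

import numpy as np
from copt.constraint import (TraceSpectrahedron, RowEqualityConstraint,
                             ElementWiseInequalityConstraint)
from copt.homotopy import minimize_homotopy_cgm
from copt.loss import LinearLoss

n, k = 10, 2
rng = np.random.default_rng(0)
points = np.vstack([rng.normal(0., .1, (n // 2, 2)),      # two well-separated blobs
                    rng.normal(3., .1, (n // 2, 2))])
C = np.linalg.norm(points[:, None] - points[None, :], axis=-1) ** 2

shape = (n, n)
objective = LinearLoss(C, shape)
spectrahedron = TraceSpectrahedron(alpha=float(k), dim=n)        # X psd, tr(X) <= k
smoothed_constraints = [
    RowEqualityConstraint(shape, np.ones(n), np.ones(n)),        # X @ 1 = 1
    ElementWiseInequalityConstraint(shape, np.zeros(shape), 0.), # X >= 0
]

x0 = (np.eye(n) * k / n).ravel()   # scaled identity: PSD with trace k
sol = minimize_homotopy_cgm(objective.f_grad, smoothed_constraints, x0, shape,
                            spectrahedron.lmo, beta0=1., max_iter=200, tol=1e-6,
                            callback=None)
X = sol.x.reshape(shape)           # approximate clustering matrix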
29 changes: 29 additions & 0 deletions copt/loss.py
100644 → 100755
@@ -296,3 +296,32 @@ def f_grad(self, x, return_gradient=True):
def lipschitz(self):
s = splinalg.svds(self.A, k=1, return_singular_vectors=False)[0]
return (s * s) / self.A.shape[0] + self.alpha


class LinearLoss:
r"""Linear objective function.
A linear objective is defined as
.. math::
f(X) = \langle M, X \rangle
where :math:`\langle\cdot,\cdot\rangle` is the dot product for
finite-dimensional vectors and :math:`\operatorname{tr}(M^\top X)` for matrices.
Args:
M: array-like
Matrix (or vector) defining the linear objective.
shape: tuple
Shape of the decision variable.
"""
def __init__(self, M, shape):
self.shape = shape
self.m = M.flatten()

def __call__(self, X):
loss,_ = self.f_grad(X)
return loss

def f_grad(self, X):
x = X.flatten()
loss = np.dot(x, self.m)
return loss, self.m

@property
def lipschitz(self):
raise NotImplementedError
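
A minimal usage sketch (not part of the diff), assuming the class is importable as copt.loss.LinearLoss:

import numpy as np
from copt.loss import LinearLoss

M = np.arange(6.).reshape(2, 3)
loss = LinearLoss(M, (2, 3))

X = np.ones((2, 3))
print(loss(X))             # <M, X> = sum of M's entries = 15.0
val, grad = loss.f_grad(X)
print(grad.shape)          # (6,), the gradient is the flattened M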
