<a href="https://colab.research.google.com/github/m-mehabadi/grad-maker/blob/main/_notebooks/Testing_GradientMaker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

http://www.cs.cmu.edu/~pradeepr/convexopt/Lecture_Slides/dual-ascent.pdf

https://web.stanford.edu/class/ee364b/lectures/primal_dual_subgrad_slides.pdf

https://www.cvxpy.org/examples/basic/quadratic_program.html

In [None]:
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def gradient_maker(grads, solver=None):
    """
    Combine per-domain gradients into a single vector by solving a quadratic program.

    With `G = grads.T` and `g_` the mean of the rows of `grads`, the problem is

        minimize    (1/2) x^T P x + q^T x
        subject to  -(G^T G) x <= 0      (i.e. grads @ (G x) >= 0 for every domain)
                    sum(x) == 1

    where `P = nearestPD(n * G^T G)` and `q = -n * G^T g_`. Apart from the
    positive-definite adjustment of `P`, the objective equals
    `(n/2) * ||G x - g_||^2` up to an additive constant.

    - make sure to install `cvxpy`. you can use: `pip install cvxpy`
    - `grads` is a numpy `ndarray`
    - `grads.shape == (n, d)`, where `n` is the number of domains and `d` is the dimension
    - `solver` is a cvxpy solver constant (e.g. `cp.OSQP`); when `None`, `cp.OSQP` is used
    - this method will return a tuple of size two, where:
        * the first one is the generalized vector to use with size `d`
        * the second one is the weight vector of the linear combination (size `n`)
    - raises `ValueError` if the solver finishes without producing a solution
    - finally, use g, _ = gradient_maker(grads), if you have no need to use the 2nd return
    """

    import cvxpy as cp
    from numpy import linalg as la

    def isPD(B):
        # A matrix is accepted as positive definite iff its Cholesky factorisation succeeds.
        try:
            _ = la.cholesky(B)
            return True
        except la.LinAlgError:
            return False

    def nearestPD(A):
        # Symmetrise the input first.
        B = (A + A.T) / 2
        # `la.svd` returns (U, s, Vh); the third value, named V here, is Vh.
        _, s, V = la.svd(B)

        # Symmetric factor Vh^T diag(s) Vh built from the singular values of B.
        H = np.dot(V.T, np.dot(np.diag(s), V))

        A2 = (B + H) / 2

        # Symmetrise again to remove round-off asymmetry.
        A3 = (A2 + A2.T) / 2

        if isPD(A3):
            return A3

        spacing = np.spacing(la.norm(A))

        # Shift the diagonal by a growing multiple of the most negative
        # eigenvalue until the Cholesky test passes.
        I = np.eye(A.shape[0])
        k = 1
        while not isPD(A3):
            mineig = np.min(np.real(la.eigvals(A3)))
            A3 += I * (-mineig * k**2 + spacing)
            k += 1

        return A3

    # columns of G are the per-domain gradients; g_ is their mean as a column vector
    G = grads.T
    n, d = grads.shape
    g_ = np.mean(grads, axis=0).reshape(-1, 1)

    # QP data
    P = nearestPD(n*G.T@G)
    q = -n*G.T@g_
    F = -G.T@G
    h = np.zeros(n, dtype=np.float32)
    A = np.ones(n, dtype=np.float32).reshape(1, -1)
    b = np.ones((1, 1), dtype=np.float32)

    # define opt variable
    x = cp.Variable(n)
    prob = cp.Problem(cp.Minimize((1/2)*cp.quad_form(x, P) + q.T @ x),
                    [F @ x <= h,
                    A @ x == b])

    # Fall back to OSQP when the caller did not choose a solver. (Previously the
    # function returned the solver constant here instead of solving the problem.)
    if solver is None:
        solver = cp.OSQP
    prob.solve(solver=solver, verbose=False)

    # `x.value` stays None when the solver did not produce a solution
    # (e.g. infeasible problem); fail with a clear message instead of an
    # opaque matmul error further down.
    if x.value is None:
        raise ValueError(
            f"solver {solver} returned no solution (status: {prob.status})"
        )
    s = np.array(x.value)

    return G@s, s

def gm_search(grads, return_first=True, verbose=0):
    """
    Search over QP solvers and input scalings for the vector `g` that maximises
    the minimum inner product `np.min(grads @ g)`.

    For every solver name in `qp_solvers`, the rows of `grads` are normalised to
    unit norm, multiplied by `10**k` for `k = 0..19`, and passed to
    `gradient_maker`. Each result is rescaled so that its norm equals the mean
    row norm of `grads`, and the candidate with the largest minimum inner
    product against the original `grads` is kept.

    - `grads` is a numpy `ndarray` with `grads.shape == (n, d)`
    - `return_first`: when true, stop at the first solver whose best candidate
      has a strictly positive minimum inner product
    - `verbose`: `0` is silent (except for the fallback message), `>= 1` prints
      per-solver summaries, `2` additionally prints per-scale details
    - returns the best vector found (size `d`); when no solver produced a
      candidate, prints a message and returns `np.mean(grads, axis=0)`

    Requires `cvxpy`. A solver that fails at some scale (for instance because it
    is not installed) is abandoned at that scale; results from earlier scales
    are kept.
    """
    import gc
    import cvxpy as cp
    from numpy.linalg import norm

    def gm_search_in_scales(grads, solver=None):
        # Best candidate over all scales for one solver, or None if every attempt failed.
        best_inner = -float('inf')
        best_g = None
        for scale in range(20):
            try:
                _scale = 1e1**float(scale)
                dgr = np.copy(grads)
                dgr /= norm(dgr, axis=1).reshape(-1, 1)
                dgr *= _scale

                g_scaled, _ = gradient_maker(dgr, solver)

                # we need to scale it back
                g_scaled_back = np.mean(norm(grads, axis=1))/norm(g_scaled) * g_scaled

                # keep this candidate if it does not decrease the best minimum inner product
                inner = np.min(grads@g_scaled_back)
                if inner >= best_inner:
                    best_g = np.copy(g_scaled_back)
                    best_inner = inner

                if verbose==2:
                    print("----------------")
                    print(f"the minimum inner product found with scaling {_scale} is {np.min(dgr@g_scaled)}")
                    print(f"the minimum inner product found without scaling is {inner}")

                # reclaim the per-scale temporaries before the next solve
                gc.collect()
            except Exception:
                # Best effort: a solver that fails (not installed, numerical
                # breakdown, ...) is abandoned at this scale. KeyboardInterrupt
                # and SystemExit are deliberately not swallowed.
                break
        return best_g

    # excluding: 'SCIP'
    qp_solvers = ['OSQP','CPLEX','NAG','ECOS','GUROBI','MOSEK','CVXOPT','SCS','XPRESS']

    min_inner = -float('inf')
    g = None

    for solver in qp_solvers:

        if verbose>=1:
            print()
            print(f"############## Solver {solver} ##############")

        # Look the solver constant up by attribute instead of eval(); skip
        # names that the installed cvxpy version does not expose.
        solver_const = getattr(cp, solver, None)
        if solver_const is None:
            continue

        _g = gm_search_in_scales(grads, solver_const)

        if _g is None:
            continue

        inner = np.min(grads@_g)

        if verbose>=1:
            print()
            print(f"best result with solver = {solver} is {inner}")

        if return_first and inner > 0. :
            g = np.copy(_g)
            break

        if inner >= min_inner:
            g = np.copy(_g)
            min_inner = inner

        gc.collect()

    if g is None:
        # no solver produced a candidate: fall back to the plain mean gradient
        print("FOUND NOTHING!!!!")
        g = np.mean(grads, axis=0)

    if verbose>=1:
        print()
        print("============================================================")
        print(f"final result is {np.min(grads@g)}")

    return g

### Test cases

Now let's write some test cases to make sure everything works correctly.


In [None]:
# 20 gradients of dimension 23: every entry is 1e-5 plus Gaussian noise scaled by 1e-15.
grads = 1e-5 + 1e-15 * np.random.randn(20, 23)

In [None]:
# Run the solver/scale search with its defaults (return_first=True, verbose=0).
g = gm_search(grads)

In [None]:
# Smallest inner product between any row of `grads` and the vector `g` found above.
np.min(grads@g)

2.2999999999001716e-09