<a href="https://colab.research.google.com/github/m-mehabadi/grad-maker/blob/main/_notebooks/Testing_GradientMaker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

http://www.cs.cmu.edu/~pradeepr/convexopt/Lecture_Slides/dual-ascent.pdf

https://web.stanford.edu/class/ee364b/lectures/primal_dual_subgrad_slides.pdf

https://www.cvxpy.org/examples/basic/quadratic_program.html

In [1]:
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cp

In [2]:
def gradient_maker(grads, solver=None):
    """
    Combine per-domain gradients into a single direction by solving a small QP.

    With `G = grads.T` and `g_mean` the mean of the rows of `grads`, the
    weight vector `s` solves

        minimize    (1/2) s^T P s + q^T s
        subject to  G^T G s >= 0      (one inequality per row of `grads`)
                    sum(s) == 1

    where `P` is `n * G^T G` adjusted to be positive definite and
    `q = -n * G^T g_mean`. Before that adjustment of `P`, the objective equals
    `(n/2) * ||G s - g_mean||^2` up to an additive constant.

    - make sure to install `cvxpy`. you can use: `pip install cvxpy`
    - `grads` is a numpy `ndarray` (anything `np.asarray` accepts also works)
    - `grads.shape == (n, d)`, where `n` is the number of domains and `d` is the dimension
    - `solver` is forwarded to `cvxpy.Problem.solve`; `None` selects `cp.OSQP`
    - this method will return a tuple of size two, where:
        * the first one is the generalized vector to use with size `d`
        * the second one is the weight vector of the linear combination (size `n`)
    - raises `ValueError` if `grads` is not a non-empty 2-D array
    - raises `RuntimeError` if the solver finishes without producing a solution
    - finally, use g, _ = gradient_maker(grads), if you have no need to use the 2nd return
    """

    # Local imports keep the function copy-paste friendly: it no longer relies
    # on a module-level `np` being defined by the caller.
    import numpy as np
    from numpy import linalg as la

    grads = np.asarray(grads, dtype=float)
    if grads.ndim != 2 or grads.size == 0:
        raise ValueError(
            "grads must be a non-empty 2-D array of shape (n, d); "
            f"got shape {grads.shape}"
        )

    # Imported only after validation so that malformed input is reported
    # before the (comparatively heavy) solver package is loaded.
    import cvxpy as cp

    def isPD(B):
        """Return True when `la.cholesky(B)` succeeds, False on LinAlgError."""
        try:
            la.cholesky(B)
        except la.LinAlgError:
            return False
        return True

    def nearestPD(A):
        """Return a symmetric matrix built from `A` that passes `isPD`."""
        B = (A + A.T) / 2
        # numpy's svd returns V^T as its third output, so `Vh.T` is V.
        _, s, Vh = la.svd(B)
        H = np.dot(Vh.T, np.dot(np.diag(s), Vh))

        A2 = (B + H) / 2
        A3 = (A2 + A2.T) / 2  # remove any asymmetry left by round-off

        if isPD(A3):
            return A3

        # Still not factorizable: push the spectrum up by growing multiples of
        # the most negative eigenvalue (plus one floating-point spacing at the
        # scale of `A`) until the Cholesky factorization goes through.
        spacing = np.spacing(la.norm(A))
        I = np.eye(A.shape[0])
        k = 1
        while not isPD(A3):
            mineig = np.min(np.real(la.eigvals(A3)))
            A3 += I * (-mineig * k**2 + spacing)
            k += 1

        return A3

    # Columns of G are the individual gradients; g_ is their mean as a column.
    G = grads.T
    n, d = grads.shape
    g_ = np.mean(grads, axis=0).reshape(-1, 1)

    # QP data. `quad_form` needs a PSD matrix, hence the nearestPD adjustment.
    P = nearestPD(n * G.T @ G)
    q = (-n * G.T @ g_).ravel()  # flat vector so that `q @ x` is a true scalar
    F = -G.T @ G
    h = np.zeros(n)

    # define opt variable
    x = cp.Variable(n)
    objective = cp.Minimize((1 / 2) * cp.quad_form(x, P) + q @ x)
    constraints = [F @ x <= h, cp.sum(x) == 1]
    prob = cp.Problem(objective, constraints)

    if solver is None:
        solver = cp.OSQP
    prob.solve(solver=solver, verbose=False)

    # cvxpy leaves `x.value` as None when no solution is available
    # (e.g. an infeasible problem); fail loudly instead of inside a matmul.
    if x.value is None:
        raise RuntimeError(
            f"QP solver did not return a solution (status: {prob.status})"
        )
    s = np.array(x.value)

    return G @ s, s

### Testcases

Now let's run a few sanity checks on random gradients to make sure `gradient_maker` is working correctly.


In [3]:
# Synthetic input: 20 "domain" gradients of dimension 23. Each entry is a
# standard-normal draw multiplied by its own random scale 1000*(z + 1).
# NOTE(review): no random seed is set, so the outputs recorded below will not
# be reproduced on a fresh run.
grads = (1000*(np.random.randn(20, 23)+1))*np.random.randn(20, 23)

In [4]:
# Combined direction for the 20 test gradients; the weight vector is discarded.
g, _ = gradient_maker(grads)

In [5]:
# Inner product of every domain gradient with g. The QP constrains these to be
# non-negative, so any negative entries should be at solver-tolerance level.
grads@g

array([ 2.39367435e+06,  2.76608829e+06,  1.26954728e+06,  7.35109039e+05,
        1.44982369e+06,  2.15022620e+06,  5.82076609e-10,  3.56162657e+06,
        2.95725330e+06,  1.56792038e+06,  2.34780642e+06,  3.57887385e+06,
        1.47453438e+06,  2.25668366e+06, -7.27595761e-11,  9.39994480e+05,
        8.59900851e+05, -9.89530236e-10,  1.39667068e+06,  4.52109546e+06])

In [6]:
from numpy.linalg import norm

In [7]:
# Euclidean norm of each gradient (one value per row of `grads`).
grads_norm = norm(grads, axis=1)

In [8]:
# Norms relative to the smallest one (the smallest gradient shows up as 1).
grads_norm/np.min(grads_norm)

array([1.08388646, 1.51629313, 1.27092154, 1.59399   , 1.31379914,
       1.31801583, 1.49837578, 1.70716584, 1.31866085, 1.        ,
       1.3826933 , 2.27879919, 1.6583476 , 2.19158263, 2.13461028,
       1.19098893, 1.29831724, 1.2547083 , 1.18926768, 1.76766778])

In [9]:
# Divide all gradients by one common factor (the smallest row norm), so the
# smallest gradient has unit norm and relative magnitudes are unchanged.
grads_scaled = grads/np.min(grads_norm)

In [10]:
# Same procedure on the rescaled gradients; `_g` is the resulting direction.
_g, _ = gradient_maker(grads_scaled)

In [11]:
# Inner products of the rescaled gradients with `g`.
# NOTE(review): this multiplies by `g` (computed from the unscaled `grads`),
# not by `_g` from the previous cell, so the result is just `grads@g` divided
# by the scaling factor -- confirm whether `grads_scaled@_g` was intended.
grads_scaled@g

array([ 5.39403602e+02,  6.23325384e+02,  2.86086691e+02,  1.65653470e+02,
        3.26711159e+02,  4.84543671e+02,  1.13686838e-13,  8.02596309e+02,
        6.66403549e+02,  3.53323708e+02,  5.29067472e+02,  8.06482904e+02,
        3.32279599e+02,  5.08533374e+02,  1.27897692e-13,  2.11823470e+02,
        1.93774736e+02, -2.13162821e-13,  3.14733370e+02,  1.01880825e+03])

In [12]:
# Elementwise ratio of the two directions. A (near-)constant ratio means `g`
# and `_g` are parallel, i.e. rescaling the inputs only rescaled the result.
g/_g

array([4437.63136444, 4437.63137768, 4437.63136835, 4437.63132103,
       4437.63138494, 4437.63139213, 4437.63134715, 4437.63157199,
       4437.63130937, 4437.63135263, 4437.63136524, 4437.63141212,
       4437.63129862, 4437.63136311, 4437.6313919 , 4437.63138986,
       4437.63135469, 4437.63139857, 4437.63141446, 4437.63136269,
       4437.631387  , 4437.63141195, 4437.63102117])

In [17]:
from numpy import linalg as la

def nearestPD(A):
    """Build a symmetric matrix from ``A`` that passes ``isPD``.

    Steps:
      1. Symmetrise ``A``.
      2. Average the symmetrised matrix with ``V @ diag(s) @ V.T`` taken from
         its SVD, then symmetrise again to remove round-off asymmetry.
      3. If ``isPD`` still rejects the result, keep adding a growing multiple
         of the identity (driven by the smallest eigenvalue) until it passes.

    Returns the resulting square array with the same shape as ``A``.
    """
    sym = (A + A.T) / 2

    # numpy's svd hands back V^T as its third output, so `vh.T` is V.
    _, sing_vals, vh = la.svd(sym)
    rebuilt = np.dot(vh.T, np.dot(np.diag(sing_vals), vh))

    candidate = (sym + rebuilt) / 2
    candidate = (candidate + candidate.T) / 2

    if not isPD(candidate):
        # One floating-point spacing at the scale of ||A|| guarantees the
        # diagonal shift is never exactly zero.
        eps = np.spacing(la.norm(A))
        identity = np.eye(A.shape[0])
        attempt = 1
        while not isPD(candidate):
            smallest = np.min(np.real(la.eigvals(candidate)))
            candidate += identity * (-smallest * attempt**2 + eps)
            attempt += 1

    return candidate


def isPD(B):
    """Tell whether ``B`` can be Cholesky-factorized.

    Returns True if ``la.cholesky(B)`` succeeds and False if it raises
    ``LinAlgError``; the factor itself is thrown away.
    """
    try:
        la.cholesky(B)
    except la.LinAlgError:
        return False
    else:
        return True

In [13]:
# Gram matrix of the gradients: A[i, j] is the inner product of rows i and j.
A = grads@grads.T

In [18]:
# Inverse of the Gram matrix after passing it through nearestPD, which makes
# sure the matrix being inverted is positive definite.
A_ = np.linalg.inv(nearestPD(A))

In [21]:
# Weights s = A^{-1} @ 1, i.e. the solution of A s = (1, ..., 1).
# The literal 20 must match the number of rows in `grads`.
s = A_@np.ones(20)

In [22]:
# Direction obtained as the weighted combination of the gradients.
# NOTE: this rebinds `g`, replacing the QP result computed earlier.
g = grads.T@s

In [23]:
# Smallest inner product between a gradient and g. By construction
# grads@g = A @ inv(nearestPD(A)) @ 1, so every entry should be close to 1.
np.min(grads@g)

0.9999999999999842