<a href="https://colab.research.google.com/github/m-mehabadi/grad-maker/blob/main/_notebooks/Testing_GradientMaker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

http://www.cs.cmu.edu/~pradeepr/convexopt/Lecture_Slides/dual-ascent.pdf

In [13]:
import numpy as np
# import torch
import matplotlib.pyplot as plt

In [88]:
# dim=2000000
# a = 100*np.random.randn(4, dim) + 1123
# g = np.random.randn(dim, 1)
# b = (a-np.mean(a, axis=1)[:, np.newaxis])/np.std(a, axis=1)[:, np.newaxis]
# np.mean(b@g/dim)

In [27]:
def gradient_maker(domain_grads, epsilon=0.5, alpha=0.01, eff=0.1):
    def log():
        print(f"Iter={iter}, Condition={np.min(dgr@g/dim)}")
    
    # def null(A, eps=1e-15):
    #     u, s, vh = np.linalg.svd(A)
    #     # print(vh)
    #     null_space = np.compress(s <= eps, vh, axis=0)
    #     return null_space.T

    def null(A):
        from scipy.linalg import null_space
        n = null_space(A, rcond=1)
        if n.shape[1] < 1:
            return False, None
        r = np.sqrt(np.sum((A@n)**2, axis=0)/n.shape[1])
        if np.min(r) >= 1e-2:
            return False, None
        return True, n.T[np.argmin(r)].reshape(-1, 1)
    
    def scale():
        G = np.concatenate((g.reshape(1, -1), domain_grads), axis=0)
        is_fine, n = null(G@G.T)
        if not is_fine:
            return np.mean(np.sqrt(np.sum(domain_grads**2, axis=1)))/np.sqrt(np.sum(g**2))
        n /= np.sum(n[1:])
        return -1.*n[0,0]

    dgr = np.copy(domain_grads)
    number_of_domains, dim = dgr.shape

    #
    dgr /= np.sqrt(np.sum(dgr**2, axis=1))[:, np.newaxis]
    
    #
    g = np.random.randn(dim)
    g /= np.sqrt(np.sum(g**2))
    u_ = np.zeros(number_of_domains)
    
    iter = 0
    while not np.min(dgr@g/dim) >= epsilon:

        log()
        
        u_ = u_ + alpha*((1.+eff)*epsilon - (dgr@g)/dim)
        g = (1./number_of_domains)*np.sum(((1+u_).reshape(number_of_domains, 1))*dgr, axis=0)

        iter += 1
    
    log()
    return g
    # return scale()*g

### Testcases

Now let's write some test cases to make sure everything is working correctly


In [16]:
epsilon=10
alpha=0.9

In [25]:
grads = 20*np.random.randn(2, 3) - 10
print(grads)

[[-17.79259088  -6.6306844  -38.75626371]
 [-18.81916069 -13.96884986   2.84194223]]


In [28]:
g = gradient_maker(grads, epsilon=epsilon, alpha=alpha)
print(g)

Iter=0, Condition=-0.03640108660973767
Iter=1, Condition=2.3680987179500925
Iter=2, Condition=4.0654365525947265
Iter=3, Condition=5.4289530109455555
Iter=4, Condition=6.524308584915555
Iter=5, Condition=7.404248085697314
Iter=6, Condition=8.111141280895358
Iter=7, Condition=8.67902375683657
Iter=8, Condition=9.13523631872191
Iter=9, Condition=9.501741901735041
Iter=10, Condition=9.796183431057473
Iter=11, Condition=10.032733589575534
[-27.76745497 -17.11456424 -17.84057387]


In [30]:
G = np.concatenate((g.reshape(1, -1), grads), axis=0)

In [31]:
G

array([[-27.76745497, -17.11456424, -17.84057387],
       [-17.79259088,  -6.6306844 , -38.75626371],
       [-18.81916069, -13.96884986,   2.84194223]])

In [40]:
def null(A, eps=1e-15):
    u, s, vh = np.linalg.svd(A)
    # print(vh)
    null_space = np.compress(s <= eps, vh, axis=0)
    return null_space.T

In [44]:
null(G@G.T, eps=1)

array([[-0.66979101],
       [ 0.35610344],
       [ 0.65159063]])

In [53]:
ns = null_space(G@G.T, rcond=1)

In [60]:
G@G.T@ns.T[-1]

array([3.41060513e-13, 1.70530257e-13, 2.84217094e-13])