Вычисление частных производных решения задачи квадратичного программирования по её параметрам (OptNet): https://arxiv.org/pdf/1703.00443.pdf

In [1]:
import numpy as np
from scipy.linalg import norm

In [2]:
from utils.simplex_projection import euclidean_proj_simplex

def euclidean_proj_l1_ball(v, s=1):
    """Project ``v`` onto the L1 ball of radius ``s``.

    Args:
        v: NumPy array to project.
        s: Radius of the L1 ball (defaults to 1).

    Returns:
        ``v`` itself (the very same object, not a copy) when
        ``sum(|v|) <= s``. Otherwise a new array: the result of
        ``euclidean_proj_simplex`` applied to ``|v|`` with the signs of ``v``
        restored.
    """
    magnitudes = np.abs(v)
    already_inside = magnitudes.sum() <= s
    if not already_inside:
        # NOTE(review): euclidean_proj_simplex is imported from
        # utils.simplex_projection; presumably it projects onto the simplex
        # {w >= 0, sum(w) = s} -- confirm against that module.
        projected = euclidean_proj_simplex(magnitudes, s=s)
        # Re-apply the original signs in place on the helper's output.
        projected *= np.sign(v)
        return projected
    return v

In [3]:
def projected_gradient_descent(objective_and_grad, x0, project=lambda x: x, stepsize=1e-3, reltol=1e-3, maxiters=1000):
    """Minimize a function over a set with fixed-step projected gradient descent.

    Every iteration takes a gradient step of length ``stepsize`` and maps the
    result back with ``project``. Iteration stops as soon as the length of the
    last move is at most ``reltol`` times the norm of the iterate it started
    from, or after ``maxiters`` iterations.

    Args:
        objective_and_grad: Callable ``x -> (value, gradient)``. Only the
            gradient is used here.
        x0: Starting point (NumPy array). It is copied, never modified.
        project: Callable mapping a point to the feasible set; the identity
            by default (unconstrained descent).
        stepsize: Fixed gradient step length.
        reltol: Relative tolerance of the stopping test.
        maxiters: Maximum number of iterations.

    Returns:
        The most recent projected iterate, or a copy of ``x0`` when
        ``maxiters`` is 0.
    """
    x = x0.copy()
    for _ in range(maxiters):
        _, grad = objective_and_grad(x)
        x_next = project(x - stepsize * grad)
        move = norm(x - x_next)
        scale = norm(x)
        # Keep the newest iterate before testing convergence, so the returned
        # point has always been passed through `project` (the previous iterate
        # may be the raw, possibly infeasible, x0).
        x = x_next
        # Multiplicative form of `move / scale < reltol`: avoids the 0/0 = NaN
        # that made the test never fire when the iterate is the zero vector.
        if move <= reltol * scale:
            break
    return x

Пример: LASSO

Пусть дана функция $f$ скалярного аргумента:

$$ f(\gamma) = g(x_*(\gamma)) $$

где $x_*(\gamma) = \arg\min_x \|Ax - b\|_2^2 \quad \text{s.t.} \quad \|x\|_1 \leq \gamma$, а $g$ — произвольная дифференцируемая функция.

Необходимо найти производную функции $f$.

In [5]:
# Radius of the L1-ball constraint ||x||_1 <= gamma; the derivative of f is
# evaluated at this value.
gamma = 0.2

In [6]:
# Problem size: `dim` unknowns, `n` rows in the least-squares system.
dim = 3
n = 10

# Fixed seed so the random problem instance below is reproducible.
np.random.seed(0)
# Draw order matters: A is sampled before b from the same seeded stream.
A = np.random.randn(n, dim)
b = np.random.randn(n,)

def objective_and_grad(x):
    """Return the least-squares objective and its gradient at ``x``.

    Uses the module-level ``A`` and ``b``.

    Returns:
        Tuple ``(0.5 * ||A x - b||^2, A^T (A x - b))``.
    """
    residual = A.dot(x) - b
    value = 0.5 * np.sum(residual ** 2)
    gradient = A.T.dot(residual)
    return value, gradient

Пусть, например, $g(x) = \sum_i \exp(x_i)$.

In [14]:
def some_function_and_grad(x):
    """Return ``sum(exp(x))`` together with its gradient ``exp(x)``.

    Args:
        x: NumPy array (or scalar) at which to evaluate.

    Returns:
        Tuple ``(value, gradient)``; the gradient is the element-wise
        exponential of ``x``.
    """
    exponentials = np.exp(x)
    total = exponentials.sum()
    return total, exponentials

Будем искать решение задачи оптимизации методом проекции градиента.

In [16]:
def funcion(gamma):
    """Evaluate f(gamma) = g(x_*(gamma)) by solving the constrained problem.

    x_*(gamma) is computed with projected gradient descent on
    ``objective_and_grad`` under the constraint ||x||_1 <= gamma, and g is the
    value returned by ``some_function_and_grad``.

    Args:
        gamma: Radius of the L1 ball.

    Returns:
        The scalar g(x_*(gamma)).
    """
    def proj(x):
        return euclidean_proj_l1_ball(x, gamma)

    # Start every evaluation from the same point, drawn from a private seeded
    # generator. This makes f a deterministic function of gamma (finite
    # differences then compare like with like) and leaves the global NumPy
    # random state untouched.
    x0 = np.random.RandomState(0).randn(dim)
    x = projected_gradient_descent(objective_and_grad, x0, project=proj, reltol=1e-5, maxiters=200)
    return some_function_and_grad(x)[0]  # arbitrary function of x

Решение системы, порождённой условиями Каруша — Куна — Таккера

In [17]:
# Solve the constrained least-squares problem for the current gamma with
# projected gradient descent, starting from a random point.
def proj(x):
    return euclidean_proj_l1_ball(x, gamma)


x0 = np.random.randn(dim)
x = projected_gradient_descent(
    objective_and_grad,
    x0,
    project=proj,
    reltol=1e-5,
    maxiters=200,
)

print(x)

[-0.   0.  -0.2]


In [18]:
# Write x = u - v and work with the stacked variable uv = (u, v); the
# constraints built below are uv >= 0 and sum(uv) <= gamma.
AA = np.hstack([A, -A])

# Quadratic term; equal to np.kron([[1, -1], [-1, 1]], A^T A).
Q = AA.T.dot(AA)
# Linear term -AA^T b as a column vector.
p = -np.expand_dims(AA.T.dot(b), axis=1)

# Inequalities G uv <= h: the first row is the budget sum(uv) <= gamma, the
# remaining rows encode -uv <= 0.
G = np.vstack([np.ones((1, 2 * dim)), -np.eye(2 * dim)])
h = np.zeros((2 * dim + 1,))
h[0] = gamma

In [19]:
from cvxopt import matrix, solvers
from cvxopt.solvers import options

# Convert the NumPy problem data into cvxopt matrices.
Q_, p_, G_, h_ = matrix(Q), matrix(p), matrix(G), matrix(h)

# Solve the QP silently with very tight tolerances.
sol = solvers.qp(
    Q_,
    p_,
    G_,
    h_,
    options={'show_progress': False, 'maxiters': 1000, 'reltol': 1e-16, 'feastol': 1e-16},
)

# Split the stacked solution back into u and v and recover x = u - v.
uv = np.array(sol['x']).flatten()
u, v = uv[:dim], uv[dim:]
x = u - v

print(x)

[-1.48564381e-308  7.72635962e-309 -2.00000000e-001]


In [20]:
# Dual variables of the QP solution.

# sol['z'] holds the multipliers of the inequality constraints G uv <= h;
# flatten them into a 1-D array.
lmbda = np.array(sol['z']).flatten() # inequality constraints
# sol['y'] would hold equality-constraint multipliers; the QP above was
# solved without equality constraints, so it is not used.

print(lmbda)

[4.98713175e+000 5.61736782e+000 3.63728294e+000 9.97426350e+000
 4.35689568e+000 6.33698057e+000 9.35289451e-307]


In [21]:
from numpy.linalg import solve

# Jacobian of the KKT system with respect to (uv, lmbda):
#   stationarity:            Q uv + p + G^T lmbda = 0
#   complementary slackness: diag(lmbda) (G uv - h) = 0
J_z = np.block([[Q, G.T], [np.dot(np.diag(lmbda), G), np.diag(np.dot(G,uv) - h)]])
# dh/dgamma: only h[0] (the L1 budget) depends on gamma.
J_h_gam = np.zeros(h.shape)
J_h_gam[0] = 1.

# Derivative of the KKT residual with respect to gamma: the stationarity rows
# do not involve h; the slackness rows contribute -diag(lmbda) dh/dgamma.
J_h = np.r_[np.zeros((2*dim,)), np.dot(np.diag(lmbda), -1*J_h_gam)]

# Implicit function theorem: d(uv, lmbda)/dgamma = -J_z^{-1} J_h; keep only
# the primal part d(uv)/dgamma.
J_gam_uv = -1*solve(J_z, J_h)[:2*dim]

# Gradient of g evaluated at the current x.
g = some_function_and_grad(x)[1]

# Chain rule through x = u - v: df/dgamma = g . du/dgamma - g . dv/dgamma.
fprime_gam = np.sum(g*J_gam_uv[:dim]) - np.sum(g*J_gam_uv[dim:])

print(fprime_gam)

-0.8187307530779818


Численная проверка:

In [22]:
# Central finite difference of f around gamma, used to validate the analytic
# derivative. The two evaluations are kept in this order (plus, then minus).
eps = 1e-5

f_plus, f_minus = funcion(gamma + eps), funcion(gamma - eps)

fprime = (f_plus - f_minus) / (2 * eps)

print(fprime)

-0.8187307531049014
