In [9]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
import os
import pickle

import numpy as np
from sklearn.datasets import load_svmlight_file
from scipy.linalg import norm

from scfw import log_reg as lr
from scfw.frank_wolfe import frank_wolfe

In [11]:
problem_name = 'a1a'
results = {problem_name: {}}
Phi, y = load_svmlight_file('data/a1a')
N, n = Phi.shape
x = np.ones(n) / n
Phi.shape, y.shape

((1605, 119), (1605,))

In [14]:
f, Phix = lr.log_reg(Phi, y, x, 1, 0)

# Check grad

$$\nabla f(x) = \frac{1}{N} \sum_{i=1}^N \frac{-y_i \phi_i}{\exp(y_i(\langle \Phi_i; x \rangle + \mu) + 1} + \gamma x$$

In [15]:
def grad_naive(Phi, y, x, mu, gamma):
    numer = -Phi.multiply(y.reshape(-1, 1))
    denom = np.exp(y * (Phi @ x + mu)) + 1
    return np.array(np.mean(numer / denom.reshape(-1, 1), axis=0) + gamma * x).flatten()

In [29]:
grad_n = grad_naive(Phi, y, x, 1, 0)
print(grad_n.shape)

(119,)


In [30]:
grad = lr.grad_log_reg(Phi, y, x, 1, 0, Phix)
print(grad.shape)

(1605, 119)
(1605, 1)
(119, 1)
(119,)


# Check hess mult vec

$$\nabla^2 f(x) = \frac{1}{N} \sum_{i=1}^N \phi_i \phi_i^T \frac{exp(...)}{(1 + exp(...))^2} + \gamma$$

$$\nabla^2 f(x) x = \frac{1}{N} \sum_{i=1}^N \phi_i \phi_i^T x \frac{exp(...)}{(1 + exp(...))^2} + \gamma x$$

In [34]:
def hess_mult_naive(Phi, y, x, mu, gamma):
    Phix = Phi @ x
    Phi_m_x = Phi.multiply(x.reshape(1, -1)).sum(axis=1) # N x 1
    exp_product = np.exp(y * (Phix + mu)) # N x 1
    frac = exp_product / (1 + exp_product) ** 2
    fst_term = Phi.multiply(frac.reshape(-1, 1)) # N x n
    print(fst_term.shape)
    print(Phi_m_x.shape)
    print(x.reshape(-1, 1).shape)
    return np.array(np.mean(fst_term.multiply(Phi_m_x), axis=0) + gamma * x).flatten()

In [35]:
hess_mult_n = hess_mult_naive(Phi, y, x, 1, 0)
print(hess_mult_n.shape)

(1605, 119)
(1605, 1)
(119, 1)
(119,)


In [36]:
hess_mult = lr.hess_mult_vec(Phi, y, x, 1, 0, Phix)
print(hess_mult.shape)

(119,)


# Check hess mult vec vec

$$x^T \nabla^2 f(x) x = \frac{1}{N} \sum_{i=1}^N \exp(...) \left( \frac{\phi_i^T x}{1 + \exp(...)} \right)^2 + \gamma \|x\|_2^2$$

In [38]:
def hess_mult_log_reg_naive(Phi, y, x, mu, gamma):
    Phix = Phi @ x
    exp_product = np.exp(y * (Phix + mu))
    Z = (Phix / (1 + exp_product))**2
    return 1 / N * np.sum(exp_product * Z) + gamma * x.dot(x)

In [40]:
hess_mult_lr_n = hess_mult_log_reg_naive(Phi, y, x, 1, 0)
hess_mult_lr = lr.hess_mult_log_reg(Phi, y, x, 1, , Phix)
hess_mult_lr_n == hess_mult_lr

False

## Data preprocessing and parameters initialization

In [None]:
if max(y) == 2:
    y = 2 * y - 3

Normalize

In [61]:
N, n = Phi.shape

for i, row in enumerate(Phi):
    if np.sum(row.multiply(row)) != 0:
        Phi[i] = row.multiply(1 / np.sqrt(np.sum(row.multiply(row))))

In [40]:
gamma = 1 / 4
Mf = 1 / np.sqrt(gamma) * np.max(np.sqrt((Phi.multiply(Phi)).sum(axis=1)))
mu = 0
nu = 2
M = 1

#running parameters
x0 = np.zeros(n)
terminate_tol = 1e-20

#parameters for FW
FW_params={
    'iter_FW':50000,
    'line_search_tol':1e-10,
    'rho':np.sqrt(n), #parameters for ll00
    'diam_X':np.sqrt(2),
    'sigma_f':1,                   
}


sc_params={
    #parameters for SCOPT
    'iter_SC': 1000,
    'Lest': 'backtracking',#,'estimate', #estimate L
    'use_two_phase':False,
    #FISTA parameters
    'fista_type': 'mfista',
    'fista_tol': 1e-5,
    'fista_iter': 1000,
    #Conjugate Gradient Parameters
    'conj_grad_tol':1e-5,
    'conj_grad_iter':1000,
}

## Auxilary functions

In [46]:
func_x = lambda x: lr.log_reg(Phi, y, x, mu, gamma)
grad_x = lambda x, dot_product: lr.grad_log_reg(Phi, y, x, mu, gamma, dot_product)
grad_beta = lambda x, s, beta, dot_product, dot_product_s: lr.grad_log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma, (1 - beta) * dot_product + beta * dot_product_s)
# hess_x = lambda x, dot_product: lr.hess_portfolio(R, x, dot_product)
hess_mult_x = lambda x, dot_product: lr.hess_mult_log_reg(Phi, y, x, mu, gamma, dot_product)
hess_mult_vec_x = lambda s, dot_product: lr.hess_mult_vec(Phi, y, s, mu, gamma, dot_product)
extra_func = lambda x: Phi @ x
linear_oracle = lambda grad: lr.linear_oracle_full_simplex(grad, M)
# llo_oracle = lambda x, r, grad, rho: pr.llo_oracle(x, r, grad,rho)
# prox_func = lambda x, L: lr.proj_simplex(x)

## Run FW

In [96]:
files = ['a4a','w4a','a1a','a2a','a3a','a5a','a6a','a7a','a8a','a9a','w1a','w2a','w3a','w5a','w6a','w7a','w8a']
out_dir = 'out'
if not os.path.exists(out_dir):
    os.mkdir(out_dir)
def run_fw(file_name, out_dir):
    results = {problem_name: {}}
    Phi, y = load_svmlight_file(os.path.join('data', file_name))

    # fix classes
    if max(y) == 2:
        y = 2 * y - 3

    N, n = Phi.shape

    # normalize
    for i, row in enumerate(Phi):
        if np.sum(row.multiply(row)) != 0:
            Phi[i] = row.multiply(1 / np.sqrt(np.sum(row.multiply(row))))

    rho = 1 / 4
    # regularization param
    gamma = 0
    # self-concordant coef
    Mf = 1 / np.sqrt(rho) * np.max(np.sqrt((Phi.multiply(Phi)).sum(axis=1)))
    # bias
    mu = 0
    # from tran-dihn
    nu = 2

    #running parameters
    x0 = np.ones(n) / n
    # radius
    M = lr.log_reg(Phi, y, x0, mu, gamma)[0] / rho

    # terminate_tolerance
    terminate_tol = 1e-20

    #parameters for FW
    FW_params={
        'iter_FW':50000,
        'line_search_tol':1e-10,
        'rho':np.sqrt(n), #parameters for ll00
        'diam_X':np.sqrt(2),
        'sigma_f':1,
    }

    func_x = lambda x: lr.log_reg(Phi, y, x, mu, gamma)
    grad_x = lambda x, dot_product: lr.grad_log_reg(Phi, y, x, mu, gamma, dot_product)
    grad_beta = lambda x, s, beta, dot_product, dot_product_s: lr.grad_log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma, (1 - beta) * dot_product + beta * dot_product_s)
    # hess_x = lambda x, dot_product: lr.hess_portfolio(R, x, dot_product)
    hess_mult_x = lambda x, dot_product: lr.hess_mult_log_reg(Phi, y, x, mu, gamma, dot_product)
    hess_mult_vec_x = lambda s, dot_product: lr.hess_mult_vec(Phi, y, s, mu, gamma, dot_product)
    extra_func = lambda x: Phi @ x
    linear_oracle = lambda grad: lr.linear_oracle_full_simplex(grad, M)

    run_alpha_policies = ["standard", "line_search", "icml"]
#     run_alpha_policies = ['standard']
    # run_alpha_policies = ['new_lloo','lloo']

    for policy in run_alpha_policies:
        x, alpha_hist, Gap_hist, Q_hist, time_hist, grad_hist = frank_wolfe(func_x,
                           grad_x,
                           grad_beta,
                           hess_mult_x,
                           extra_func,
                           Mf,
                           nu,
                           linear_oracle,                                                    
                           x0,
                           FW_params,
                           hess=None, 
                           lloo_oracle=None,                                                 
                           alpha_policy=policy,                                                    
                           eps=terminate_tol, 
                           print_every=1000, 
                           debug_info=False)

        results[problem_name][policy] = {
            'x': x,
            'alpha_hist': alpha_hist,
            'Gap_hist': Gap_hist,
            'Q_hist': Q_hist,
            'time_hist': time_hist,
            'grad_hist': grad_hist
        }
        
#     with open(os.path.join(out_dir, file_name), 'wb') as f:
#         pickle.dump(results, f)
    return results