In [1]:
# Enable the IPython autoreload extension; mode 2 re-imports all modules
# before each cell runs, so edits to the local `scfw` package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle
import scipy.io
import numpy as np

from scfw.scopt import scopt
from scfw import log_reg as lr
from scfw.frank_wolfe import frank_wolfe
import matplotlib.pyplot as plt
from sklearn.datasets import load_svmlight_file


In [3]:
# Location of the svmlight-format data sets and the one used in this notebook.
data_folder = '../data'
problem_name = 'a9a'

# Phi is the (sparse) feature matrix, y the label vector.
Phi, y = load_svmlight_file(os.path.join(data_folder, problem_name))
N, n = Phi.shape

# Uniform vector whose n entries sum to one.
x = np.full(n, 1.0 / n)

# Show the dimensions of what was loaded.
(Phi.shape, y.shape)

((32561, 123), (32561,))

In [4]:
# Where the per-problem results pickle lives.
results_folder = './results/'
results_file = os.path.join(results_folder, 'log_reg', problem_name + '.pckl')

# Create the target directory up front: the solver cells below dump to
# `results_file` only after long runs, and a missing folder would make that
# dump fail and lose the work.
os.makedirs(os.path.dirname(results_file), exist_ok=True)

# Start from an empty result set and overwrite it with the cached one if a
# readable pickle exists.
results = {problem_name: {}}
if os.path.isfile(results_file):
    # NOTE: pickle.load can execute arbitrary code; only point this at files
    # produced by this notebook.
    try:
        with open(results_file, "rb") as f:
            results = pickle.load(f)
    except Exception as err:  # a corrupt/partial cache must not stop the notebook
        # Best effort: keep the empty defaults, but say so instead of
        # silently discarding the cache.
        print('Could not load cached results from', results_file, '-', repr(err))

# Guard against a cache that is not a dict or lacks this problem's entry, so
# `results[problem_name][...] = ...` in later cells cannot raise.
if not isinstance(results, dict):
    print('Ignoring cached results of unexpected type', type(results).__name__)
    results = {}
results.setdefault(problem_name, {})

Normalize

In [5]:
N, n = Phi.shape

# Rescale every non-zero row of Phi to unit Euclidean norm (in place).
for i, row in enumerate(Phi):
    row_sq_norm = np.sum(row.multiply(row))
    if row_sq_norm == 0:
        # All-zero rows are left untouched to avoid dividing by zero.
        continue
    Phi[i] = row.multiply(1 / np.sqrt(row_sq_norm))

In [6]:
# --- Problem constants (passed to the lr.* oracles and to the solvers) ---
# NOTE(review): the original expression `1 / 2*np.sqrt(N)` evaluates as
# (1/2) * sqrt(N); it is spelled out here with the same value. Confirm that
# 1 / (2 * sqrt(N)) was not what was meant.
gamma = 0.5 * np.sqrt(N)
# Largest row norm of Phi, scaled by 1/gamma.
Mf = (1 / gamma) * np.max(np.sqrt(np.sum(Phi.multiply(Phi), axis=1)))
nu = 3
mu = 0

# --- Running parameters ---
x0 = np.zeros(n)          # starting point
r = n * 0.05              # radius handed to the l1 oracle / projection
terminate_tol = 1e-20     # stopping tolerance (`eps` of both solvers)

# --- Frank-Wolfe settings ---
FW_params = {
    'iter_FW': 50000,
    'line_search_tol': 1e-10,
    'rho': np.sqrt(n),    # parameter for LLOO
    'diam_X': 2,
    'sigma_f': 1,
}

# --- SCOPT settings ---
sc_params = {
    'iter_SC': 1000,
    'Lest': 'backtracking',   # how L is estimated; 'estimate' is the alternative
    'use_two_phase': False,
    # FISTA parameters
    'fista_type': 'mfista',
    'fista_tol': 1e-5,
    'fista_iter': 1000,
    # Conjugate gradient parameters
    'conj_grad_tol': 1e-5,
    'conj_grad_iter': 1000,
}

## Auxiliary functions

In [7]:
# Oracles handed to frank_wolfe / scopt. They close over the notebook globals
# Phi, y, mu, gamma and r. `exp_product` denotes the value of `extra_func`,
# i.e. exp(-y * (Phi @ x + mu)), at the corresponding point.

def _mix_exp_product(beta, exp_product, exp_product_s):
    """Return exp_product**(1 - beta) * exp_product_s**beta, computed in log space.

    Because exp_product is an exponential of an affine function of x, this is
    its exact value at the convex combination (1 - beta) * x + beta * s.
    """
    return np.exp(np.log(exp_product) * (1 - beta) + np.log(exp_product_s) * beta)


def func_x(x):
    """Evaluate lr.log_reg at x."""
    return lr.log_reg(Phi, y, x, mu, gamma)


def func_beta(x, s, beta, exp_product, exp_product_s):
    """Evaluate lr.log_reg at (1 - beta) * x + beta * s, reusing cached exp products."""
    return lr.log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma,
                      _mix_exp_product(beta, exp_product, exp_product_s))


def grad_x(x, exp_product):
    """Evaluate lr.grad_log_reg at x."""
    return lr.grad_log_reg(Phi, y, x, mu, gamma, exp_product)


def grad_beta(x, s, beta, exp_product, exp_product_s):
    """Evaluate lr.grad_log_reg at (1 - beta) * x + beta * s.

    The exp products are combined geometrically, exactly as in func_beta.
    The previous linear mix (1 - beta) * exp_product + beta * exp_product_s
    is not the exp product of the interpolated point.
    """
    return lr.grad_log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma,
                           _mix_exp_product(beta, exp_product, exp_product_s))


def hess_x(s, exp_product):
    """Forward to lr.hess."""
    return lr.hess(Phi, y, mu, gamma, exp_product, s)


def hess_mult_x(s, exp_product):
    """Forward to lr.hess_mult_log_reg."""
    return lr.hess_mult_log_reg(Phi, y, mu, gamma, exp_product, s)


def hess_mult_vec_x(s, exp_product):
    """Forward to lr.hess_mult_vec."""
    return lr.hess_mult_vec(Phi, y, mu, gamma, exp_product, s)


def extra_func(x):
    """Return exp(-y * (Phi @ x + mu)), the quantity cached between oracle calls."""
    return np.exp(-y * (Phi @ x + mu))


def linear_oracle(grad):
    """Forward to lr.linear_oracle_l1 with radius r."""
    return lr.linear_oracle_l1(grad, r)


# llo_oracle = lambda x, r, grad, rho: pr.llo_oracle(x, r, grad,rho)

def prox_func(x, L):
    """Forward to lr.projection_l1 with radius r; L is accepted but not used."""
    return lr.projection_l1(x, r)

In [None]:
# Step-size policies to benchmark with Frank-Wolfe on the loaded problem.
run_alpha_policies = ["backtracking", "standard", "line_search", "icml"]

for policy in run_alpha_policies:
    print(policy)
    x, alpha_hist, Gap_hist, Q_hist, time_hist = frank_wolfe(
        func_x,
        func_beta,
        grad_x,
        grad_beta,
        hess_mult_x,
        extra_func,
        Mf,
        nu,
        linear_oracle,
        x0,
        FW_params,
        hess=None,
        lloo_oracle=None,
        alpha_policy=policy,
        eps=terminate_tol,
        print_every=1000,
        debug_info=False,
    )

    # Store the final iterate and the per-iteration histories of this policy.
    results[problem_name][policy] = dict(
        x=x,
        alpha_hist=alpha_hist,
        Gap_hist=Gap_hist,
        Q_hist=Q_hist,
        time_hist=time_hist,
    )

    # Checkpoint after every policy so a later failure keeps earlier runs.
    with open(results_file, 'wb') as f:
        pickle.dump(results, f)

backtracking
********* Algorithm starts *********
iter = 1, stepsize = 0.37446301349305844, criterion = 9.999999999999999e-11, upper_bound=22569.565346212377, lower_bound=8066.522962502984, real_Gap=14503.042383709393
iter = 1000, stepsize = 0.001102253403032789, criterion = 9.999999999999999e-11, upper_bound=16881.98159321663, lower_bound=16867.216628229766, real_Gap=14.76496498686538
iter = 2000, stepsize = 0.0006169625467931153, criterion = 9.999999999999999e-11, upper_bound=16875.837640769478, lower_bound=16868.467464287805, real_Gap=7.370176481672388
iter = 3000, stepsize = 0.00031320518186063543, criterion = 9.999999999999999e-11, upper_bound=16873.752020427935, lower_bound=16868.77615953425, real_Gap=4.975860893686331
iter = 4000, stepsize = 0.00021739331009457027, criterion = 9.999999999999999e-11, upper_bound=16872.70391031122, lower_bound=16868.949379242844, real_Gap=3.75453106837449
iter = 5000, stepsize = 0.00023555957855542203, criterion = 9.999999999999999e-11, upper_boun

iter = 47000, stepsize = 2.514355813977891e-05, criterion = 9.999999999999999e-11, upper_bound=16869.788276061794, lower_bound=16869.481432850458, real_Gap=0.30684321133594494
iter = 48000, stepsize = 3.09681286809868e-05, criterion = 9.999999999999999e-11, upper_bound=16869.782568575225, lower_bound=16869.483605139216, real_Gap=0.2989634360092168
iter = 49000, stepsize = 1.6046908243744704e-05, criterion = 9.999999999999999e-11, upper_bound=16869.777103676846, lower_bound=16869.483605139216, real_Gap=0.29349853763051215
iter = 50000, stepsize = 3.165108872298265e-05, criterion = 9.999999999999999e-11, upper_bound=16869.771860558572, lower_bound=16869.483605139216, real_Gap=0.28825541935657384
1246.4250462055206
standard
********* Algorithm starts *********
iter = 1, stepsize = 0.6666666666666666, criterion = 9.999999999999999e-11, upper_bound=22569.565346212377, lower_bound=8066.522962502984, real_Gap=14503.042383709393
iter = 1000, stepsize = 0.001996007984031936, criterion = 9.99999

iter = 43000, stepsize = 4.650946467606158e-05, criterion = 9.999999999999999e-11, upper_bound=16869.514312760995, lower_bound=16869.441173205898, real_Gap=0.07313955509744119
iter = 44000, stepsize = 4.545247943275306e-05, criterion = 9.999999999999999e-11, upper_bound=16869.514312760995, lower_bound=16869.441173205898, real_Gap=0.07313955509744119
iter = 45000, stepsize = 4.444246922359006e-05, criterion = 9.999999999999999e-11, upper_bound=16869.514312760995, lower_bound=16869.441173205898, real_Gap=0.07313955509744119
iter = 46000, stepsize = 4.347637059258293e-05, criterion = 9.999999999999999e-11, upper_bound=16869.51431138115, lower_bound=16869.441173205898, real_Gap=0.07313817525209743
iter = 47000, stepsize = 4.255138079230671e-05, criterion = 9.999999999999999e-11, upper_bound=16869.51431138115, lower_bound=16869.441173205898, real_Gap=0.07313817525209743
iter = 48000, stepsize = 4.1664930627890504e-05, criterion = 9.999999999999999e-11, upper_bound=16869.51431138115, lower_b

In [None]:
# Solve the same problem with SCOPT, starting from x0.
x, alpha_hist, Q_hist, time_hist = scopt(
    func_x,
    grad_x,
    hess_mult_x,
    hess_mult_vec_x,
    Mf,
    nu,
    prox_func,
    x0,
    sc_params,
    eps=terminate_tol,
    print_every=10,
)

# scopt returns no gap or gradient history, so only these four are stored.
results[problem_name]['scopt'] = dict(
    x=x,
    alpha_hist=alpha_hist,
    Q_hist=Q_hist,
    time_hist=time_hist,
)

# Persist the updated results next to the Frank-Wolfe runs.
with open(results_file, 'wb') as f:
    pickle.dump(results, f)

## Run FW

In [None]:
# Names of the data-set files available for batch runs.
files = ['a4a','w4a','a1a','a2a','a3a','a5a','a6a','a7a','a8a','a9a','w1a','w2a','w3a','w5a','w6a','w7a','w8a']

# Output directory for batch results; makedirs with exist_ok avoids the
# check-then-create race of `if not exists: mkdir`.
out_dir = 'out'
os.makedirs(out_dir, exist_ok=True)
def run_fw(file_name, out_dir):
    """Run Frank-Wolfe with several step-size policies on one data set.

    Parameters
    ----------
    file_name : str
        Name of the svmlight-format file inside the ``data`` directory.
    out_dir : str
        Directory meant for the pickled results. Currently unused because the
        dump at the end of the function is disabled.

    Returns
    -------
    dict
        ``{file_name: {policy: {...}}}``; each inner dict holds the final
        iterate ``x`` and the ``alpha_hist``, ``Gap_hist``, ``Q_hist`` and
        ``time_hist`` sequences returned by ``frank_wolfe``.
    """
    # Key the results by the data set actually processed. The previous code
    # used the notebook-global `problem_name`, which mislabels every other file.
    results = {file_name: {}}
    Phi, y = load_svmlight_file(os.path.join('data', file_name))

    # fix classes: relabel {1, 2} as {-1, +1}
    if max(y) == 2:
        y = 2 * y - 3

    N, n = Phi.shape

    # normalize every non-zero row to unit Euclidean norm
    for i, row in enumerate(Phi):
        row_sq_norm = np.sum(row.multiply(row))
        if row_sq_norm != 0:
            Phi[i] = row.multiply(1 / np.sqrt(row_sq_norm))

    rho = 1 / 4
    # regularization param
    gamma = 0
    # self-concordant coef
    Mf = 1 / np.sqrt(rho) * np.max(np.sqrt((Phi.multiply(Phi)).sum(axis=1)))
    # bias
    mu = 0
    # from Tran-Dinh
    nu = 2

    # running parameters
    x0 = np.ones(n) / n
    # radius
    M = lr.log_reg(Phi, y, x0, mu, gamma)[0] / rho

    # termination tolerance
    terminate_tol = 1e-20

    # parameters for FW
    FW_params = {
        'iter_FW': 50000,
        'line_search_tol': 1e-10,
        'rho': np.sqrt(n),  # parameter for LLOO
        'diam_X': np.sqrt(2),
        'sigma_f': 1,
    }

    # NOTE(review): these oracles cache the dot product Phi @ x, whereas the
    # executed top-level cell of this notebook caches exp(-y * (Phi @ x + mu))
    # and calls lr.hess_mult_log_reg with a different argument order. One of
    # the two conventions is presumably stale -- confirm against scfw.log_reg.
    func_x = lambda x: lr.log_reg(Phi, y, x, mu, gamma)
    # Dot products are linear in x, so they interpolate linearly along the
    # segment between x and s (same scheme as grad_beta below).
    func_beta = lambda x, s, beta, dot_product, dot_product_s: lr.log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma, (1 - beta) * dot_product + beta * dot_product_s)
    grad_x = lambda x, dot_product: lr.grad_log_reg(Phi, y, x, mu, gamma, dot_product)
    grad_beta = lambda x, s, beta, dot_product, dot_product_s: lr.grad_log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma, (1 - beta) * dot_product + beta * dot_product_s)
    hess_mult_x = lambda x, dot_product: lr.hess_mult_log_reg(Phi, y, x, mu, gamma, dot_product)
    extra_func = lambda x: Phi @ x
    linear_oracle = lambda grad: lr.linear_oracle_full_simplex(grad, M)

    run_alpha_policies = ["standard", "line_search", "icml"]

    for policy in run_alpha_policies:
        # Positional order matches the executed frank_wolfe call earlier in
        # this notebook; func_beta was missing here, which shifted every
        # following argument by one position.
        x, alpha_hist, Gap_hist, Q_hist, time_hist = frank_wolfe(
            func_x,
            func_beta,
            grad_x,
            grad_beta,
            hess_mult_x,
            extra_func,
            Mf,
            nu,
            linear_oracle,
            x0,
            FW_params,
            hess=None,
            lloo_oracle=None,
            alpha_policy=policy,
            eps=terminate_tol,
            print_every=1000,
            debug_info=False,
        )

        results[file_name][policy] = {
            'x': x,
            'alpha_hist': alpha_hist,
            'Gap_hist': Gap_hist,
            'Q_hist': Q_hist,
            'time_hist': time_hist,
        }

    # Persisting is disabled; re-enable to write the results into out_dir.
#     with open(os.path.join(out_dir, file_name), 'wb') as f:
#         pickle.dump(results, f)
    return results