In [1]:
import os
import pickle
import numpy as np
from numpy import matlib
from sklearn.datasets import load_svmlight_file
import scfw.kl as kl
from scfw.frank_wolfe import frank_wolfe
from scfw.scopt import scopt
from scfw.prox_grad import prox_grad

In [2]:
# Locations used by the rest of the notebook, relative to the notebook's
# working directory.
data_folder = "../data"          # datasets are read from here
results_folder = "./results/"    # pickled results are written below here

## Run FW

In [3]:
def run_fw(problem_name):
    """Run Frank-Wolfe on one dataset with every step-size policy and pickle the outcome.

    The svmlight file ``data_folder/problem_name`` is loaded, each feature row
    is multiplied by its label, rows with a negative sum are sign-flipped and
    rows with a zero sum are dropped (together with their labels).
    ``frank_wolfe`` is then called once per policy in
    ``('sc', 'backtracking', 'line_search')`` on the ``scfw.kl`` objective and
    the results are written to ``results_folder/kl/<problem_name>.pckl``.  If
    that file already exists, its contents are loaded first and the entries of
    the policies run here are overwritten.

    Parameters
    ----------
    problem_name : str
        File name of the dataset inside ``data_folder``; also the top-level
        key of the pickled dictionary.

    Returns
    -------
    None
        The results are only written to disk, as
        ``{problem_name: {policy: {'x', 'alpha_hist', 'Gap_hist', 'Q_hist',
        'time_hist'}}}``.
    """
    policy_list = ['sc', 'backtracking', 'line_search']

    results_dir = os.path.join(results_folder, 'kl')
    results_file = os.path.join(results_dir, problem_name + '.pckl')
    # Create the output directory before the (long) solver runs, so that a
    # missing directory cannot make the final dump fail and lose the results.
    os.makedirs(results_dir, exist_ok=True)

    results = {problem_name: {}}
    if os.path.isfile(results_file):
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # result files produced by this notebook.
        with open(results_file, "rb") as f:
            results = pickle.load(f)
    # Guard against a previously stored dictionary that lacks this problem.
    results.setdefault(problem_name, {})

    # ---- data loading and preprocessing --------------------------------
    W, y = load_svmlight_file(os.path.join(data_folder, problem_name))
    y = y.reshape(-1, 1)
    W = W.toarray()
    # Scale every row by its label; broadcasting the (N, 1) column is
    # equivalent to tiling it across the columns first.
    W = y * W
    sA = np.sum(W, 1)
    # Flip rows with a negative sum so that every row sum is non-negative.
    W[sA < 0, :] = -W[sA < 0, :]
    # Drop rows with a zero sum, and drop the matching labels so that W and y
    # keep the same number of samples.
    keep = sA != 0
    W = W[keep, :]

    bias = 1
    y = np.abs(bias * y[keep, 0])
    # The original test `b.any == 0` compared a bound method with 0 and was
    # therefore never true; this is the element-wise check it was meant to be.
    if np.any(y == 0):
        print(' Input parameter y error')

    # ---- problem and solver parameters ----------------------------------
    n = W.shape[1]
    lam = np.sqrt(n) / 2
    Mf = 1
    nu = 4
    R = 30
    # Start from the uniform vector over the n features, with R appended as
    # the last coordinate.
    x0 = np.hstack((np.ones(n) / n, R))
    terminate_tol = 1e-15
    FW_params = {
        'iter_FW': 50000,
        'line_search_tol': 1e-10,
    }

    # ---- oracles handed to frank_wolfe ------------------------------------
    func_x = lambda x: kl.val(W, y, lam, x)
    # The *_beta variants evaluate at the convex combination (1-beta)*x + beta*s
    # and combine the cached dot products in the same way.
    func_beta = lambda x, s, beta, dot_product, dot_product_s: kl.val(
        W, y, lam, (1 - beta) * x + beta * s,
        (1 - beta) * dot_product + beta * dot_product_s)
    grad_x = lambda x, dot_product: kl.grad(W, y, lam, x, dot_product)
    grad_beta = lambda x, s, beta, dot_product, dot_product_s: kl.grad(
        W, y, lam, (1 - beta) * x + beta * s,
        (1 - beta) * dot_product + beta * dot_product_s)
    hess_mult_x = lambda x, dot_product: kl.hess_mult(W, y, lam, x, dot_product)
    # Product of W with all coordinates of x except the last one.
    extra_func = lambda x: W @ x[:-1]
    linear_oracle = lambda grad, x: kl.linear_oracle(grad, x)

    for policy in policy_list:
        x, alpha_hist, Gap_hist, Q_hist, time_hist = frank_wolfe(
            func_x,
            func_beta,
            grad_x,
            grad_beta,
            hess_mult_x,
            extra_func,
            Mf,
            nu,
            linear_oracle,
            x0,
            FW_params,
            hess=None,
            lloo_oracle=None,
            alpha_policy=policy,
            eps=terminate_tol,
            print_every=10000,
            debug_info=False)
        results[problem_name][policy] = {
            'x': x,
            'alpha_hist': alpha_hist,
            # Previously computed but discarded; stored as an additional key.
            'Gap_hist': Gap_hist,
            'Q_hist': Q_hist,
            'time_hist': time_hist,
        }

    with open(results_file, 'wb') as f:
        pickle.dump(results, f)

In [4]:
# Dataset names 'a1a' ... 'a9a'; run_fw is executed once for each of them.
data_list = ['a%da' % idx for idx in range(1, 10)]
for problem_name in data_list:
    run_fw(problem_name)

********* Algorithm starts *********
iter=1, stepsize=3.64e-06, criterion=5.15e-06, upper_bound=-425.1844977638899, lower_bound=-98132.92996543091, real_Gap=9.77e+04, f_val=-425.1844977638899
Convergence achieved!
iter=5224, stepsize=-1.80e-16, criterion=2.34e-16, upper_bound=-1597.858908081988, lower_bound=-1597.8589077351394, real_Gap=-3.47e-07, f_val=-1597.8589080819877
********* Algorithm starts *********
iter=1, stepsize=1.33e-02, criterion=1.00e-05, upper_bound=-425.1844977638899, lower_bound=-98132.92996543091, real_Gap=9.77e+04, f_val=-425.1844977638899
Convergence achieved!
iter=814, stepsize=4.36e-16, criterion=5.36e-16, upper_bound=-1597.8056012660354, lower_bound=-1597.8056006648737, real_Gap=-6.01e-07, f_val=-1597.8056012660354
********* Algorithm starts *********
iter=1, stepsize=2.97e-02, criterion=1.00e-05, upper_bound=-425.1844977638899, lower_bound=-98132.92996543091, real_Gap=9.77e+04, f_val=-425.1844977638899
iter=10000, stepsize=5.82e-11, criterion=7.59e-11, upper_

In [2]:
# Load and preprocess a single dataset for the interactive experiments below.
problem_name = 'a5a'
results = {problem_name: {}}
W, y = load_svmlight_file(os.path.join(data_folder, problem_name))
y = y.reshape(-1, 1)
W = W.toarray()

# Scale every feature row by its label; broadcasting the (N, 1) column y is
# equivalent to tiling it across the columns first.
W = y * W
# Row sums of the label-scaled features.
sA = np.sum(W, 1)
# Flip the sign of rows whose sum is negative.
W[sA < 0, :] = -W[sA < 0, :]
# Drop rows whose sum is zero; the same mask is applied to the labels below
# so that W and y keep the same number of samples.
nonzero_rows = sA != 0
W = W[nonzero_rows, :]
# W = scipy.sparse.csr_matrix(W)

Bias = 1
b = np.abs(Bias * y[nonzero_rows, 0])
# The original test `b.any == 0` compared a bound method with 0 and could never
# fire; this is the element-wise check it was meant to be.
if np.any(b == 0):
    print(' Input parameter y error')

y = b
N, n = W.shape

In [3]:
# ---- problem constants ------------------------------------------------------
# lam is fixed here; np.sqrt(N) / 2 was the alternative tried in the original cell.
lam = 0.005
Mf = 1
nu = 4
R = 30

# ---- run settings -----------------------------------------------------------
# Initial point: uniform weights over the n features with R appended last.
x0 = np.hstack((np.ones(n) / n, R))
terminate_tol = 1e-15

# Options handed to frank_wolfe.
FW_params = {
    "iter_FW": 50000,
    "line_search_tol": 1e-10,
}

# Options handed to prox_grad.
prox_params = {
    "iter_prox": 50000,
    "Lest": "estimate",      # estimate L
    "bb_type": 3,
    # FISTA parameters
    "fista_type": "fista",
    "fista_tol": 1e-5,
    "fista_iter": 1000,
    "btk_iters": 100,
    "backtracking": True,
}

# Options handed to scopt.
sc_params = {
    "iter_SC": 1000,
    "Lest": "estimate",      # estimate L
    "use_two_phase": True,
    # FISTA parameters
    "fista_type": "mfista",
    "fista_tol": 1e-5,
    "fista_iter": 1000,
    # conjugate-gradient parameters
    "conj_grad_tol": 1e-2,
    "conj_grad_iter": 100,
}


# ---- oracles bound to the current W, y and lam ------------------------------
def func_x(x):
    """Evaluate kl.val at x."""
    return kl.val(W, y, lam, x)


def func_beta(x, s, beta, dot_product, dot_product_s):
    """Evaluate kl.val at (1 - beta) * x + beta * s, mixing the cached dot products alike."""
    point = (1 - beta) * x + beta * s
    mixed_dot = (1 - beta) * dot_product + beta * dot_product_s
    return kl.val(W, y, lam, point, mixed_dot)


def grad_x(x, dot_product):
    """Evaluate kl.grad at x with the supplied dot product."""
    return kl.grad(W, y, lam, x, dot_product)


def grad_beta(x, s, beta, dot_product, dot_product_s):
    """Evaluate kl.grad at (1 - beta) * x + beta * s, mixing the cached dot products alike."""
    point = (1 - beta) * x + beta * s
    mixed_dot = (1 - beta) * dot_product + beta * dot_product_s
    return kl.grad(W, y, lam, point, mixed_dot)


def hess_mult_x(x, dot_product):
    """Forward to kl.hess_mult for the current problem data."""
    return kl.hess_mult(W, y, lam, x, dot_product)


def hess_mult_vec_x(s, dot_product):
    """Forward to kl.hess_mult_vec for the current problem data."""
    return kl.hess_mult_vec(W, y, lam, s, dot_product)


def extra_func(x):
    """Product of W with every coordinate of x except the last one."""
    return W @ x[:-1]


def linear_oracle(grad, x):
    """Forward to kl.linear_oracle."""
    return kl.linear_oracle(grad, x)


def prox_func(s, L):
    """Proximal step used by SCOPT: kl.projection(s); L is accepted but unused.

    The original cell also kept np.maximum(s, 0) as a commented-out alternative.
    """
    return kl.projection(s)
#prox_func = lambda s, L: np.maximum(s, 0) #used for SCOPT

In [4]:
# Single Frank-Wolfe run using the 'sc' step-size policy.
policy = 'sc'
x, alpha_hist, Gap_hist, Q_hist, time_hist = frank_wolfe(
    func_x, func_beta,
    grad_x, grad_beta,
    hess_mult_x,
    extra_func,
    Mf, nu,
    linear_oracle,
    x0,
    FW_params,
    hess=None,
    lloo_oracle=None,
    alpha_policy=policy,
    eps=terminate_tol,
    print_every=1000,
    debug_info=False,
)

********* Algorithm starts *********
iter=1, stepsize=8.81e-07, criterion=1.25e-06, upper_bound=-2313.663171765081, lower_bound=-399864.93441811873, real_Gap=3.98e+05, f_val=-2313.663171765081
iter=1000, stepsize=1.11e-06, criterion=1.25e-06, upper_bound=-2685.8515026432538, lower_bound=-357148.00527738297, real_Gap=3.54e+05, f_val=-2685.8515026432538
iter=2000, stepsize=1.41e-06, criterion=1.25e-06, upper_bound=-3102.9808945046852, lower_bound=-314414.1441248068, real_Gap=3.11e+05, f_val=-3102.9808945046852
iter=3000, stepsize=1.79e-06, criterion=1.25e-06, upper_bound=-3562.5114451715904, lower_bound=-271726.799982505, real_Gap=2.68e+05, f_val=-3562.5114451715904
iter=4000, stepsize=2.27e-06, criterion=1.25e-06, upper_bound=-4058.040671243533, lower_bound=-229115.81064119458, real_Gap=2.25e+05, f_val=-4058.040671243533
iter=5000, stepsize=2.77e-06, criterion=1.25e-06, upper_bound=-4570.235107684244, lower_bound=-188566.3490315311, real_Gap=1.84e+05, f_val=-4570.235107684244
iter=6000,

In [10]:
# Inspect the last coordinate of the current iterate x (the slot that x0
# initialises with R).
x[-1]

1.7104211312130828

In [6]:
# Proximal-gradient run on the same problem and starting point.
# NOTE(review): the recorded execution of this cell stopped with
# "ValueError: array must not contain infs or NaNs" — confirm the settings
# in prox_params before relying on its result.
x, alpha_hist, Q_hist, time_hist = prox_grad(
    func_x,
    grad_x,
    prox_func,
    Mf,
    x0,
    prox_params,
    eps=terminate_tol,
    print_every=1000,
)

iter =    1, stepsize = 5.549e-04, rdiff = 9.997e-01 , f = -2313.66
iter = 1000, stepsize = 4.522e-03, rdiff = 5.234e-01 , f = -6235.16
iter = 2000, stepsize = 5.376e-03, rdiff = 3.697e-01 , f = -4645.9
iter = 3000, stepsize = 2.689e-05, rdiff = 7.334e-01 , f = -4740.57


ValueError: array must not contain infs or NaNs

In [21]:
# Inspect the last coordinate of the current iterate x (the slot that x0
# initialises with R).
x[-1]

1e-10

In [5]:
# SCOPT run on the same problem and starting point, printing every iteration.
# NOTE(review): the recorded execution printed "The value of nu is not valid"
# and then failed while unpacking the return value — confirm which values of
# nu scopt accepts before re-running.
x, alpha_hist, Q_hist, time_hist = scopt(
    func_x,
    grad_x,
    hess_mult_x,
    hess_mult_vec_x,
    Mf,
    nu,
    prox_func,
    x0,
    sc_params,
    eps=terminate_tol,
    print_every=1,
)

The value of nu is not valid


TypeError: cannot unpack non-iterable NoneType object

In [7]:
# Scratch check: largest element of the tuple (-1, 0).
# NOTE(review): the recorded output below does not match this expression
# (which evaluates to 0); the cell was presumably edited after it was run.
np.max((-1, 0))

1

In [28]:
# Scratch check: tensor (outer) product of a[1] with itself via axes=0.
# NOTE(review): `a` is not defined in any visible cell — this only runs with
# leftover kernel state.
np.tensordot(a[1], a[1], axes=0)

array([[ 9, 12],
       [12, 16]])