In [1]:
import os
import pickle
import numpy as np
from numpy import matlib
from sklearn.datasets import load_svmlight_file
import scfw.kl as kl
from scfw.frank_wolfe import frank_wolfe
from scfw.scopt import scopt
from scfw.prox_grad import prox_grad

In [2]:
data_folder = '../data'
results_folder = './results/'

## Run FW

In [3]:
def run_fw(problem_name):
    results = {problem_name: {}}
    results_file = os.path.join(results_folder,'kl',problem_name+'.pckl')
    results = {problem_name: {}}
    if os.path.isfile(results_file):
        with open(results_file, "rb") as f:
            results=pickle.load(f)
    W, y = load_svmlight_file(os.path.join(data_folder, problem_name))
    y = y.reshape(-1, 1)
    W = W.toarray()
    W = matlib.repmat(y, 1, W.shape[1]) * W
    sA = np.sum(W, 1)
    W[sA < 0, :] = -W[sA < 0, :]
    W = W[sA != 0, :]
    Bias = 1
    b = (Bias * y).squeeze()
    b = np.abs(b)
    if (b.any == 0):
        print(' Input parameter y error')
    y = b
    N, n = W.shape
    #lam =  np.sqrt(n)/2
    lam =  0.005
    Mf = 1
    nu = 4
    R = 30
    x0 = np.hstack((np.ones(n) / n, R))
    terminate_tol = 1e-15
    FW_params={
        'iter_FW': 50000,
        'line_search_tol': 1e-10,
    }
    prox_params={
        #parameters for SCOPT
        'iter_prox': 10000,
        'Lest': 'estimate',#,'estimate', #estimate L
        'bb_type': 3,
        #FISTA parameters
        'fista_type': 'fista',
        'fista_tol': 1e-5,
        'fista_iter': 1000,
        'btk_iters': 100,
        'backtracking': True
    }
    func_x = lambda x: kl.val(W, y, lam, x)
    func_beta = lambda x, s, beta, dot_product, dot_product_s: kl.val(W, y, lam, (1 - beta) * x + beta * s, (1 - beta) * dot_product + beta * dot_product_s)
    grad_x = lambda x, dot_product: kl.grad(W, y, lam, x, dot_product)
    grad_beta = lambda x, s, beta, dot_product, dot_product_s: kl.grad(W, y, lam, (1 - beta) * x + beta * s, (1 - beta) * dot_product + beta * dot_product_s)
    hess_mult_x = lambda x, dot_product: kl.hess_mult(W, y, lam, x, dot_product)
    hess_mult_vec_x = lambda s, dot_product: kl.hess_mult_vec(W, y, lam, s, dot_product)
    extra_func = lambda x: W @ x[:-1]
    linear_oracle = lambda grad, x: kl.linear_oracle(grad, x)
    prox_func = lambda s, L, x: kl.projection(s, x)
    #prox_func = lambda s, L: np.maximum(s, 0)
    policy_list = ['sc', 'backtracking', 'line_search']
    for policy in policy_list:
        x, alpha_hist, Gap_hist, Q_hist, time_hist = frank_wolfe(func_x,
                                func_beta,                                      
                                grad_x,
                                grad_beta,
                                hess_mult_x,
                                extra_func,
                                Mf,
                                nu,
                                linear_oracle,                                                    
                                x0,
                                FW_params,
                                hess=None, 
                                lloo_oracle=None,                                                 
                                alpha_policy=policy,                                                    
                                eps=terminate_tol, 
                                print_every=10000, 
                                debug_info=False)
        results[problem_name][policy] = {
            'x': x,
            'alpha_hist': alpha_hist,
            'Q_hist': Q_hist,
            'time_hist': time_hist,
        }
    print('Prox grad started')
    x, alpha_hist, Q_hist, time_hist = prox_grad(func_x,
                grad_x,
                prox_func,
                Mf,
                x0,
                prox_params,
                eps=terminate_tol,
                print_every=1000)

    results[problem_name]['prox_grad'] = {
            'x': x,
            'alpha_hist': alpha_hist,
            'Q_hist': Q_hist,
            'time_hist': time_hist,
        }
    with open(results_file, 'wb') as f:
        pickle.dump(results, f)

In [4]:
#data_list =  ['a1a','a2a','a3a', 'a4a', 'a5a', 'a6a','a7a','a8a','a9a']
data_list = ['a9a']
for problem_name in data_list:
    run_fw(problem_name)

********* Algorithm starts *********
iter=1, stepsize=1.72e-07, criterion=2.44e-07, upper_bound=-11682.119139451102, lower_bound=-2036881.5022619397, real_Gap=2.03e+06, f_val=-11682.119139451102
iter=10000, stepsize=2.74e-07, criterion=2.44e-07, upper_bound=-15609.138687206325, lower_bound=-1609421.7729409698, real_Gap=1.59e+06, f_val=-15609.138687206325
iter=20000, stepsize=4.37e-07, criterion=2.44e-07, upper_bound=-20357.404889443333, lower_bound=-1182847.947007085, real_Gap=1.16e+06, f_val=-20357.404889443333
iter=30000, stepsize=6.86e-07, criterion=2.44e-07, upper_bound=-25496.52284453637, lower_bound=-778516.7684981711, real_Gap=7.53e+05, f_val=-25496.52284453637
iter=40000, stepsize=1.09e-06, criterion=2.44e-07, upper_bound=-30156.646802934967, lower_bound=-384761.5069818148, real_Gap=3.55e+05, f_val=-30156.646802934967
iter=50000, stepsize=1.60e-05, criterion=2.44e-07, upper_bound=-32503.903266081146, lower_bound=-36564.05425510102, real_Gap=4.06e+03, f_val=-32503.903266081146
2

In [7]:
problem_name = 'a1a'
results = {problem_name: {}}
W, y = load_svmlight_file(os.path.join(data_folder, problem_name))
y = y.reshape(-1, 1)
W = W.toarray()

# multiplicative multiplication of the structure of the recognizable structure
W = matlib.repmat(y, 1, W.shape[1]) * W
# summation of feature descriptions
sA = np.sum(W, 1)
# if the sum of the string is negative, invert it
W[sA < 0, :] = -W[sA < 0, :]
# zero row deletion
W = W[sA != 0, :]
# W = scipy.sparse.csr_matrix(W)

Bias = 1
b = (Bias * y).squeeze()
b = np.abs(b)
if (b.any == 0):
    print(' Input parameter y error')

y = b
N, n = W.shape

In [20]:
#lam =  np.sqrt(N)/2
lam =  0.005
#first set of parameters
Mf = 1
nu = 4
R = 30
#running parameters
x0 = np.hstack((np.ones(n) / n, R))
terminate_tol = 1e-15

FW_params={
    'iter_FW': 50000,
    'line_search_tol': 1e-10,
}

prox_params={
        #parameters for SCOPT
        'iter_prox': 100,
        'Lest': 'estimate',#,'estimate', #estimate L
        'bb_type': 3,
        #FISTA parameters
        'fista_type': 'fista',
        'fista_tol': 1e-5,
        'fista_iter': 1000,
        'btk_iters': 100,
        'backtracking': False
    }

sc_params={
    #parameters for SCOPT
    'iter_SC': 1000,
    'Lest': 'estimate', #estimate L
    'use_two_phase': True,
    #FISTA parameters
    'fista_type': 'mfista',
    'fista_tol': 1e-5,
    'fista_iter': 100,
    #Conjugate Gradient Parameters
    'conj_grad_tol': 1e-2,
    'conj_grad_iter': 100,
}

func_x = lambda x: kl.val(W, y, lam, x)
func_beta = lambda x, s, beta, dot_product, dot_product_s: kl.val(W, y, lam, (1 - beta) * x + beta * s, (1 - beta) * dot_product + beta * dot_product_s)
grad_x = lambda x, dot_product: kl.grad(W, y, lam, x, dot_product)
grad_beta = lambda x, s, beta, dot_product, dot_product_s: kl.grad(W, y, lam, (1 - beta) * x + beta * s, (1 - beta) * dot_product + beta * dot_product_s)
hess_mult_x = lambda x, dot_product: kl.hess_mult(W, y, lam, x, dot_product)
hess_mult_vec_x = lambda s, dot_product: kl.hess_mult_vec(W, y, lam, s, dot_product)
extra_func = lambda x: W @ x[:-1]
linear_oracle = lambda grad, x: kl.linear_oracle(grad, x)
#prox_func = lambda s, L: kl.projection(s) #used for SCOPT
prox_func = lambda s, L: np.maximum(s, 0) #used for SCOPT

In [9]:
policy = 'sc'
x, alpha_hist, Gap_hist, Q_hist, time_hist = frank_wolfe(func_x,
                           func_beta,                                      
                           grad_x,
                           grad_beta,
                           hess_mult_x,
                           extra_func,
                           Mf,
                           nu,
                           linear_oracle,                                                    
                           x0,
                           FW_params,
                           hess=None, 
                           lloo_oracle=None,                                                 
                           alpha_policy=policy,                                                    
                           eps=terminate_tol, 
                           print_every=10000, 
                           debug_info=False)

********* Algorithm starts *********
iter=1, stepsize=3.64e-06, criterion=5.15e-06, upper_bound=-588.6651794834256, lower_bound=-98132.92996543148, real_Gap=9.75e+04, f_val=-588.6651794834256
iter=10000, stepsize=3.73e-05, criterion=4.99e-06, upper_bound=-1604.7511456237717, lower_bound=-1605.337481925639, real_Gap=5.86e-01, f_val=-1604.7511456237717
iter=20000, stepsize=3.28e-05, criterion=2.94e-06, upper_bound=-1604.8556047801235, lower_bound=-1605.1980990404174, real_Gap=3.42e-01, f_val=-1604.8556047801235
iter=30000, stepsize=3.14e-06, criterion=2.94e-06, upper_bound=-1604.9052213473462, lower_bound=-1605.116852464407, real_Gap=2.12e-01, f_val=-1604.9052213473462
iter=40000, stepsize=5.36e-05, criterion=2.94e-06, upper_bound=-1604.9330412087918, lower_bound=-1605.0711389655385, real_Gap=1.38e-01, f_val=-1604.9330412087918
iter=50000, stepsize=1.19e-05, criterion=2.94e-06, upper_bound=-1604.949836777485, lower_bound=-1605.0575573570634, real_Gap=1.08e-01, f_val=-1604.949836777485
9.

In [10]:
x[-1]

5.949932622227108

In [24]:
def proj_simplex(y):
    ind = np.argsort(y)
    sum_y = sum(y)
    origin_y = sum_y
    n = len(y)
    Py = y.copy()
    for i in range(n):
        t = (sum_y - 1) / (n - i)
        if (origin_y > 1 and t < 0): #for numerical errors
            sum_y = sum(y[ind[i : n - 1]])
            t = (sum_y - 1) / (n - i)
        if i > 0:
            if t <= y[ind[i]] and t >= y[ind[i - 1]]:
                break
        elif t <= y[ind[i]]:
            break
        sum_y -= y[ind[i]]
        Py[ind[i]] = 0
    Py = np.maximum(y - t, np.zeros(n))
    return Py

def projection(y):
    t = y[-1]
    y = y[:-1]
    P_y = proj_simplex(y)
    P_y = P_y * t
    return np.hstack((P_y, np.max((t, 0))))
    #return np.hstack((P_y, np.abs(t)))

prox_func = lambda s, L: projection(s) #used for SCOPT

x, alpha_hist, Q_hist, time_hist = prox_grad(func_x,
                grad_x,
                prox_func,
                Mf,
                x0,
                prox_params,
                eps=terminate_tol,
                print_every=10)

iter =    1, stepsize = 1.956e-03, rdiff = 9.997e-01 , f = -588.665
iter =   10, stepsize = 4.184e-04, rdiff = 7.473e-01 , f = -979.745
iter =   20, stepsize = 5.456e-04, rdiff = 6.355e-01 , f = -1217.7
iter =   30, stepsize = 6.510e-04, rdiff = 5.904e-01 , f = -1408.02
iter =   40, stepsize = 7.732e-04, rdiff = 5.092e-01 , f = -1535.91
iter =   50, stepsize = 1.020e-03, rdiff = 3.831e-01 , f = -1588.39
iter =   60, stepsize = 1.417e-02, rdiff = 2.923e-01 , f = -1603.08
iter =   70, stepsize = 4.112e-02, rdiff = 3.201e-01 , f = -1583.54
iter =   80, stepsize = 1.307e-01, rdiff = 4.112e-01 , f = -1525.26
iter =   90, stepsize = 1.601e-01, rdiff = 9.727e-01 , f = -885.107
iter =  100, stepsize = 4.050e-02, rdiff = 9.810e-01 , f = -1481.8
0.0009975433349609375


In [25]:
x[-1]

29.941674449072963

In [23]:
R

30

In [15]:
x, alpha_hist, Q_hist, time_hist = scopt(func_x,
          grad_x,
          hess_mult_x,
          hess_mult_vec_x,
          Mf,
          nu,
          prox_func,
          x0,  
          sc_params,                                              
          eps=terminate_tol,                                              
          print_every=1)

The value of nu is not valid


TypeError: cannot unpack non-iterable NoneType object

In [7]:
np.max((-1, 0))

1

In [28]:
np.tensordot(a[1], a[1], axes=0)

array([[ 9, 12],
       [12, 16]])