In [1]:
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt
from scipy.io import savemat
from sklearn.datasets import load_svmlight_file

from scfw import dwd
from scfw.frank_wolfe import frank_wolfe
from scfw.prox_grad import prox_grad
from scfw.scopt import scopt

In [2]:
def run_fw(problem_name, alpha_policies=()):
    """Run the DWD experiment for one data set and cache the results on disk.

    The svmlight-format file ``../data/<problem_name>`` is loaded, labels
    whose maximum is 2 are remapped via ``2 * y - 3``, and every non-zero row
    of the feature matrix is rescaled to unit Euclidean norm. The objective,
    gradient, Hessian-product, linear-oracle and projection closures are then
    built on top of ``scfw.dwd`` and handed to the solvers:

    * ``frank_wolfe`` once per entry of ``alpha_policies`` (none by default),
    * ``prox_grad`` always.

    Each solver's output is stored under ``results[problem_name][<method>]``
    and the whole dictionary is pickled to
    ``results/dwd/<problem_name>.pckl``. An existing pickle is loaded first,
    so entries from earlier runs are kept unless overwritten.

    Parameters
    ----------
    problem_name : str
        File name of the data set inside ``../data``; also used as the
        top-level key of the returned dictionary and as the pickle base name.
    alpha_policies : iterable of str, optional
        Values forwarded one at a time to ``frank_wolfe`` as ``alpha_policy``.
        Defaults to an empty tuple, i.e. Frank-Wolfe is skipped. (An earlier
        revision of this notebook listed 'standard', 'line_search',
        'backtracking' and 'sc' here.)

    Returns
    -------
    dict
        ``{problem_name: {method_name: {...histories...}}}`` as written to
        the pickle file.
    """
    out_dir = os.path.join('results', 'dwd')
    # makedirs (unlike mkdir) also creates the missing parent 'results'
    # directory and tolerates the directory already existing.
    os.makedirs(out_dir, exist_ok=True)
    results_file = os.path.join(out_dir, problem_name + '.pckl')
    if os.path.exists(results_file):
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # cache files produced by this notebook.
        with open(results_file, 'rb') as f:
            results = pickle.load(f)
    else:
        results = {problem_name: {}}
    # Guard against a cache file that lacks an entry for this problem.
    results.setdefault(problem_name, {})

    A, y = load_svmlight_file(os.path.join('../data', problem_name))
    # fix classes: when the largest label is 2, map labels 1/2 to -1/+1
    if np.max(y) == 2:
        y = 2 * y - 3
    # normalize: scale every non-zero row to unit Euclidean norm
    for i in range(A.shape[0]):
        row = A[i]
        sq_norm = row.multiply(row).sum()
        if sq_norm != 0:
            A[i] = row.multiply(1 / np.sqrt(sq_norm))

    p, d = A.shape
    n = p + d + 1
    q = 2
    c = np.array([1] * p)

    # The i-th constraint vector is (A[i], y[i], e_i) with e_i the i-th unit
    # vector, so its norm is sqrt(||A[i]||^2 + y[i]^2 + 1). Using the closed
    # form avoids materialising a dense p-by-p identity matrix.
    row_sq_norms = np.asarray(A.multiply(A).sum(axis=1)).ravel()
    sample_norms = np.sqrt(row_sq_norms + y ** 2 + 1.0)
    max_norm = np.max(sample_norms ** (q / (q + 2)))
    Mf = (q + 2) / (q * (q + 1))**(1 / (q + 2)) * n**(1 / (q + 2)) * max_norm
    nu = 2 * (q + 3) / (q + 2)

    # running parameters
    R = 10
    R_w = 10
    u = 5
    # starting point: zeros for the first d + 1 coordinates, 1 / p for the
    # remaining p coordinates
    x0 = np.array([0] * (d + 1) + [1 / p] * p)
    terminate_tol = 1e-15

    # parameters for FW
    FW_params = {
        'iter_FW': 30000,
        'line_search_tol': 1e-10,
        'rho': np.sqrt(n),  # parameters for ll00
        'diam_X': 2,
        'sigma_f': 1,
    }

    # parameters for SCOPT; only consumed by the disabled SCOPT run below
    sc_params = {
        'iter_SC': 1000,
        'Lest': 'estimate',  # estimate L
        'use_two_phase': False,
        # FISTA parameters
        'fista_type': 'mfista',
        'fista_tol': 1e-5,
        'fista_iter': 5000,
        # Conjugate Gradient Parameters
        'conj_grad_tol': 1e-5,
        'conj_grad_iter': 1000,
    }

    # parameters for prox_grad
    prox_params = {
        'iter_prox': 10000,
        'Lest': 'estimate',  # estimate L
        'bb_type': 3,
        # FISTA parameters
        'fista_type': 'fista',
        'fista_tol': 1e-5,
        'fista_iter': 1000,
        'btk_iters': 100,
        'backtracking': False
    }

    # Closures binding the problem data to the generic solver callbacks.
    func_x = lambda x: dwd.dwd_val(A, y, c, x, n, d, p, q)
    func_beta = lambda x, s, beta, denom, denom_s: dwd.dwd_val(A, y, c, (1 - beta) * x + beta * s, n, d, p, q, (1 - beta) * denom + beta * denom_s)
    grad_x = lambda x, denom: dwd.dwd_grad(A, y, c, x, n, d, p, q, denom=denom)
    grad_beta = lambda x, s, beta, denom, denom_s: dwd.dwd_grad(A, y, c, (1 - beta) * x + beta * s, n, d, p, q, (1 - beta) * denom + beta * denom_s)
    hess_mult_x = lambda s, denom: dwd.hess_mult(A, y, c, s, n, d, p, q, denom=denom)
    # only consumed by the disabled SCOPT run below
    hess_mult_vec_x = lambda s, denom: dwd.hess_mult_vec(A, y, c, s, n, d, p, q, denom=denom)
    extra_func = lambda x: A @ x[:d] + x[d] * y + x[(d + 1):]
    linear_oracle = lambda grad: dwd.linear_oracle(grad, d, p, R_w, R, u)
    prox_func = lambda x, L: dwd.projection(x, n, d, p, R_w, R, u)

    # Frank-Wolfe, once per requested step-size policy
    for policy in alpha_policies:
        print(f'{policy} for {problem_name} started!')
        x, alpha_hist, Gap_hist, Q_hist, time_hist = frank_wolfe(
            func_x,
            func_beta,
            grad_x,
            grad_beta,
            hess_mult_x,
            extra_func,
            Mf,
            nu,
            linear_oracle,
            x0,
            FW_params,
            hess=None,
            lloo_oracle=None,
            alpha_policy=policy,
            eps=terminate_tol,
            print_every=10000,
            debug_info=False)

        results[problem_name][policy] = {
            'x': x,
            'alpha_hist': alpha_hist,
            'Gap_hist': Gap_hist,
            'Q_hist': Q_hist,
            'time_hist': time_hist,
        }
        print('norm: %f' % np.linalg.norm(x[(d + 1):]))

    # SCOPT run, currently disabled. Uncomment to re-enable; it relies on
    # sc_params, hess_mult_vec_x and nu defined above.
    # x, alpha_hist, Q_hist, time_hist = scopt(func_x,
    #     grad_x,
    #     hess_mult_x,
    #     hess_mult_vec_x,
    #     Mf,
    #     nu,
    #     prox_func,
    #     x0,
    #     sc_params,
    #     eps=terminate_tol,
    #     print_every=10,
    #     linear_oracle=linear_oracle)
    # results[problem_name]['scopt'] = {
    #     'x': x,
    #     'alpha_hist': alpha_hist,
    #     'Q_hist': Q_hist,
    #     'time_hist': time_hist,
    # }

    # Proximal gradient baseline
    x, alpha_hist, Q_hist, time_hist = prox_grad(
        func_x,
        grad_x,
        prox_func,
        Mf,
        x0,
        prox_params,
        eps=terminate_tol,
        print_every=1000)

    results[problem_name]['prox_grad'] = {
        'x': x,
        'alpha_hist': alpha_hist,
        'Q_hist': Q_hist,
        'time_hist': time_hist,
    }

    # Persist the merged results so later runs can extend them.
    with open(results_file, 'wb') as f:
        pickle.dump(results, f)

    return results

In [4]:
# svmlight-format problem files a1a ... a9a, looked up by run_fw in ../data
files = [f'a{index}a' for index in range(1, 10)]

# Solve every problem in turn; `results` keeps the dictionary of the last one.
for problem_name in files:
    results = run_fw(problem_name)

iter =    1, stepsize = 6.999e-09, rdiff = 1.498e+01 , f = 2.57603e+06
iter = 1000, stepsize = 8.857e-05, rdiff = 9.367e-01 , f = 340.69
iter = 2000, stepsize = 3.795e-02, rdiff = 1.153e+00 , f = -304.782
iter = 3000, stepsize = 3.658e-01, rdiff = 3.096e-02 , f = -394.156
iter = 4000, stepsize = 3.662e-01, rdiff = 3.093e-02 , f = -394.158
iter = 5000, stepsize = 3.662e-01, rdiff = 3.093e-02 , f = -394.158
iter = 6000, stepsize = 3.662e-01, rdiff = 3.093e-02 , f = -394.158
iter = 7000, stepsize = 3.662e-01, rdiff = 3.093e-02 , f = -394.158
iter = 8000, stepsize = 3.662e-01, rdiff = 3.093e-02 , f = -394.158
iter = 9000, stepsize = 3.662e-01, rdiff = 3.093e-02 , f = -394.158
iter = 10000, stepsize = 3.662e-01, rdiff = 3.093e-02 , f = -394.158
0.0010097026824951172
iter =    1, stepsize = 3.209e-09, rdiff = 1.499e+01 , f = 5.13023e+06
iter = 1000, stepsize = 6.369e-04, rdiff = 1.486e+01 , f = 232.533
iter = 2000, stepsize = 1.000e+00, rdiff = 1.082e-02 , f = -475.892
iter = 3000, stepsize 