In [1]:
%load_ext autoreload
%autoreload 2

In [13]:
import os
import pickle
import scipy.io
import numpy as np
import time

from scfw.scopt import scopt
from scfw import log_reg as lr
from scfw.frank_wolfe import frank_wolfe
import matplotlib.pyplot as plt
from sklearn.datasets import load_svmlight_file

In [44]:
data_folder = 'data'

problem_name = 'a4a'
Phi, y = load_svmlight_file(os.path.join(data_folder, problem_name))
N, n = Phi.shape
Phi.shape, y.shape

((4781, 122), (4781,))

In [46]:
data_folder = 'data'

problem_name = 'a9a'
Phi, y = load_svmlight_file(os.path.join(data_folder, problem_name))
N, n = Phi.shape
Phi.shape, y.shape

((32561, 123), (32561,))

In [45]:
Phi

<4781x122 sparse matrix of type '<class 'numpy.float64'>'
	with 66290 stored elements in Compressed Sparse Row format>

In [8]:
results_folder = './results/'
results_file=os.path.join(results_folder,'log_reg',problem_name+'.pckl')
results = {problem_name: {}}
if os.path.isfile(results_file):
    with open(results_file, "rb") as f:
        try:
            results=pickle.load(f)
        except Exception: # so many things could go wrong, can't be more specific.
            pass 

Normalize

In [14]:
N, n = Phi.shape

In [39]:
for i, row in enumerate(Phi):
    if np.sum(row.multiply(row)) != 0:
        Phi[i] = row.multiply(1 / np.sqrt(np.sum(row.multiply(row))))

CPU times: user 38.8 s, sys: 6.37 ms, total: 38.9 s
Wall time: 38.9 s


In [4]:
gamma = 1 / 2*np.sqrt(N)
Mf = N/gamma*np.max(np.sqrt(np.sum(Phi.multiply(Phi),axis=1)))
nu = 3
mu = 0


#running parameters
x0 = np.zeros(n)
r = n*0.05
terminate_tol = 1e-20

#parameters for FW
FW_params={
    'iter_FW':50000,
    'line_search_tol':1e-10,
    'rho':np.sqrt(n), #parameters for ll00
    'diam_X':2,
    'sigma_f':1,
}


sc_params={
    #parameters for SCOPT
    'iter_SC': 1000,
    'Lest': 'backtracking',#,'estimate', #estimate L
    'use_two_phase':False,
    #FISTA parameters
    'fista_type': 'mfista',
    'fista_tol': 1e-5,
    'fista_iter': 1000,
    #Conjugate Gradient Parameters
    'conj_grad_tol':1e-5,
    'conj_grad_iter':1000,
}

## Auxilary functions

In [5]:
func_x = lambda x: lr.log_reg(Phi, y, x, mu, gamma)
func_beta = lambda x, s, beta, exp_product, exp_product_s:lr.log_reg(Phi, y, (1 - beta) * x + beta * s, mu,gamma,np.exp(np.log(exp_product)*(1-beta)+np.log(exp_product_s)*beta))
grad_x = lambda x, exp_product: lr.grad_log_reg(Phi, y, x,  mu, gamma, exp_product)
grad_beta = lambda x, s, beta, exp_product, exp_product_s: lr.grad_log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma, (1 - beta) * exp_product + beta * exp_product_s)
hess_x = lambda s, exp_product: lr.hess(Phi, y, mu, gamma, exp_product,s)
hess_mult_x = lambda s, exp_product: lr.hess_mult_log_reg(Phi, y, mu, gamma, exp_product,s)
hess_mult_vec_x = lambda s, exp_product: lr.hess_mult_vec(Phi, y,mu, gamma, exp_product,s)
extra_func = lambda x: np.exp(-y*(Phi @ x+mu))
linear_oracle = lambda grad: lr.linear_oracle_l1(grad, r)
# llo_oracle = lambda x, r, grad, rho: pr.llo_oracle(x, r, grad,rho)
prox_func = lambda x, L: lr.projection_l1(x,r)

## Run FW

In [1]:
files = ['a4a','w4a','a1a','a2a','a3a','a5a','a6a','a7a','a8a','a9a','w1a','w2a','w3a','w5a','w6a','w7a','w8a']
def run_fw(problem_name):
    out_dir = os.path.join('results', 'log_reg')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    results_file = os.path.join(out_dir, problem_name + '.pckl')
    if os.path.exists(results_file):
        with open(results_file, 'rb') as f:
            results = pickle.load(f)
    else:
        results = {problem_name: {}}

    Phi, y = load_svmlight_file(os.path.join('data', problem_name))

    # fix classes
    if max(y) == 2:
        y = 2 * y - 3

    N, n = Phi.shape

    # normalize
    for i, row in enumerate(Phi):
        if np.sum(row.multiply(row)) != 0:
            Phi[i] = row.multiply(1 / np.sqrt(np.sum(row.multiply(row))))

    gamma = 1 / 2 * np.sqrt(N)
    Mf = 1/gamma*np.max(np.sqrt(np.sum(Phi.multiply(Phi),axis=1)))
    nu = 3
    mu = 0
    
    
    #running parameters
    x0 = np.zeros(n)
    r = n*0.05
    terminate_tol = 1e-20
    
    #parameters for FW
    FW_params={
        'iter_FW':50000,
        'line_search_tol':1e-10,
        'rho':np.sqrt(n), #parameters for ll00
        'diam_X':2,
        'sigma_f':1,                   
    }
    
    
    sc_params={
        #parameters for SCOPT
        'iter_SC': 1000,
        'Lest': 'backtracking',#,'estimate', #estimate L
        'use_two_phase':False,
        #FISTA parameters
        'fista_type': 'mfista',
        'fista_tol': 1e-5,
        'fista_iter': 1000,
        #Conjugate Gradient Parameters
        'conj_grad_tol':1e-5,
        'conj_grad_iter':1000,
    }

    func_x = lambda x: lr.log_reg(Phi, y, x, mu, gamma)
    func_beta = lambda x, s, beta, exp_product, exp_product_s:lr.log_reg(Phi, y, (1 - beta) * x + beta * s, mu,gamma,np.exp(np.log(exp_product)*(1-beta)+np.log(exp_product_s)*beta))
    grad_x = lambda x, exp_product: lr.grad_log_reg(Phi, y, x,  mu, gamma, exp_product)
    grad_beta = lambda x, s, beta, exp_product, exp_product_s: lr.grad_log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma, (1 - beta) * exp_product + beta * exp_product_s)
    hess_x = lambda s, exp_product: lr.hess(Phi, y, mu, gamma, exp_product,s)
    hess_mult_x = lambda s, exp_product: lr.hess_mult_log_reg(Phi, y, mu, gamma, exp_product,s)
    hess_mult_vec_x = lambda s, exp_product: lr.hess_mult_vec(Phi, y,mu, gamma, exp_product,s)
    extra_func = lambda x: np.exp(-y*(Phi @ x+mu))
    linear_oracle = lambda grad: lr.linear_oracle_l1(grad, r)
    # llo_oracle = lambda x, r, grad, rho: pr.llo_oracle(x, r, grad,rho)
    prox_func = lambda x, L: lr.projection_l1(x,r)

    run_alpha_policies = ["backtracking", "standard", "line_search", "icml"]
#     run_alpha_policies = ['icml']

    for policy in run_alpha_policies:
        print(f'{policy} for {problem_name} started!')
        x, alpha_hist, Gap_hist, Q_hist, time_hist = frank_wolfe(func_x,
                           func_beta,                                      
                           grad_x,
                           grad_beta,
                           hess_mult_x,
                           extra_func,
                           Mf,
                           nu,
                           linear_oracle,                                                    
                           x0,
                           FW_params,
                           hess=None, 
                           lloo_oracle=None,                                                 
                           alpha_policy=policy,                                                    
                           eps=terminate_tol, 
                           print_every=50000, 
                           debug_info=False)
  
        results[problem_name][policy] = {
            'x': x,
            'alpha_hist': alpha_hist,
            'Gap_hist': Gap_hist,
            'Q_hist': Q_hist,
            'time_hist': time_hist,
        }

        with open(results_file, 'wb') as f:
            pickle.dump(results, f)    

        print(f'{policy} for {problem_name} finished!')
    return results