In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle
import scipy.io
import numpy as np
import time

from scfw.scopt import scopt
from scfw import log_reg as lr
from scfw.frank_wolfe import frank_wolfe
import matplotlib.pyplot as plt
from sklearn.datasets import load_svmlight_file

In [3]:
data_folder = '../data'

problem_name = 'a5a'
Phi, y = load_svmlight_file(os.path.join(data_folder, problem_name))
N, n = Phi.shape
Phi.shape, y.shape

((6414, 122), (6414,))

In [4]:
#data_folder = 'data'

#problem_name = 'a9a'
#Phi, y = load_svmlight_file(os.path.join(data_folder, problem_name))
#N, n = Phi.shape
#Phi.shape, y.shape

In [5]:
Phi

<6414x122 sparse matrix of type '<class 'numpy.float64'>'
	with 88939 stored elements in Compressed Sparse Row format>

In [6]:
results_folder = './results/'
results_file=os.path.join(results_folder,'log_reg',problem_name+'.pckl')
results = {problem_name: {}}
if os.path.isfile(results_file):
    with open(results_file, "rb") as f:
        try:
            results=pickle.load(f)
        except Exception: # so many things could go wrong, can't be more specific.
            pass 

Normalize

In [7]:
N, n = Phi.shape

In [8]:
for i, row in enumerate(Phi):
    if np.sum(row.multiply(row)) != 0:
        Phi[i] = row.multiply(1 / np.sqrt(np.sum(row.multiply(row))))

In [9]:
gamma = 1 / (np.sqrt(N)) 
Mf = np.max(np.sqrt(np.sum(Phi.multiply(Phi),axis=1)))/np.sqrt(gamma)
mult = N
Mf = Mf/np.sqrt(mult)
nu = 3
mu = 0


#running parameters
x0 = np.zeros(n)
r = n*0.05
terminate_tol = 1e-20

#parameters for FW
FW_params={
    'iter_FW':50000,
    'line_search_tol':1e-10,
    'rho':np.sqrt(n), #parameters for ll00
    'diam_X':2,
    'sigma_f':1,
}


sc_params={
    #parameters for SCOPT
    'iter_SC': 1000,
    'Lest': 'backtracking',#,'estimate', #estimate L
    'use_two_phase':False,
    #FISTA parameters
    'fista_type': 'mfista',
    'fista_tol': 1e-5,
    'fista_iter': 1000,
    #Conjugate Gradient Parameters
    'conj_grad_tol':1e-5,
    'conj_grad_iter':1000,
}

## Auxilary functions

In [10]:
func_x = lambda x: lr.log_reg(Phi, y, x, mu, gamma,mult)
func_beta = lambda x, s, beta, exp_product, exp_product_s:lr.log_reg(Phi, y, (1 - beta) * x + beta * s, mu,gamma,mult,np.exp(np.log(exp_product)*(1-beta)+np.log(exp_product_s)*beta))
grad_x = lambda x, exp_product: lr.grad_log_reg(Phi, y, x,  mu, gamma, exp_product,mult)
grad_beta = lambda x, s, beta, exp_product, exp_product_s: lr.grad_log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma, (1 - beta) * exp_product + beta * exp_product_s,mult)
hess_x = lambda s, exp_product: lr.hess(Phi, y, mu, gamma, exp_product,s,mult)
hess_mult_x = lambda s, exp_product: lr.hess_mult_log_reg(Phi, y, mu, gamma, exp_product,s,mult)
hess_mult_vec_x = lambda s, exp_product: lr.hess_mult_vec(Phi, y,mu, gamma, exp_product,s,mult)
extra_func = lambda x: np.exp(-y*(Phi @ x+mu))
linear_oracle = lambda grad: lr.linear_oracle_l1(grad, r)
# llo_oracle = lambda x, r, grad, rho: pr.llo_oracle(x, r, grad,rho)
prox_func = lambda x, L: lr.projection_l1(x,r)

## Run FW

In [16]:
files = ['a5a','a4a','w4a','a1a','a2a','a3a','a6a','a7a','a8a','a9a','w1a','w2a','w3a','w5a','w6a','w7a','w8a']
def run_fw(problem_name):
    out_dir = os.path.join('results', 'log_reg')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    results_file = os.path.join(out_dir, problem_name + '.pckl')
    if os.path.exists(results_file):
        with open(results_file, 'rb') as f:
            results = pickle.load(f)
    else:
        results = {problem_name: {}}

    Phi, y = load_svmlight_file(os.path.join('../data', problem_name))

    # fix classes
    if max(y) == 2:
        y = 2 * y - 3

    N, n = Phi.shape

    # normalize
    for i, row in enumerate(Phi):
        if np.sum(row.multiply(row)) != 0:
            Phi[i] = row.multiply(1 / np.sqrt(np.sum(row.multiply(row))))

    gamma = 1 / (np.sqrt(N)) 
    Mf = np.max(np.sqrt(np.sum(Phi.multiply(Phi),axis=1)))/np.sqrt(gamma)
    mult = 1
    Mf = Mf/np.sqrt(mult)
    nu = 3
    mu = 0
    
    gamma = 1 / (np.sqrt(N)) 
    Mf = np.max(np.sqrt(np.sum(Phi.multiply(Phi),axis=1)))
    nu = 2
    mult = 1/10
    Mf = Mf*mult
    
    #running parameters
    x0 = np.zeros(n)
    r = n*0.05
    terminate_tol = 1e-20
    
    #parameters for FW
    FW_params={
        'iter_FW':50000,
        'line_search_tol':1e-10,
        'rho':np.sqrt(n), #parameters for ll00
        'diam_X':2,
        'sigma_f':1,                   
    }
    
    
    sc_params={
        #parameters for SCOPT
        'iter_SC': 1000,
        'Lest': 'backtracking',#,'estimate', #estimate L
        'use_two_phase':False,
        #FISTA parameters
        'fista_type': 'mfista',
        'fista_tol': 1e-5,
        'fista_iter': 1000,
        #Conjugate Gradient Parameters
        'conj_grad_tol':1e-5,
        'conj_grad_iter':1000,
    }

    func_x = lambda x: lr.log_reg(Phi, y, x, mu, gamma,mult)
    func_beta = lambda x, s, beta, exp_product, exp_product_s:lr.log_reg(Phi, y, (1 - beta) * x + beta * s, mu,gamma,mult,np.exp(np.log(exp_product)*(1-beta)+np.log(exp_product_s)*beta))
    grad_x = lambda x, exp_product: lr.grad_log_reg(Phi, y, x,  mu, gamma, exp_product,mult)
    grad_beta = lambda x, s, beta, exp_product, exp_product_s: lr.grad_log_reg(Phi, y, (1 - beta) * x + beta * s, mu, gamma, (1 - beta) * exp_product + beta * exp_product_s,mult)
    hess_x = lambda s, exp_product: lr.hess(Phi, y, mu, gamma, exp_product,s,mult)
    hess_mult_x = lambda s, exp_product: lr.hess_mult_log_reg(Phi, y, mu, gamma, exp_product,s,mult)
    hess_mult_vec_x = lambda s, exp_product: lr.hess_mult_vec(Phi, y,mu, gamma, exp_product,s,mult)
    extra_func = lambda x: np.exp(-y*(Phi @ x+mu))
    linear_oracle = lambda grad: lr.linear_oracle_l1(grad, r)
    # llo_oracle = lambda x, r, grad, rho: pr.llo_oracle(x, r, grad,rho)
    prox_func = lambda x, L: lr.projection_l1(x,r)

    run_alpha_policies = ["backtracking", "standard", "line_search", "icml"]
    run_alpha_policies = ["icml"]

    for policy in run_alpha_policies:
        print(f'{policy} for {problem_name} started!')
        x, alpha_hist, Gap_hist, Q_hist, time_hist = frank_wolfe(func_x,
                           func_beta,                                      
                           grad_x,
                           grad_beta,
                           hess_mult_x,
                           extra_func,
                           Mf,
                           nu,
                           linear_oracle,                                                    
                           x0,
                           FW_params,
                           hess=None, 
                           lloo_oracle=None,                                                 
                           alpha_policy=policy,                                                    
                           eps=terminate_tol, 
                           print_every=1000, 
                           debug_info=False)
  
        results[problem_name][policy] = {
            'x': x,
            'alpha_hist': alpha_hist,
            'Gap_hist': [z / mult for z in Gap_hist],
            'Q_hist': [z / mult for z in Q_hist],
            'time_hist': time_hist,
        }

        with open(results_file, 'wb') as f:
            pickle.dump(results, f)    

        print(f'{policy} for {problem_name} finished!')
    return results


for problem_name in files:
    run_fw(problem_name)

icml for a5a started!
********* Algorithm starts *********
iter = 1, stepsize = 0.0009708048255842849, criterion = 9.999999999999999e-11, upper_bound=0.06931471805599453, lower_bound=0.026184488959438555, real_Gap=0.04313022909655597
iter = 1000, stepsize = 0.0003590866980244067, criterion = 9.999999999999999e-11, upper_bound=0.05382585337628295, lower_bound=0.05371606522050467, real_Gap=0.00010978815577827872
iter = 2000, stepsize = 0.0007043653987057228, criterion = 9.999999999999999e-11, upper_bound=0.053787812892409755, lower_bound=0.05373468946652804, real_Gap=5.312342588171398e-05
iter = 3000, stepsize = 0.00018970004727968074, criterion = 9.999999999999999e-11, upper_bound=0.053774283731557376, lower_bound=0.05373762950677946, real_Gap=3.665422477791558e-05
iter = 4000, stepsize = 0.00016965194746109758, criterion = 9.999999999999999e-11, upper_bound=0.05376731694304199, lower_bound=0.05374005785362956, real_Gap=2.7259089412430548e-05
iter = 5000, stepsize = 0.000143302408384065

iter = 46000, stepsize = 6.630734807850067e-05, criterion = 9.999999999999999e-11, upper_bound=0.05374729597409959, lower_bound=0.05374489250204521, real_Gap=2.4034720543797405e-06
iter = 47000, stepsize = 1.0723785589618333e-05, criterion = 9.999999999999999e-11, upper_bound=0.05374725345757777, lower_bound=0.05374489250204521, real_Gap=2.3609555325557197e-06
iter = 48000, stepsize = 3.62891115344233e-05, criterion = 9.999999999999999e-11, upper_bound=0.05374721283300403, lower_bound=0.05374489250204521, real_Gap=2.320330958816008e-06
iter = 49000, stepsize = 1.0366058196606922e-05, criterion = 9.999999999999999e-11, upper_bound=0.053747173822332, lower_bound=0.05374489250204521, real_Gap=2.2813202867899562e-06
iter = 50000, stepsize = 1.0326153691706996e-05, criterion = 9.999999999999999e-11, upper_bound=0.05374713638832212, lower_bound=0.05374489250204521, real_Gap=2.243886276905027e-06
93.12467074394226
icml for a5a finished!


FileNotFoundError: [Errno 2] No such file or directory: '../data\\a4a'