In [1]:
import numpy as np
from scipy.sparse.linalg import svds
from sklearn import datasets

In [2]:
def e(i, d):
    """Return the i-th canonical basis vector of R^d as a float array."""
    basis = np.zeros(d)
    # switch on the single non-zero coordinate
    basis[i] = 1.0
    return basis



def KWSA(F, w, c, d):
    """
    Kiefer-Wolfowitz stochastic approximation of the gradient
    (forward finite differences along each coordinate).

    INPUT:
    - F: objective function, called on weight vectors
    - w: current weight
    - c: constant used as finite-difference step and as divisor
    - d: dimension (number of coordinates that are perturbed)

    OUTPUT:
    - array whose i-th entry is (F(w + c * e_i) - F(w)) / c
    """
    # evaluate the objective once per perturbed coordinate
    shifted_values = []
    for coord in range(d):
        shifted_values.append(F(w + c * e(coord, d)))

    # the unperturbed value is shared by all the difference quotients
    base_value = F(w)
    return (np.array(shifted_values) - base_value) / c



def detZFW(F, L, d, w0, r=1, T=100, eps=1e-5, verbose=True):
    """
    Deterministic zeroth-order Frank-Wolfe on the L1 ball of radius r.

    At every iteration the gradient is estimated with KWSA, the linear
    subproblem over the L1 ball is solved in closed form (a signed, scaled
    basis vector) and the iterate is moved towards it with step 2/(t+2).

    INPUT
    - F: loss function, called on weight vectors of length d
    - L: Lip constant; here it only sets the KWSA step L*gamma(t)/d
    - d: dimension
    - w0: starting point
    - r: radius of the ball
    - T: max iteration; the loop runs for t = 1, ..., T-1
    - eps: tolerance on the decrease F(w_pred) - F(w) between two iterates
    - verbose: if True (default) print the decrease at every iteration

    OUTPUT
    - F(w_pred): loss at the iterate preceding the last one
    - F(w): loss at the last iterate
    - w: last iterate
    - average of the iterates produced by the loop (w0 itself if the loop
      never ran)
    - t: index of the last iteration performed (0 if none was performed)
    """

    gamma = lambda t: 2/(t+2)
    c = lambda t: L*gamma(t)/d

    w = np.asarray(w0, dtype=float)
    # Defined up front so the return statement is valid even when T <= 1.
    w_pred = w
    t = 0
    # Loss values are cached: each iterate is evaluated by this function once.
    f_pred = f_curr = F(w)
    partial = 0
    for t in range(1, T):
        # compute the gradient approx
        gt = KWSA(F, w, c(t), d)
        # compute the linear problem solution on the L1 ball of radius r:
        # the vertex opposite in sign to the largest-magnitude gradient entry
        i = np.argmax(np.abs(gt))
        v = -r * np.sign(gt[i]) * e(i, d)
        # compute step
        w_pred, f_pred = w, f_curr
        w = (1 - gamma(t)) * w + gamma(t) * v
        f_curr = F(w)
        partial += w
        loss_eval = f_pred - f_curr
        if verbose:
            print(f"Loss evaluation at time {t}:\t{loss_eval:.4f}\n")
        # check stopping condition
        # NOTE: this also triggers when the loss increased (negative decrease).
        if loss_eval < eps:
            break

    # Exactly t iterates were accumulated, so that is the right divisor.
    average = partial / t if t > 0 else w.copy()
    return f_pred, f_curr, w, average, t

## Stochastic lasso regression

In [3]:
# load data: parse the svmlight/libsvm file (path is relative to the working
# directory) into X and y, the two values returned by load_svmlight_file
X, y = datasets.load_svmlight_file("../Data/covtype.libsvm.binary.scale.bz2")

In [4]:
# space dimension: d is the number of columns of X, n the number of entries of y
d = X.shape[1]
print(f"Space Dimensions\nd: {d}")
print(f"n: {y.shape[0]}")

Space Dimensions
d: 54
n: 581012


In [5]:
# define the objective function
def F(w):
    """Half the sum of squared residuals of the linear model X @ w against y."""
    residual = y - X @ w
    squared = np.power(residual, 2)
    return 0.5 * np.sum(squared)

In [6]:
# initialize parameters for the algorithm

# starting point: random non-negative vector rescaled so its entries sum to 1
# (it therefore has unit L1 norm); the seed is fixed for reproducibility
np.random.seed(1007)
w0 = np.random.rand(d)
w0 = w0 / w0.sum()

# Lipschitz constant of the gradient of F(w) = 0.5 * ||y - X w||^2: the largest
# eigenvalue of X.T @ X, i.e. the squared largest singular value of X.
# Stored under its own name for reference so it is not overwritten below.
L_spectral = svds(X, k=1, return_singular_vectors=False)[0] ** 2

# Value actually passed to detZFW, where it only sets the finite-difference
# step L*gamma(t)/d. Open question: which value should be used here?
L = 100

In [7]:
# run the solver with its default radius, iteration budget and tolerance
fpred, f, w, mean, t = detZFW(F=F, L=L, d=d, w0=w0)

Loss evaluation at time 1:	320442.8827

Loss evaluation at time 2:	57384.8266

Loss evaluation at time 3:	20408.6305

Loss evaluation at time 4:	9658.8938

Loss evaluation at time 5:	5356.1125

Loss evaluation at time 6:	3287.2771

Loss evaluation at time 7:	2165.7518

Loss evaluation at time 8:	1503.7615

Loss evaluation at time 9:	1087.3006

Loss evaluation at time 10:	811.9696

Loss evaluation at time 11:	622.5471

Loss evaluation at time 12:	487.8967

Loss evaluation at time 13:	389.5270

Loss evaluation at time 14:	315.9734

Loss evaluation at time 15:	259.8651

Loss evaluation at time 16:	216.3140

Loss evaluation at time 17:	181.9897

Loss evaluation at time 18:	154.5695

Loss evaluation at time 19:	132.3991

Loss evaluation at time 20:	114.2785

Loss evaluation at time 21:	99.3227

Loss evaluation at time 22:	86.8693

Loss evaluation at time 23:	76.4157

Loss evaluation at time 24:	67.5755

Loss evaluation at time 25:	60.0491

Loss evaluation at time 26:	53.6009

Loss evaluatio

In [None]:
# Report the values returned by detZFW: the two loss values, the last iterate,
# the averaged iterate and the last iteration index.
# The '\n' arguments are joined with print's default separator, so every line
# after the first starts with a space.
print('F(w_pred) =', fpred, '\n',
      'F(w) =', f, '\n',
      'w =', w, '\n',
      'average w =', mean, '\n',
      'T =', t)