In [0]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.io import loadmat
from tqdm import tqdm

def load_data(filename='census_data.mat', test_size=0.33):
    data = loadmat(filename)
    X = data.get('A')  # Educational Attainment
    X = np.asarray(X, dtype='float64')
    # Variables
#    Sex
#    Age in 30 40
#    Age in 40 50
#    Age in 50 60
#    Age in 60 70
#    Age gte 70
#    Non white*
#    Unmarried
#    Education
#    Education code squared
    # Education
    #00 Not in universe (Under 3 years)
    #01 No schooling completed
    #02 Nursery school to 4th grade
    #03 5th grade or 6th grade
    #04 7th grade or 8th grade
    #05 9th grade
    #06 10th grade
    #07 11th grade
    #08 12th grade, no diploma
    #09 High school graduate
    #10 Some college, but less than 1 year
    #11 One or more years of college, no degree
    #12 Associate degree
    #13 Bachelorâ€™s degree
    #14 Masterâ€™s degree
    #15 Professional degree
    #16 Doctorate degree
    y = data.get('b')[:, 0].reshape(-1, 1)
    del data
    ind = np.nonzero(y == 0.)[0]  # remove samples with no income
    X = np.delete(X, ind, axis=0)
    y = np.delete(y, ind, axis=0)
    X = np.delete(X, 1, axis=1)  # this variable is just ones everywhere

    scaler_x = StandardScaler()
    scaler_y = StandardScaler()

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                test_size=test_size, random_state=0)
    X_train = scaler_x.fit_transform(X_train)
    y_train = scaler_y.fit_transform(y_train)
    X_test = scaler_x.transform(X_test)
    y_test = scaler_y.transform(y_test)

    return X_train, X_test, y_train, y_test

In [6]:
!wget https://perso.telecom-paristech.fr/ofercoq/tp_qr/census_data.mat

--2019-12-21 19:30:11--  https://perso.telecom-paristech.fr/ofercoq/tp_qr/census_data.mat
Resolving perso.telecom-paristech.fr (perso.telecom-paristech.fr)... 137.194.2.165, 2001:660:330f:2::a5
Connecting to perso.telecom-paristech.fr (perso.telecom-paristech.fr)|137.194.2.165|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 33091048 (32M)
Saving to: ‘census_data.mat’


2019-12-21 19:30:20 (4.05 MB/s) - ‘census_data.mat’ saved [33091048/33091048]



In [0]:
X_train, X_test, y_train, y_test = load_data("census_data.mat")

In [0]:
def pinball_loss(x, tau):
  return tau*x*(x>=0) + (tau-1)*x*(x<=0)

def L_tau(Xw,y, tau):
  return np.sum(pinball_loss(y-Xw, tau))

def g_fn(w, alpha):
  return alpha * 0.5 * np.linalg.norm(w[1:], ord=2)**2

def grad_g(w, alpha):
  return alpha * np.concatenate(([0], w[1:]), axis=0)

def prox_gamma_L_tau(x, tau, gamma,):
  res1 = (gamma-1)/gamma * x * ((x>=gamma*(tau-1)) & (x<=gamma*tau))
  res2 = (x-tau) * (x>gamma*tau)
  res3 = (x-tau+1) * (x<gamma*(tau-1))
  return res1+res2+res3


In [0]:
def vu_condat(X, y, tau, alpha, sigma, gamma=None,  w_init=None, lambda_init=None, n_iter=10000):
  n = X.shape[0]
  n_features = X.shape[1]
  w = w_init or np.random.normal(size=(n_features+1))
  lambd = lambda_init or np.random.normal(size=(n,))
  M = np.concatenate((np.ones((n,1)), X), axis=1)

  if not gamma:
    gamma =  (1/2 + np.linalg.norm(M)/sigma)   # theorem for convergence of Vu Condat
  for i in range(n_iter):
    w_next = w - 1./gamma * (grad_g(w,alpha) + M.T@lambd)
    v = M@(2*w_next-w)
    x = lambd+1./sigma*v
    prox = prox_gamma_L_tau(y-x, tau, sigma)
    lambd_next = lambd - sigma * ( v  - y + prox)

    w = w_next
    lambd = lambd_next
    if i%100==0:
      print(L_tau(M@w,y, tau))
      #print(lambd)
      #print(w)




In [0]:
vu_condat(X_train, np.squeeze(y_train), 0.3, 0.5,0.1)

In [0]:
def ADMM(X, y, tau, alpha, gamma=None,  w_init=None, lambda_init=None, u_init=None, n_iter=1000):
  n = X.shape[0]
  n_features = X.shape[1]
  M = np.concatenate((np.ones((n,1)), X), axis=1)
  e = np.ones((n))

  w = w_init or np.random.normal(size=(n_features+1))
  lambd_curr = lambda_init or np.random.normal(size=(n,))
  u_curr = u_init or np.random.normal(size=(n,))

  for i in range(n_iter):
    
    w1 = np.linalg.inv(gamma*X.T@X + alpha*np.eye(n_features)) @ X.T @ (gamma * y - lambd_curr - gamma * u_curr)
    w0 = 1./(n*gamma) * e.T@(gamma*y - lambd_curr - gamma*u_curr)
    w = np.concatenate(([w0], w1), axis=0)

    u = prox_gamma_L_tau(y-M@w-1./gamma*lambd_curr, tau, 1./gamma)

    lambd = lambd_curr + gamma*(M@w-y+u)

    u_curr = u
    lambd_curr = lambd

    if i %100==0:
      print(np.mean((M@w-y)**2))


In [84]:
ADMM(X_train, np.squeeze(y_train), 0.3, 0.5, 17000,n_iter=10000)

0.7054929266759229
1.7623012634374104


KeyboardInterrupt: ignored

In [24]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [0]:
from sklearn.linear_model import LinearRegression

In [0]:
model = LinearRegression()

In [0]:
reg = model.fit(X_train, y_train)

In [87]:
np.mean((reg.predict(X_train)-y_train)**2)

0.705490614781552