In [7]:
import pandas as pd
import numpy as np
# !pip install mip
from mip import Model, xsum, BINARY, minimize,CONTINUOUS, CBC, ConstrsGenerator

In [8]:
def regobj(x, y, s, gamma, k):
  """Evaluate the regularized regression objective and its gradient w.r.t. s.

  Only the columns of ``x`` whose indicator ``s[i]`` is at least 0.5 are kept.
  Writing ``Xs`` for those columns and ``n = y.shape[0]``, this computes

      alpha    = y - Xs (I / gamma + Xs^T Xs)^{-1} Xs^T y
      obj_val  = y^T alpha / (2 n)
      gradient = -gamma * (x^T alpha)^2 / (2 n)      (element-wise square)

  Args:
    x: 2-D numpy array of features, one column per candidate coefficient.
    y: numpy array of responses; ``y.shape[0]`` is the number of observations.
    s: sequence of indicator values, one per column of ``x``.
    gamma: regularization weight; it is used as a divisor, so it must be non-zero.
    k: sparsity budget. Accepted for interface compatibility; not used here.

  Returns:
    ``(obj_val, gradient)`` where ``gradient`` is a list with one entry per
    column of ``x`` (selected or not).
  """
  #indices of the currently chosen binary variables
  support = [i for i, s_i in enumerate(s) if s_i >= 0.5]
  n = y.shape[0] #number of observations
  denom = 2 * n

  if support:
    Xs = x[:, support] #only the chosen feature columns
    gram = np.identity(len(support)) / gamma + np.matmul(np.transpose(Xs), Xs)
    #solve the linear system rather than forming an explicit inverse:
    #same result mathematically, but cheaper and numerically better behaved
    fitted = np.matmul(Xs, np.linalg.solve(gram, np.matmul(np.transpose(Xs), y)))
    alpha = y - fitted
  else:
    #no feature selected: the fitted term vanishes and alpha is just y
    alpha = y

  #current objective function value
  obj_val = np.dot(np.transpose(y), alpha) / denom

  #current gradient, one entry per column of the full x
  gradient = -gamma * np.square(np.matmul(np.transpose(x), alpha)) / denom

  return obj_val, list(gradient)

In [13]:
def SparseRegression(x, y, gamma, k):
  """Select at most k features and fit their coefficients via lazy cutting planes.

  Builds a CBC model with one binary selector per column of ``x`` and a
  continuous variable ``t`` that is minimized. ``t`` is bounded from below by
  linear cuts ``t >= obj + gradient . (s - s_hat)``, where ``obj`` and
  ``gradient`` come from ``regobj`` evaluated at a 0/1 point ``s_hat``. One cut
  is added up front; further cuts are added by a lazy-constraint generator
  while the solver runs.

  Args:
    x: 2-D numpy array of features, one column per candidate coefficient.
    y: numpy array of responses.
    gamma: regularization weight passed to ``regobj`` (must be non-zero).
    k: maximum number of selectors allowed to be 1 (non-negative integer).

  Returns:
    ``(s_opt, selected, final_betas)``: the raw selector values reported by
    the solver, the same values thresholded to 0/1, and a list of length
    ``x.shape[1]`` holding the coefficient of each selected column (0 for
    unselected columns).

  Raises:
    ValueError: if ``k`` is negative.
    RuntimeError: if the solver ends without any value for the selectors.
  """
  p = x.shape[1] #p is our number of total coefficients
  if k < 0:
    raise ValueError('k must be non-negative, got {}'.format(k))

  #defining our model to optimize with
  model = Model(solver_name=CBC)

  #binary selectors: s[j] == 0 forces coefficient j to zero
  s = [model.add_var(var_type=BINARY, name='s({})'.format(j)) for j in range(p)]
  #t under-estimates the inner objective; every cut raises its lower bound
  t = model.add_var(var_type=CONTINUOUS, name='t', lb=0)

  #defining the objective function
  model.objective = minimize(t)

  #sparsity constraint on our binary variables
  model += xsum(s) <= k

  #starting point: the first min(k, p) features are selected
  #(capping at p keeps s0 the same length as s when k exceeds p)
  n_start = min(k, p)
  s0 = [1] * n_start + [0] * (p - n_start)

  #objective value and gradient at the starting point give the first cut
  obj_val, gradient = regobj(x, y, s0, gamma, k)
  model += t >= obj_val + xsum(gradient[i] * (s[i] - s0[i]) for i in range(p))

  class outer_approximation(ConstrsGenerator):
    """Lazy-constraint generator that adds one linear cut per call."""

    def __init__(self, s, t, x, y, p, k, gamma):
      #decision variables of the original model
      self.s, self.t = s, t

      #problem data and parameters
      self.x, self.y, self.p, self.k, self.gamma = x, y, p, k, gamma

    def generate_constrs(self, model: Model, depth: int = 0, npass: int = 0):
      """Add the cut built at the current selector values to ``model``."""
      #map the original variables onto the model handed to this callback
      cur_s = model.translate(self.s)
      cur_t = model.translate(self.t)

      #round to an exact 0/1 point: regobj thresholds at 0.5, so the cut must
      #be linearized around that same point rather than the raw solver values
      cur_values = [1 if cur_s[i].x >= 0.5 else 0 for i in range(self.p)]
      obj_val, gradient = regobj(self.x, self.y, cur_values, self.gamma, self.k)

      #constant part of the cut: gradient . cur_values
      offset = sum(gradient[i] for i in range(self.p) if cur_values[i])
      model += cur_t >= obj_val + xsum(gradient[i] * cur_s[i] for i in range(self.p)) - offset

  #adding the lazy constraint generator to our model
  model.lazy_constrs_generator = outer_approximation(s, t, x, y, p, k, gamma)

  #adding our initial feasible solution (same point as the first cut)
  model.start = [(s[j], s0[j]) for j in range(p)]

  #solving the model
  print("Starting the model")
  status = model.optimize()
  print(status)

  #raw selector values reported by the solver
  s_opt = [s[j].x for j in range(p)]
  if any(value is None for value in s_opt):
    #fail with a clear message instead of a TypeError on `None >= 0.5` below
    raise RuntimeError('solver returned no solution (status: {})'.format(status))

  #one threshold for both the returned indicator and the fitted support
  selected = [1 if value >= 0.5 else 0 for value in s_opt]
  indices = [j for j in range(p) if selected[j]]

  #deriving the actual beta values of this solution:
  #beta = gamma * Xs^T (y - Xs (I / gamma + Xs^T Xs)^{-1} Xs^T y)
  final_betas = [0] * p
  if indices:
    Xs = x[:, indices] #only the selected feature columns
    xt = np.transpose(Xs)
    gram = np.identity(len(indices)) / gamma + np.matmul(xt, Xs)
    #linear solve instead of an explicit inverse
    residual = y - np.matmul(Xs, np.linalg.solve(gram, np.matmul(xt, y)))
    beta_values = np.matmul(gamma * xt, residual)
    for indx, beta in zip(indices, beta_values):
      final_betas[indx] = beta

  return s_opt, selected, final_betas

In [14]:
# Read the header-less training CSV and preview its first ten rows
df = pd.read_csv(filepath_or_buffer="Train lpga2008_opt.csv", header=None)
print(df.head(n=10))

        0      1     2     3      4     5     6
0  6.4673  225.1  78.2  55.0  29.47  1.26  47.9
1  9.4293  242.4  65.5  66.1  28.51  0.76  23.4
2  9.4986  242.4  70.7  67.7  28.64  0.70  48.4
3  9.4760  257.1  67.3  68.2  29.08  0.77  37.3
4  7.6488  238.1  71.7  57.2  27.58  0.95  38.6
5  7.3895  261.1  60.9  62.3  29.34  1.02  33.9
6  9.1652  261.1  68.7  65.3  28.14  1.14  40.2
7  7.4426  252.0  70.7  62.3  29.67  1.16  39.7
8  7.3775  238.2  65.3  61.4  30.04  1.05  36.2
9  7.8969  251.0  67.4  63.0  28.83  0.83  34.5


In [15]:
# Column 0 is used as the response; every remaining column is a feature
y_train = df[0].to_numpy()
x_train = df.to_numpy()[:, 1:]
print(x_train.shape, y_train.shape, sep="\n")

(78, 6)
(78,)


In [16]:
# Fit with gamma = 1, allowing at most k = 3 selected features
s, indx, betas = SparseRegression(x=x_train, y=y_train, gamma=1, k=3)

Starting the model
OptimizationStatus.OPTIMAL


In [17]:
# Raw selector values, 0/1 indicators and fitted coefficients, one per line
print(s, indx, betas, sep="\n")

[0.0, 0.0, 1.0, 1.0, 0.0, 1.0]
[0, 0, 1, 1, 0, 1]
[0, 0, 0.2374568251083531, -0.2883098152283381, 0, 0.036401490485136634]


In [18]:
# Show the selector values, the 0/1 indicators and the coefficients again
print("\n".join(map(str, (s, indx, betas))))

[0.0, 0.0, 1.0, 1.0, 0.0, 1.0]
[0, 0, 1, 1, 0, 1]
[0, 0, 0.2374568251083531, -0.2883098152283381, 0, 0.036401490485136634]
