- Some non-negative coefficients (some of them having non-zero positive lower bounds)
- Some coefficients upper-bounded
- The bounds for the intercept also depend on the `no_intercept` flag

# Imports

In [16]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from scipy.optimize import lsq_linear

# User inputs

In [17]:
# Load the training data; the `target` and `features` columns selected in the
# cells below are read from this frame.
df = pd.read_csv("data/train.csv")

In [18]:
# When True, the constraints cell narrows the intercept bounds so that the
# fitted intercept is effectively zero.
no_intercept = True

In [19]:
# Name of the column in `df` used as the dependent variable (y).
target = 'SalePrice'

In [20]:
# Independent variables, in coefficient order: the design matrix built below
# places a column of ones first, so coefficient 0 is the intercept and
# coefficient i + 1 belongs to features[i].
features = ['PoolArea', 'LotArea', 'TotalBsmtSF', 'GrLivArea', 'GarageArea']

# Constraints

In [26]:
# Build the lower/upper bound for every coefficient, ordered as
# [intercept] + features (the same order as the columns of the design matrix).
#
# Feature bounds are keyed by column name instead of by list position, so each
# bound stays attached to the right coefficient when `features` is reordered,
# shortened or extended, and the bound lists always have len_coeffs entries.
len_coeffs = len(features) + 1

# Per-feature bounds; any feature not listed here is unbounded on that side.
feature_min = {'TotalBsmtSF': 10}
feature_max = {'GrLivArea': 50, 'GarageArea': 90}

# Fail loudly if a bound names a column that is not being fitted, rather than
# silently dropping the constraint.
unknown_cols = sorted((set(feature_min) | set(feature_max)) - set(features))
if unknown_cols:
    raise ValueError(
        "Bounds given for columns that are not in `features`: {}".format(unknown_cols)
    )

if no_intercept:
    # Pin the intercept to a tiny interval at zero instead of exactly [0, 0]
    # (lsq_linear requires each lower bound to be strictly below its upper
    # bound -- see the SciPy documentation).
    intercept_min, intercept_max = 0, 0.0001
else:
    intercept_min, intercept_max = -np.inf, 10

min_con = [intercept_min] + [feature_min.get(col, -np.inf) for col in features]
max_con = [intercept_max] + [feature_max.get(col, np.inf) for col in features]

print("Minimum constraints:", min_con)
print("Maximum constraints:", max_con)

Minimum constraints: [0, -inf, -inf, 10, -inf, -inf]
Maximum constraints: [0.0001, inf, inf, inf, 50, 90]


# Model

In [27]:
# Design matrix: a leading column of ones (multiplied by the intercept
# coefficient) followed by the independent variables as a plain NumPy array.
feature_values = df[features].copy().values
ones = np.ones(feature_values.shape[0])
X = np.hstack((ones.reshape(-1, 1), feature_values))
X

array([[1.000e+00, 0.000e+00, 8.450e+03, 8.560e+02, 1.710e+03, 5.480e+02],
       [1.000e+00, 0.000e+00, 9.600e+03, 1.262e+03, 1.262e+03, 4.600e+02],
       [1.000e+00, 0.000e+00, 1.125e+04, 9.200e+02, 1.786e+03, 6.080e+02],
       ...,
       [1.000e+00, 0.000e+00, 9.042e+03, 1.152e+03, 2.340e+03, 2.520e+02],
       [1.000e+00, 0.000e+00, 9.717e+03, 1.078e+03, 1.078e+03, 2.400e+02],
       [1.000e+00, 0.000e+00, 9.937e+03, 1.256e+03, 1.256e+03, 2.760e+02]])

In [28]:
# Extract the target column as a 1-D NumPy array (one value per row of X)
target_column = df[target]
y = target_column.values
y

array([208500, 181500, 223500, ..., 266500, 142125, 147500], dtype=int64)

In [29]:
# Solve the bounded linear least-squares problem: minimise ||X @ coef - y||^2
# subject to min_con <= coef <= max_con.
# NOTE(review): per the SciPy docs, lsmr_tol is only consulted when
# lsq_solver='lsmr' -- confirm whether it is needed here.
results = lsq_linear(
    X,
    y,
    bounds=(min_con, max_con),
    lsmr_tol='auto',
)
print("Results:\n", results)

Results:
  active_mask: array([-1,  0,  0,  0,  1,  1])
        cost: 1617641015130.0615
         fun: array([-22304.75969658,  -2229.58460034, -23526.12644965, ...,
       -58471.99669185,  -2240.25451113,  14685.08079603])
     message: 'The relative change of the cost function is less than `tol`.'
         nit: 9
  optimality: 0.00016021728515625
      status: 2
     success: True
           x: array([ 6.82805847e-18, -4.37376510e+01,  3.40137220e-01,  5.66601411e+01,
        5.00000000e+01,  9.00000000e+01])


In [30]:
# Report the fitted coefficients only if the solver signalled success;
# results.x is ordered as [intercept] + features.
if not results.success:
    print("Convergence was not achieved!")
else:
    print("Final Coefficients (including intercept):", results.x)

Final Coefficients (including intercept): [ 6.82805847e-18 -4.37376510e+01  3.40137220e-01  5.66601411e+01
  5.00000000e+01  9.00000000e+01]
