- No lower bound on the intercept or on the coefficient of the feature expected to have the lowest coefficient
- Upper bounds on the intercept and on the coefficient of the feature expected to have the lowest coefficient
- The intercept's bounds also depend on the `no_intercept` flag
- All coefficients must follow a specified order
- Minimum and maximum amounts by which successive coefficients may differ while maintaining that order

# Imports

In [1]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from scipy.optimize import lsq_linear

# User inputs

In [2]:
# Load the training data from a CSV path relative to the notebook's working directory.
df = pd.read_csv("data/train.csv")

In [3]:
# When True, the constraints cell pins the intercept's bounds to [0, 0.0001],
# which effectively forces the fitted intercept to zero.
no_intercept = True

In [4]:
# Name of the column in `df` that is used as the regression target.
target = 'SalePrice'

In [5]:
# Features in the expected ascending order of coefficients.
# The order matters: the first feature's coefficient is fitted directly, and each
# later feature's coefficient is its predecessor's coefficient plus a bounded gap.
features = ['PoolArea', 'LotArea', 'TotalBsmtSF', 'GarageArea', 'GrLivArea']

In [1]:
# Min and max gaps between the coefficients of successive features.
# Entry i bounds coefficient(features[i]) - coefficient(features[i-1]).
# Entry 0 is a None placeholder: the first feature has no predecessor, and the
# constraints cell only reads indices 1 and above.
min_gap_amt = [None, 0.1, 1, 10, 2]
max_gap_amt = [None, 2, 100, 50, 20]

# Constraints

In [6]:
# Reserve one zero-valued slot for the intercept plus one per feature
len_coeffs = 1 + len(features)
coeffs = [*np.zeros(len_coeffs)]
print("Initialized coefficients:", coeffs)

Initialized coefficients: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [14]:
# Put constraints
# One lower and one upper bound per fitted parameter, in the order
# [intercept, first-feature coefficient, gap_1, gap_2, ...].
if not (len(min_gap_amt) == len(max_gap_amt) == len(features)):
    raise ValueError("min_gap_amt and max_gap_amt need exactly one entry per feature")

# Intercept and first-feature coefficient: unbounded below, capped above at 10 and 1.
# Every remaining parameter is a gap between successive coefficients and takes the
# user-supplied limits; index 0 of the gap lists is a placeholder and is skipped.
min_con = [-np.inf, -np.inf, *min_gap_amt[1:]]
max_con = [10, 1, *max_gap_amt[1:]]

if no_intercept:
    # Squeeze the intercept into a tiny interval just above zero instead of fixing
    # it at exactly 0.
    # NOTE(review): lsq_linear requires each lower bound to be strictly less than
    # its upper bound, which is presumably why a zero-width interval is not used.
    min_con[0] = 0
    max_con[0] = 0.0001

print("Minimum constraints:", min_con)
print("Maximum constraints:", max_con)

Minimum constraints: [0, -inf, 0.1, 1, 10, 2]
Maximum constraints: [0.0001, 1, 2, 100, 50, 20]


# Model

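Each feature's coefficient is written as the previous feature's coefficient plus a bounded gap, so bounding the gaps enforces the required order. Here X0 is the intercept, X1 is the coefficient of the first feature, and X2 … X5 are the gaps between successive coefficients:

y = X0 + X1*PoolArea + (X1+X2)*LotArea + (X1+X2+X3)*TotalBsmtSF + (X1+X2+X3+X4)*GarageArea + (X1+X2+X3+X4+X5)*GrLivArea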

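Grouping the terms by X0 … X5 gives the equivalent form that is actually fitted; each parenthesised sum is one engineered feature:

y = X0 + X1*(PoolArea+LotArea+TotalBsmtSF+GarageArea+GrLivArea) + X2*(LotArea+TotalBsmtSF+GarageArea+GrLivArea) + X3*(TotalBsmtSF+GarageArea+GrLivArea) + X4*(GarageArea+GrLivArea) + X5*GrLivArea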

In [15]:
# Feature engineer
# Column F(k+1) is the sum of features[k:], i.e. the k-th feature plus every
# feature that follows it in `features`. Works for any number of features.
# The built-in sum() starts from the k-th column and adds the rest left to right,
# so missing values propagate exactly as with chained `+`.
X = pd.DataFrame({
    f"F{k + 1}": sum((df[name] for name in features[k + 1:]), df[features[k]])
    for k in range(len(features))
})

In [16]:
# Build the design matrix: a leading column of ones, whose coefficient acts as
# the intercept, followed by the independent variables as a plain array
ones = np.ones(len(X))
X = np.column_stack((ones, X.values))
X

array([[1.0000e+00, 1.1564e+04, 1.1564e+04, 3.1140e+03, 2.2580e+03,
        1.7100e+03],
       [1.0000e+00, 1.2584e+04, 1.2584e+04, 2.9840e+03, 1.7220e+03,
        1.2620e+03],
       [1.0000e+00, 1.4564e+04, 1.4564e+04, 3.3140e+03, 2.3940e+03,
        1.7860e+03],
       ...,
       [1.0000e+00, 1.2786e+04, 1.2786e+04, 3.7440e+03, 2.5920e+03,
        2.3400e+03],
       [1.0000e+00, 1.2113e+04, 1.2113e+04, 2.3960e+03, 1.3180e+03,
        1.0780e+03],
       [1.0000e+00, 1.2725e+04, 1.2725e+04, 2.7880e+03, 1.5320e+03,
        1.2560e+03]])

In [17]:
# Pull the target column out of the frame as an array
y = df.loc[:, target].values
y

array([208500, 181500, 223500, ..., 266500, 142125, 147500], dtype=int64)

In [18]:
# Solve the bounded linear least-squares problem for the reparameterised model
# NOTE(review): lsmr_tol is a tolerance for the LSMR solver; with a dense X and
# the default solver selection it appears to be unused - confirm against SciPy docs.
coef_bounds = (min_con, max_con)
results = lsq_linear(X, y, bounds=coef_bounds, lsmr_tol='auto')
print("Results:\n", results)

Results:
  active_mask: array([-1,  0,  1,  0,  0, -1])
        cost: 1614805624295.8193
         fun: array([-16875.02325198,  -7069.36739765, -19465.01255949, ...,
       -38122.2735626 ,  -2897.35966269,  14266.47002173])
     message: 'The relative change of the cost function is less than `tol`.'
         nit: 13
  optimality: 3.681207470245027
      status: 2
     success: True
           x: array([ 1.16449329e-21, -1.84606139e+00,  2.00000000e+00,  4.56303763e+01,
        1.96332325e+01,  2.00000000e+00])


In [19]:
if results.success:
    # Transform the coefficients back to the context of original features.
    # results.x holds [intercept, first-feature coefficient, gap_1, gap_2, ...],
    # so each feature's coefficient is the running total of everything after the
    # intercept. np.cumsum accumulates left to right, so this works for any
    # number of features.
    coeffs = [results.x[0], *np.cumsum(results.x[1:])]
    print("Final Coefficients (including intercept):", coeffs)
else:
    # `coeffs` is left as it was before this cell ran.
    print("Convergence was not achieved!")

Final Coefficients (including intercept): [1.164493293376129e-21, -1.8460613944239106, 0.15393860557557582, 45.78431490396352, 65.41754737515578, 67.41754737516203]
