- All non-negative coefficients
- Unbounded on the higher side
- Bounds for the intercept additionally depend on the `no_intercept` flag
- Feature coefficients must follow a specified ascending order

# Imports

In [14]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from scipy.optimize import lsq_linear

# User inputs

In [15]:
# Load the training data; the path is relative to the notebook's working directory.
# NOTE(review): presumably the Kaggle House Prices training set (column names match) — confirm.
df = pd.read_csv("data/train.csv")

In [16]:
# When True, the constraints cell caps the intercept's upper bound at 0.0001,
# which effectively removes the intercept from the fitted model.
no_intercept = True

In [17]:
# Name of the column in df used as the dependent variable
target = 'SalePrice'

In [18]:
# Features in the expected ascending order of coefficients.
# The order is significant: the engineered columns and the back-transformation
# are built so that coef(features[0]) <= coef(features[1]) <= ... <= coef(features[-1]).
features = ['PoolArea', 'LotArea', 'TotalBsmtSF', 'GarageArea', 'GrLivArea']

# Constraints

In [19]:
# Allocate the coefficient vector: slot 0 holds the intercept,
# the remaining slots hold one coefficient per entry of `features`.
len_coeffs = 1 + len(features)
coeffs = [*np.zeros(len_coeffs)]
print("Initialized coefficients:", coeffs)

Initialized coefficients: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [20]:
# Put constraints
# Every coefficient is bounded below by 0 (non-negative) and is unbounded above.
# Plain Python floats are used so the printed lists read the same regardless of
# how the installed NumPy version prints its scalar types.
min_con = [0.0] * len_coeffs
max_con = [np.inf] * len_coeffs

# lsq_linear requires each lower bound to be strictly less than its upper bound,
# so the intercept cannot be pinned to exactly [0, 0]. A tiny positive upper
# bound is used instead to effectively switch the intercept off.
INTERCEPT_UPPER_BOUND_WHEN_DISABLED = 0.0001
if no_intercept:
    max_con[0] = INTERCEPT_UPPER_BOUND_WHEN_DISABLED
print("Minimum constraints:", min_con)
print("Maximum constraints:", max_con)

Minimum constraints: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Maximum constraints: [0.0001, inf, inf, inf, inf, inf]


# Model

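`y = X0 + X1*PoolArea + (X1+X2)*LotArea + (X1+X2+X3)*TotalBsmtSF + (X1+X2+X3+X4)*GarageArea + (X1+X2+X3+X4+X5)*GrLivArea`

Regrouping the terms by X1, ..., X5 gives the equivalent form that is actually fitted:

`y = X0 + X1*(PoolArea+LotArea+TotalBsmtSF+GarageArea+GrLivArea) + X2*(LotArea+TotalBsmtSF+GarageArea+GrLivArea) + X3*(TotalBsmtSF+GarageArea+GrLivArea) + X4*(GarageArea+GrLivArea) + X5*GrLivArea`

Because every Xi is constrained to be non-negative, the effective feature coefficients X1, X1+X2, ..., X1+...+X5 are automatically non-negative and non-decreasing in the order given by `features`.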

In [21]:
# Feature engineer
# Build one engineered column per feature: Fk is the row-wise sum of
# features[k-1:], i.e. a reverse cumulative sum across the ordered features.
# Fitting non-negative weights on F1..Fn and cumulatively summing those weights
# afterwards yields coefficients for the original features that are
# non-negative and non-decreasing in the order given by `features`.
# This works for any number of features rather than exactly five.
feature_values = df[features].values
# Flip the column order, accumulate left to right, then flip back so that
# column k holds features[k] + features[k+1] + ... + features[-1].
tail_sums = np.cumsum(feature_values[:, ::-1], axis=1)[:, ::-1]
X = pd.DataFrame(
    tail_sums,
    index=df.index,
    columns=["F{}".format(k) for k in range(1, len(features) + 1)],
)

In [22]:
# Column of ones, one per observation, so the first fitted weight acts as the intercept
ones = np.ones(len(X))

# Prepend the ones column to the engineered features to form the design matrix
X = np.column_stack((ones, X.values))
X

array([[1.0000e+00, 1.1564e+04, 1.1564e+04, 3.1140e+03, 2.2580e+03,
        1.7100e+03],
       [1.0000e+00, 1.2584e+04, 1.2584e+04, 2.9840e+03, 1.7220e+03,
        1.2620e+03],
       [1.0000e+00, 1.4564e+04, 1.4564e+04, 3.3140e+03, 2.3940e+03,
        1.7860e+03],
       ...,
       [1.0000e+00, 1.2786e+04, 1.2786e+04, 3.7440e+03, 2.5920e+03,
        2.3400e+03],
       [1.0000e+00, 1.2113e+04, 1.2113e+04, 2.3960e+03, 1.3180e+03,
        1.0780e+03],
       [1.0000e+00, 1.2725e+04, 1.2725e+04, 2.7880e+03, 1.5320e+03,
        1.2560e+03]])

In [23]:
# Extract the dependent variable from the frame as a NumPy array
y = df.loc[:, target].values
y

array([208500, 181500, 223500, ..., 266500, 142125, 147500], dtype=int64)

In [24]:
# Solve the bounded linear least-squares problem: minimise ||X w - y||^2
# subject to min_con <= w <= max_con (element-wise)
coefficient_bounds = (min_con, max_con)
results = lsq_linear(X, y, bounds=coefficient_bounds, lsmr_tol='auto')
print("Results:\n", results)

Results:
  active_mask: array([-1, -1,  0,  0,  0, -1])
        cost: 1612639890094.163
         fun: array([-16816.56184783,  -7032.71990244, -19345.98196467, ...,
       -38838.29933725,  -3106.44329744,  14014.00562336])
     message: 'The relative change of the cost function is less than `tol`.'
         nit: 22
  optimality: 9.179502329467066e-05
      status: 2
     success: True
           x: array([2.43994863e-40, 2.50774692e-19, 1.56157982e-01, 4.54654720e+01,
       2.13897907e+01, 1.10382863e-30])


In [25]:
if results.success:
    # Transform the coefficients back to the context of original features.
    # The fitted weights results.x[1:] are non-negative increments: the
    # coefficient of the k-th feature is results.x[1] + ... + results.x[k],
    # i.e. a running sum. Using cumsum handles any number of features
    # instead of exactly five and accumulates in the same left-to-right order.
    coeffs[0] = results.x[0]  # the intercept is passed through unchanged
    coeffs[1:] = np.cumsum(results.x[1:])
    print("Final Coefficients (including intercept):", coeffs)
else:
    print("Convergence was not achieved!")

Final Coefficients (including intercept): [2.43994863154068e-40, 2.507746919464763e-19, 0.15615798191338764, 45.6216299633703, 67.01142070697755, 67.01142070697755]
