# Imports

In [2]:
import numpy as np
import pandas as pd
from scipy.optimize import lsq_linear

# User inputs

In [3]:
# Load the training data; the columns named in `features` and `target` below are read from this frame.
df = pd.read_csv("data/train.csv")

In [4]:
# When True, kb_cons_reg caps the intercept's upper bound at 0.0001,
# which effectively fits the model without an intercept.
no_intercept = True

In [5]:
# Column of df used as the dependent variable of the regression.
target = 'SalePrice'

In [6]:
# Features listed in the expected ascending order of their coefficients
# (first = smallest coefficient, last = largest); kb_cons_reg constrains
# the fitted coefficients to be non-decreasing in this order.
features = ['PoolArea', 'LotArea', 'TotalBsmtSF', 'GrLivArea', 'GarageArea'] 

# UDFs

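The coefficient of each feature is written as a running sum of non-negative increments X1..X5, so the coefficients cannot decrease from one feature to the next (X0 is the intercept):

y = X0 + X1*PoolArea + (X1+X2)*LotArea + (X1+X2+X3)*TotalBsmtSF + (X1+X2+X3+X4)*GrLivArea + (X1+X2+X3+X4+X5)*GarageArea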

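Regrouping the same model by increment gives a bounded least-squares problem in X0..X5, in which each increment multiplies the sum of its own feature and all later features:

y = X0 + X1*(PoolArea+LotArea+TotalBsmtSF+GrLivArea+GarageArea) + X2*(LotArea+TotalBsmtSF+GrLivArea+GarageArea) + X3*(TotalBsmtSF+GrLivArea+GarageArea) + X4*(GrLivArea+GarageArea) + X5*GarageArea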

In [7]:
def kb_cons_reg(df, features, target, no_intercept=False, verbose=True):
    """Fit a linear model whose coefficients are non-negative and non-decreasing.

    The coefficient of ``features[k]`` is modelled as the running sum of
    non-negative increments ``x[1] + ... + x[k+1]``.  Regrouping the model by
    increment turns the ordering constraint into simple lower bounds: the
    increment ``x[k+1]`` multiplies the sum of ``features[k]`` and every later
    feature, and the resulting problem is solved with
    ``scipy.optimize.lsq_linear``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the ``features`` columns and the ``target`` column.
    features : sequence of str
        One or more column names, listed in the expected ascending order of
        their coefficients (first = smallest).
    target : str
        Name of the dependent-variable column.
    no_intercept : bool, default False
        If True, the intercept's upper bound is capped at 0.0001 so that it is
        effectively zero.  If False, the intercept is still bounded below by 0.
    verbose : bool, default True
        Print the initial coefficients, the bounds, the raw solver result and
        the final coefficients.

    Returns
    -------
    coeffs : list of float
        ``[intercept, coef(features[0]), ..., coef(features[-1])]``.  All
        zeros when the solver does not report success.
    success : bool
        The ``success`` flag of the ``lsq_linear`` result.

    Raises
    ------
    ValueError
        If ``features`` is empty, or if the selected feature/target columns
        contain NaN or infinite values.
    """
    features = list(features)
    n_features = len(features)
    if n_features == 0:
        raise ValueError("`features` must contain at least one column name.")

    # Initialize coefficients (intercept first, then one per feature)
    len_coeffs = n_features + 1
    coeffs = [0.0] * len_coeffs
    if verbose:
        print("Initialized coefficients:", coeffs)

    # Put constraints: every solver variable (intercept and increments) is >= 0
    min_con = [0.0] * len_coeffs
    max_con = [np.inf] * len_coeffs
    if no_intercept:
        # lsq_linear rejects bounds whose lower value is not strictly below
        # the upper one, so the intercept cannot be pinned to exactly 0; a
        # tiny positive cap is used instead.
        max_con[0] = 0.0001
    if verbose:
        print("Minimum constraints:", min_con)
        print("Maximum constraints:", max_con)

    # Feature engineer: column k becomes the row-wise sum of features[k:],
    # i.e. a cumulative sum taken from the last feature backwards.
    raw = np.asarray(df[features], dtype=float)
    suffix_sums = np.cumsum(raw[:, ::-1], axis=1)[:, ::-1]

    # Prepend a column of ones to act as the intercept's regressor
    X = np.column_stack((np.ones(raw.shape[0]), suffix_sums))

    # Convert target variable to an array
    y = np.asarray(df[target], dtype=float)

    # Fail early with a clear message instead of an opaque solver error
    if not (np.isfinite(X).all() and np.isfinite(y).all()):
        raise ValueError("Feature and target columns must not contain NaN or infinite values.")

    # Run optimization
    results = lsq_linear(X, y, bounds=(min_con, max_con), lsmr_tol='auto')
    if verbose:
        print("Results:\n", results)

    if results.success:
        # Transform the increments back to the context of the original
        # features: coefficient k is the running sum of increments 1..k.
        coeffs = [results.x[0], *np.cumsum(results.x[1:])]
        if verbose:
            print("\n\nFinal Coefficients (including intercept):", coeffs)
    else:
        print("Convergence was not achieved!")

    return coeffs, results.success

# Model

In [8]:
# Fit the constrained regression; `success` is the solver's success flag, and
# `coeffs` stays all zeros when the solver does not report success.
coeffs, success = kb_cons_reg(df, features, target, no_intercept=no_intercept)

Initialized coefficients: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Minimum constraints: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Maximum constraints: [0.0001, inf, inf, inf, inf, inf]
Results:
  active_mask: array([-1,  0,  0,  0,  0,  0])
        cost: 1591264994307.6667
         fun: array([-15570.88002556,  -6211.14423445, -17020.43929655, ...,
       -51622.04361486,  -6813.20555247,   9554.2133226 ])
     message: 'The relative change of the cost function is less than `tol`.'
         nit: 17
  optimality: 0.758962769347648
      status: 2
     success: True
           x: array([1.02041714e-30, 6.77835405e-09, 1.81752444e-01, 4.25841583e+01,
       1.69575929e+01, 3.63686514e+01])


Final Coefficients (including intercept): [1.0204171358404372e-30, 6.778354049801431e-09, 0.18175245111440205, 42.7659107081641, 59.72350363618138, 96.09215507019732]


In [9]:
# Only show the coefficients when the solver reported success; otherwise
# `coeffs` holds the zero-initialized placeholders rather than a fitted model.
if success:
    print(coeffs)

[1.0204171358404372e-30, 6.778354049801431e-09, 0.18175245111440205, 42.7659107081641, 59.72350363618138, 96.09215507019732]
