- All non-negative coefficients
- Bounds for Intercept depend on no_intercept flag also
- Specific order for some coefficients
- Min percentage by which the coefficients (of ordered features) differ

# Imports

In [17]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from scipy.optimize import lsq_linear

# User inputs

In [18]:
df = pd.read_csv("data/wages.csv")

In [19]:
no_intercept = True

In [20]:
target = 'TotalWageCost_Values'

In [21]:
# Features in the expected ascending order of coefficients
features = ['HeadCount_DS_1', 'HeadCount_DS_2', 'HeadCount_DS_3', 'HeadCount_DS_4', 'HeadCount_DS_5']
features

['HeadCount_DS_1',
 'HeadCount_DS_2',
 'HeadCount_DS_3',
 'HeadCount_DS_4',
 'HeadCount_DS_5']

In [22]:
# Min percentage gaps between successive (ordered) features
min_gap_pct = [None, 0.13, 0.51, 0.09, 0.03]

# Constraints

In [23]:
# Initialize coefficients
len_coeffs = len(features) + 1
coeffs = list(np.zeros(len_coeffs))
print("Initialized coefficients:", coeffs)

Initialized coefficients: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [24]:
# Put constraints 
min_con_orig = [0, 56, 64, 108, 97, 111]
max_con_orig = [np.inf, 95, 106, 171, 160, 176]

if no_intercept:
    min_con_orig[0] = 0
    max_con_orig[0] = 0.0001

In [25]:
# Sanitize constraints
for i in range(2, len(features) + 1):
    min_con_orig[i] = max((1 + min_gap_pct[i - 1]) * min_con_orig[i - 1], min_con_orig[i])

i = len(features) - 1
while i:
    max_con_orig[i] = min(max_con_orig[i + 1] / (1 + min_gap_pct[i]), max_con_orig[i])
    i -= 1
    
print("min_con_orig", min_con_orig)
print("max_con_orig", max_con_orig)

for i in range(len_coeffs):
    if max_con_orig[i] < min_con_orig[i]:
        assert(0)

min_con_orig [0, 56, 64, 108, 117.72000000000001, 121.25160000000001]
max_con_orig [0.0001, 86.02765681632074, 97.21125220244244, 146.78899082568807, 160, 176]


In [26]:
focal_coeff_idx = 3

In [27]:
min_con = min_con_orig.copy()
max_con = max_con_orig.copy()

for i in range(focal_coeff_idx + 1, len(features) + 1):
    min_con[i] = max(0, min_con_orig[i] - max_con_orig[i - 1] * (1 + min_gap_pct[i - 1]))
    max_con[i] = max(min_con[i] + 0.001, max_con_orig[i] - min_con_orig[i - 1] * (1 + min_gap_pct[i - 1]))
    
i = focal_coeff_idx - 1
while i:
    min_con[i] = max(0, min_con_orig[i + 1] / (1 + min_gap_pct[i]) - max_con_orig[i])
    max_con[i] = max(min_con[i] + 0.001, max_con_orig[i + 1] / (1 + min_gap_pct[i]) - min_con_orig[i])
    i -= 1

print("min_con", min_con)
print("max_con", max_con)

min_con [0, 0, 0, 108, 0, 0]
max_con [0.0001, 30.027656816320743, 33.21125220244244, 146.78899082568807, 42.27999999999999, 54.74839999999999]


# Model

y = X0 + 
    X3*DS_3 + 
    (X3*1.09+X4)*DS_4 + ((X3*1.09+X4)*1.03+X5)*DS_5 + 
    (X3/1.51-X2)*DS_2 + ((X3/1.51-X2)/1.13-X1)*DS_1

y = X0 + 
    X5*DS_5 + X4*(DS_5*1.03+DS_4) + 
    X1*(-DS_1) + X2*(-DS_1/1.13-DS_2) + 
    X3*(DS_3+DS_4*1.09+DS_5*(1.09*1.03)+DS_2/1.51+DS_1/(1.51/1.13))

In [28]:
# Feature engineer   
X = df[features].copy()

X['F5'] = X[features[4]]
X['F4'] = X[features[3]] + X['F5']*(1+min_gap_pct[4])

X['F1'] = -X[features[0]]
X['F2'] = -X[features[1]] + X['F1']/(1+min_gap_pct[1])

X['F3'] = X[features[2]] + X['F4']*(1+min_gap_pct[3]) - X['F2']/(1+min_gap_pct[2])

X = X.drop(features, axis=1)

In [29]:
# Convert independent variables to a matrix
X = X.values

# Add an array of ones to act as intercept coefficient
ones = np.ones(X.shape[0])
# Combine array of ones and indepedent variables
X = np.concatenate((ones[:, np.newaxis], X), axis=1)
X

array([[  1.        ,   2.        ,   6.06      , -10.        ,
        -16.84955752,  23.76404737],
       [  1.        ,   1.        ,   4.03      ,  -9.        ,
        -14.96460177,  19.3030323 ],
       [  1.        ,   3.        ,   8.09      , -11.        ,
        -18.73451327,  28.22506243],
       [  1.        ,   2.        ,   6.06      , -10.        ,
        -16.84955752,  23.76404737],
       [  1.        ,   4.        ,  10.12      , -12.        ,
        -20.61946903,  32.6860775 ],
       [  1.        ,   3.        ,   8.09      , -11.        ,
        -18.73451327,  28.22506243],
       [  1.        ,   5.        ,  12.15      , -13.        ,
        -22.50442478,  37.14709257],
       [  1.        ,   4.        ,  10.12      , -12.        ,
        -20.61946903,  32.6860775 ],
       [  1.        ,   6.        ,  14.18      , -14.        ,
        -24.38938053,  41.60810764],
       [  1.        ,   5.        ,  12.15      , -13.        ,
        -22.50442478,  37.1

In [30]:
# Convert target variable to a matrix
y = df[target].values
y

array([3107, 2538, 3647, 3107, 4243, 3647, 4828, 4243, 5391, 4828, 5965,
       5391, 6575, 5965, 7108, 6575, 7724, 7108])

In [31]:
# Run optimization
results = lsq_linear(X, y, bounds=(min_con, max_con), lsmr_tol='auto')
print("Results:\n", results)

Results:
  active_mask: array([ 1,  1,  1, -1, -1,  1])
        cost: 137963860.6859192
         fun: array([-2624.64082723, -2289.32076335, -2930.96089111, -2624.64082723,
       -3293.28095499, -2930.96089111, -3644.60101887, -3293.28095499,
       -3973.92108275, -3644.60101887, -4314.24114663, -3973.92108275,
       -4690.56121051, -4314.24114663, -4989.88127439, -4690.56121051,
       -5372.20133827, -4989.88127439])
     message: 'The relative change of the cost function is less than `tol`.'
         nit: 7
  optimality: 2.0540852634913988e-08
      status: 2
     success: True
           x: array([1.00000000e-04, 3.00276568e+01, 3.32112522e+01, 1.08000000e+02,
       2.38299102e-24, 5.47484000e+01])


In [32]:
if results.success:
    # Transform the coefficients back to the context of original features 
    coeffs[0] = results.x[0]
    coeffs[3] = results.x[3]
    
    coeffs[4] = coeffs[3]*(1+min_gap_pct[3]) + results.x[4]
    coeffs[5] = coeffs[4]*(1+min_gap_pct[4]) + results.x[5]
    
    coeffs[2] = coeffs[3]/(1+min_gap_pct[2]) - results.x[2]
    coeffs[1] = coeffs[2]/(1+min_gap_pct[1]) - results.x[1]
    
    for i in range(1, len(features) + 1):
        if coeffs[i] < min_con_orig[i]:
            coeffs[i] = min_con_orig[i]
            if i < len(features) and coeffs[i + 1] < coeffs[i] * (1 + min_gap_pct[i]):
                coeffs[i + 1] = coeffs[i] * (1 + min_gap_pct[i])
        elif coeffs[i] > max_con_orig[i]:
            coeffs[i] = max_con_orig[i]
            if i > 1 and coeffs[i - 1] > coeffs[i] / (1 + min_gap_pct[i - 1]):
                coeffs[i - 1] = coeffs[i] / (1 + min_gap_pct[i - 1])
                
    for i in range(1, len(features) + 1):
        if coeffs[i] < min_con_orig[i] or coeffs[i] > max_con_orig[i] or\
        (i < len(features) and coeffs[i + 1] < coeffs[i] * (1 + min_gap_pct[i])) or\
        (i > 1 and coeffs[i - 1] > coeffs[i] / (1 + min_gap_pct[i - 1])):
            print("Convergence was not achieved!")
    else:
        print("Final Coefficients (including intercept):", coeffs)
else:
    print("Convergence was not achieved!")

Final Coefficients (including intercept): [9.999999999999999e-05, 56, 64, 108.00000000000001, 117.72000000000003, 176.0]
