# Piecewise linear regression: trying a modified `LinearRegression` class
**The idea is to modify sklearn's `LinearRegression` class by adding some calculations that do not affect the training and prediction process.**

-> DO THIS TO CHECK WHETHER IT IS POSSIBLE TO USE A CUSTOM CLASS IN GUROBI MACHINE LEARNING

### 1. ORIGINAL OPTIMIZATION WITH LINEAR REGRESSION

In [1]:
import pandas as pd
import warnings
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_transformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

In [2]:
# !pip install gurobipy
# !pip install gurobipy_pandas
# !pip install gurobi-machinelearning
import gurobipy_pandas as gppd
from gurobi_ml import add_predictor_constr
from gurobi_ml import register_predictor_constr
from gurobi_ml.sklearn import add_linear_regression_constr
import gurobipy as gp

### 1.1 data

In [3]:
# Read the avocado sales CSV, parse the date column and order rows by date.
data_url = "https://raw.githubusercontent.com/Gurobi/modeling-examples/master/price_optimization/"
avocado = (
    pd.read_csv(data_url + "HAB_data_2015to2022.csv")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values(by="date")
)

# Regions kept for the study; this order is reused later to index the
# optimization data, so it must not be changed.
regions = [
    "Great_Lakes", "Midsouth", "Northeast", "Northern_New_England",
    "SouthCentral", "Southeast", "West", "Plains",
]
df = avocado.loc[avocado["region"].isin(regions)]

# Features and target of the demand regression.
feature_columns = ["region", "price", "year", "peak"]
X = df[feature_columns]
y = df["units_sold"]

# 80/20 split with a fixed seed so the reported score is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=1
)

# Preprocessing: one-hot encode the region (first level dropped), standardize
# price and year, keep the peak flag unchanged, drop every other column.
feat_transform = make_column_transformer(
    (OneHotEncoder(drop="first"), ["region"]),
    (StandardScaler(), ["price", "year"]),
    ("passthrough", ["peak"]),
    remainder="drop",
    verbose_feature_names_out=False,
)

### 1.2 train model

In [4]:
# Fit the preprocessing + linear regression pipeline on the training split
# and score it on the held-out split.
reg = make_pipeline(feat_transform, LinearRegression())
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
r2_test = np.round(r2_score(y_test, y_pred), 5)
print(f"The R^2 value in the test set is {r2_test}")

# Refit the same pipeline on the full dataset; this refitted `reg` is the
# model embedded in the optimization below.
reg.fit(X, y)
y_pred_full = reg.predict(X)
r2_full = np.round(r2_score(y, y_pred_full), 5)
print(f"The R^2 value in the full dataset is {r2_full}")

The R^2 value in the test set is 0.9083
The R^2 value in the full dataset is 0.90667


### 1.3 optimization

In [5]:
# Sets and parameters
# ---- Sets and parameters ----------------------------------------------------
B = 35  # total amount of avocado supply
peak_or_not = 1  # 1 if it is the peak season; 0 if it isn't
year = 2022
c_waste = 0.1  # cost ($) of wasting an avocado
a_min, a_max = 0, 3  # minimum / maximum avocado price

# Cost of transporting an avocado to each region, reordered to follow `regions`.
c_transport = pd.Series(
    {
        "Great_Lakes": 0.3,
        "Midsouth": 0.1,
        "Northeast": 0.4,
        "Northern_New_England": 0.5,
        "SouthCentral": 0.3,
        "Southeast": 0.2,
        "West": 0.2,
        "Plains": 0.2,
    },
    name="transport_cost",
).loc[regions]

# Per-region table: transport cost plus the smallest and largest observed
# units sold, which serve as bounds on the quantity delivered.
units_by_region = df.groupby("region")["units_sold"]
data = pd.concat(
    [
        c_transport,
        units_by_region.min().rename("min_delivery"),
        units_by_region.max().rename("max_delivery"),
    ],
    axis=1,
)

# Fixed (non-decision) regression inputs, one row per region.
feats = pd.DataFrame(
    {"year": year, "peak": peak_or_not, "region": regions},
    index=regions,
)

# ---- Gurobi model -----------------------------------------------------------
m = gp.Model("Avocado_Price_Allocation")

# Decision variables, one per row of `data`.
p = gppd.add_vars(m, data, name="price", lb=a_min, ub=a_max)  # price per region
x = gppd.add_vars(m, data, name="x", lb="min_delivery", ub="max_delivery")  # quantity supplied
s = gppd.add_vars(m, data, name="s")  # predicted sales at the chosen price
w = gppd.add_vars(m, data, name="w")  # excess (wasted) quantity
d = gppd.add_vars(m, data, lb=-gp.GRB.INFINITY, name="demand")  # regression output

# All supply is allocated; sales are capped by both the allocation and the
# predicted demand; waste is whatever was allocated but not sold.
m.addConstr(x.sum() == B)
for upper in (x, d):
    gppd.add_constrs(m, s, gp.GRB.LESS_EQUAL, upper)
gppd.add_constrs(m, w, gp.GRB.EQUAL, x - s)
m.update()

Restricted license - for non-production use only - expires 2025-11-24


In [6]:
# Regression inputs for the solver: the fixed features joined with the price
# decision variables, in the column order the pipeline was trained on.
feature_order = ["region", "price", "year", "peak"]
m_feats = pd.concat([feats, p], axis=1).loc[:, feature_order]

# Constrain `d` to equal the pipeline's prediction for `m_feats`.
pred_constr = add_predictor_constr(m, reg, m_feats, d)
pred_constr.print_stats()

# Net revenue = sales revenue - waste cost - transport cost.
revenue = (p * s).sum()
waste_cost = c_waste * w.sum()
transport_cost = (c_transport * x).sum()
m.setObjective(revenue - waste_cost - transport_cost, gp.GRB.MAXIMIZE)

# The price * sales product makes the objective non-convex quadratic.
m.setParam("NonConvex", 2)
m.optimize()

Model for pipe:
88 variables
24 constraints
Input has shape (8, 4)
Output has shape (8, 1)

Pipeline has 2 steps:

--------------------------------------------------------------------------------
Step            Output Shape    Variables              Constraints              
                                                Linear    Quadratic      General
col_trans            (8, 10)           24           16            0            0

lin_reg               (8, 1)           64            8            0            0

--------------------------------------------------------------------------------
Set parameter NonConvex to value 2
Gurobi Optimizer version 11.0.0 build v11.0.0rc2 (win64 - Windows 10.0 (19043.2))

CPU model: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Optimize a model with 49 rows, 128 columns and 184 nonzeros
Model fingerprint: 0xab60f9f8
Model has 8 quadratic ob

### 1.4 show solution

In [7]:
# Collect the optimal value of every decision variable, one row per region.
solution = pd.DataFrame(
    {
        "Price": p.gppd.X,
        "Allocated": x.gppd.X,
        "Sold": s.gppd.X,
        "Wasted": w.gppd.X,
        "Pred_demand": d.gppd.X,
    },
    index=regions,
)

# Objective value of the solved model.
opt_revenue = m.ObjVal
print("\n The optimal net revenue: $%f million" % opt_revenue)
solution.round(4)


 The optimal net revenue: $41.167117 million


Unnamed: 0,Price,Allocated,Sold,Wasted,Pred_demand
Great_Lakes,1.6139,3.5566,3.5566,0.0,3.5566
Midsouth,1.5088,6.1686,3.5454,2.6231,3.5454
Northeast,1.989,4.1629,4.1629,0.0,4.1629
Northern_New_England,1.4412,0.918,0.918,0.0,0.918
SouthCentral,2.0027,4.4135,4.4135,0.0,4.4135
Southeast,1.6964,3.9588,3.9588,0.0,3.9588
West,2.1542,9.0623,4.9677,4.0947,4.9677
Plains,1.1521,2.7593,2.7593,0.0,2.7593


### 1.5 develop codes to change model

In [8]:
# Smoke-test the "swap the embedded model" workflow: drop the current
# predictor constraint, add it again from the same fitted pipeline `reg`,
# and re-solve.
pred_constr.remove()
pred_constr = add_predictor_constr(m, reg, m_feats, d)  # add the predictor constraint again (same pipeline)
pred_constr.print_stats()
m.update()
m.optimize()

Model for pipe0:
88 variables
24 constraints
Input has shape (8, 4)
Output has shape (8, 1)

Pipeline has 2 steps:

--------------------------------------------------------------------------------
Step            Output Shape    Variables              Constraints              
                                                Linear    Quadratic      General
col_trans            (8, 10)           24           16            0            0

lin_reg               (8, 1)           64            8            0            0

--------------------------------------------------------------------------------
Gurobi Optimizer version 11.0.0 build v11.0.0rc2 (win64 - Windows 10.0 (19043.2))

CPU model: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Optimize a model with 49 rows, 128 columns and 184 nonzeros
Model fingerprint: 0xab60f9f8
Model has 8 quadratic objective terms
Coefficient statisti

In [9]:
# Collect the optimal value of every decision variable, one row per region.
solution = pd.DataFrame(
    {
        "Price": p.gppd.X,
        "Allocated": x.gppd.X,
        "Sold": s.gppd.X,
        "Wasted": w.gppd.X,
        "Pred_demand": d.gppd.X,
    },
    index=regions,
)

# Objective value of the re-solved model.
opt_revenue = m.ObjVal
print("\n The optimal net revenue: $%f million" % opt_revenue)
solution.round(4)


 The optimal net revenue: $41.167117 million


Unnamed: 0,Price,Allocated,Sold,Wasted,Pred_demand
Great_Lakes,1.6139,3.5566,3.5566,0.0,3.5566
Midsouth,1.5088,6.1686,3.5454,2.6231,3.5454
Northeast,1.989,4.1629,4.1629,0.0,4.1629
Northern_New_England,1.4412,0.918,0.918,0.0,0.918
SouthCentral,2.0027,4.4135,4.4135,0.0,4.4135
Southeast,1.6964,3.9588,3.9588,0.0,3.9588
West,2.1542,9.0623,4.9677,4.0947,4.9677
Plains,1.1521,2.7593,2.7593,0.0,2.7593


### 2. Develop CUSTOM LINEAR REGRESSION USING THE LinearRegression CLASS AS BASE
In this base example, a new model is simply derived from the `LinearRegression` class, without any change to the training code. The first step is to test whether Gurobi accepts a "child" model created from the `LinearRegression` class.
It is trained with the same data.
So, if the code works, it should generate the same results.

### 2.1 Train custom linear regression

In [36]:
# # OLD
# class lr_custom_nochange(LinearRegression):
#     """
#     Clase linear regression with any change. Only test if guroby accept this model
#     """
#     def __init__(self):
#         super().__init__()
#         self.__class__ = LinearRegression  # change type of the object to conserve its original type: sklearn.linear_model._base.LinearRegression
            
#     def fit(self, X, y):
#         """
#         Method custom train
#         """
#         super().fit(X, y)
#         return self
    
#     def predict(self, X):
#         """
#         Method custom predict
#         """
#         y_pred = super().predict(X)
#         # one change in the code. No effect the prediction
#         y_pred_change = y_pred + 1
#         return y_pred


class lr_custom_nochange(LinearRegression):
    """LinearRegression subclass used to test what Gurobi ML accepts.

    The constructor rebinds the instance's class to ``LinearRegression``, so
    every object created here reports the stock sklearn type. A side effect
    is that method lookup then goes through ``LinearRegression``: the ``fit``
    and ``predict`` overrides below are never reached through an instance
    (the debugging print does not appear when the model is trained).

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``LinearRegression.__init__``. Calling the
        class with no arguments behaves as before.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Rebind the object's type to the stock sklearn class. After this
        # line the overrides defined in this subclass are no longer used.
        self.__class__ = LinearRegression

    def fit(self, X, y, sample_weight=None):
        """Train exactly like ``LinearRegression.fit`` and return ``self``.

        Returning ``self`` follows the sklearn estimator convention, so
        chained calls such as ``est.fit(X, y).predict(X)`` keep working.
        """
        print('debugging - training')
        super().fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Return the parent's prediction for ``X`` unchanged."""
        return super().predict(X)

In [37]:
### verify that the types of boths models are the same
model_a = lr_custom_nochange()
print('type lr modified: ', type(model_a))

model_b = LinearRegression()
print('type lr original: ', type(model_b))

type lr modified:  <class 'sklearn.linear_model._base.LinearRegression'>
type lr original:  <class 'sklearn.linear_model._base.LinearRegression'>


In [30]:
# train custom lr model in the pipeline
reg_custom_nochange = make_pipeline(feat_transform, 
                                    lr_custom_nochange()  # generate new pipeline with lr updated
                                   ) 
reg_custom_nochange.fit(X, y)


# see the metrics
y_pred_custom_nochange = reg_custom_nochange.predict(X)
print(f"The R^2 value in the full dataset is {np.round(r2_score(y, y_pred_custom_nochange),5)}")

The R^2 value in the full dataset is 0.90667


### 2.2 update gurobi model and CHECK IF A CUSTOM LR WORKS

In [31]:
# Replace the embedded regression with the pipeline built around the custom
# estimator, then re-solve to check whether gurobi_ml accepts it.
pred_constr.remove()
pred_constr = add_predictor_constr(m, reg_custom_nochange, m_feats, d)  # add the constraint using the pipeline with the modified LR
pred_constr.print_stats()
m.update()
m.optimize()

Model for pipe2:
88 variables
24 constraints
Input has shape (8, 4)
Output has shape (8, 1)

Pipeline has 2 steps:

--------------------------------------------------------------------------------
Step            Output Shape    Variables              Constraints              
                                                Linear    Quadratic      General
col_trans            (8, 10)           24           16            0            0

lin_reg               (8, 1)           64            8            0            0

--------------------------------------------------------------------------------
Gurobi Optimizer version 11.0.0 build v11.0.0rc2 (win64 - Windows 10.0 (19043.2))

CPU model: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Optimize a model with 49 rows, 128 columns and 184 nonzeros
Model fingerprint: 0xab60f9f8
Model has 8 quadratic objective terms
Coefficient statisti

In [32]:
# Collect the optimal value of every decision variable, one row per region.
solution = pd.DataFrame(
    {
        "Price": p.gppd.X,
        "Allocated": x.gppd.X,
        "Sold": s.gppd.X,
        "Wasted": w.gppd.X,
        "Pred_demand": d.gppd.X,
    },
    index=regions,
)

# Objective value of the model solved with the custom-estimator pipeline.
opt_revenue = m.ObjVal
print("\n The optimal net revenue: $%f million" % opt_revenue)
solution.round(4)


 The optimal net revenue: $41.167117 million


Unnamed: 0,Price,Allocated,Sold,Wasted,Pred_demand
Great_Lakes,1.6139,3.5566,3.5566,0.0,3.5566
Midsouth,1.5088,6.1686,3.5454,2.6231,3.5454
Northeast,1.989,4.1629,4.1629,0.0,4.1629
Northern_New_England,1.4412,0.918,0.918,0.0,0.918
SouthCentral,2.0027,4.4135,4.4135,0.0,4.4135
Southeast,1.6964,3.9588,3.9588,0.0,3.9588
West,2.1542,9.0623,4.9677,4.0947,4.9677
Plains,1.1521,2.7593,2.7593,0.0,2.7593


# IMPORTANT: THE MODIFIED CLASS ISN'T SHOWING THE print('debugging - training')
This happens because of the following line of code:

self.__class__ = LinearRegression

Once it runs, the instance becomes a plain LinearRegression, so the fit and predict methods written in the custom class are never called.

In [46]:
class lr_custom_nochange_v2(LinearRegression):
    """LinearRegression subclass that keeps its own type and overrides.

    Behaves like ``LinearRegression`` except that ``fit`` prints a debugging
    message. No ``__init__`` is defined on purpose: the constructor is
    inherited, so the parent's hyperparameters can still be passed at
    construction time and remain visible to ``get_params`` / ``set_params``.
    """

    def fit(self, X, y, sample_weight=None):
        """Train exactly like ``LinearRegression.fit`` and return ``self``.

        Returning ``self`` follows the sklearn estimator convention, so
        chained calls such as ``est.fit(X, y).predict(X)`` keep working.
        """
        print('debugging - training')
        super().fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Return the parent's prediction for ``X`` unchanged."""
        return super().predict(X)

In [47]:
# The v2 class keeps its own type; confirm an instance is still recognised
# as a LinearRegression through inheritance.
model_z = lr_custom_nochange_v2()
is_linear_regression = isinstance(model_z, LinearRegression)
is_linear_regression

True

In [48]:
# train custom lr model in the pipeline
reg_custom_nochange = make_pipeline(feat_transform, 
                                    lr_custom_nochange_v2()  # generate new pipeline with lr updated
                                   ) 
reg_custom_nochange.fit(X, y)


# see the metrics
y_pred_custom_nochange = reg_custom_nochange.predict(X)
print(f"The R^2 value in the full dataset is {np.round(r2_score(y, y_pred_custom_nochange),5)}")

debugging - training
The R^2 value in the full dataset is 0.90667


In [49]:
# update gurobipy
pred_constr.remove()
pred_constr = add_predictor_constr(m, reg_custom_nochange, m_feats, d)  #### ADD NEW CONSTRAINT WITH MODEL LR MODIFIED
pred_constr.print_stats()
m.update()
m.optimize()

NoModel: Can't do model for Pipeline: I don't know how to deal with that object: lr_custom_nochange_v2()