# Trying a modified class for piecewise linear regression
**The idea is to modify scikit-learn's `LinearRegression` class by adding some calculations that do not affect the training and prediction process.**

-> THE GOAL IS TO CHECK WHETHER IT IS POSSIBLE TO USE A CUSTOM CLASS IN GUROBI MACHINE LEARNING

### 1. ORIGINAL OPTIMIZATION WITH LINEAR REGRESSION

In [None]:
import pandas as pd
import warnings
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_transformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

In [None]:
# !pip install gurobipy
# !pip install gurobipy_pandas
# !pip install gurobi-machinelearning
import gurobipy_pandas as gppd
from gurobi_ml import add_predictor_constr
import gurobipy as gp

### 1.1 data

In [None]:
# Load the avocado sales data from the Gurobi modeling-examples repository,
# parse the date column and order the rows chronologically.
data_url = "https://raw.githubusercontent.com/Gurobi/modeling-examples/master/price_optimization/"
avocado = (
    pd.read_csv(data_url + "HAB_data_2015to2022.csv")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values(by="date")
)

# Regions kept for the regression and for the optimization model.
regions = [
    "Great_Lakes", "Midsouth", "Northeast", "Northern_New_England",
    "SouthCentral", "Southeast", "West", "Plains",
]
df = avocado.loc[avocado["region"].isin(regions)]

# Features and target of the demand regression.
X = df[["region", "price", "year", "peak"]]
y = df["units_sold"]

# Hold out 20% of the rows for testing (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=1)

# Preprocessing: one-hot encode the region (dropping the first category),
# standardize price and year, pass peak through unchanged, drop anything else.
feat_transform = make_column_transformer(
    (OneHotEncoder(drop="first"), ["region"]),
    (StandardScaler(), ["price", "year"]),
    ("passthrough", ["peak"]),
    remainder="drop",
    verbose_feature_names_out=False,
)

### 1.2 train model

In [None]:
# Fit the preprocessing + linear regression pipeline on the training split.
reg = make_pipeline(feat_transform, LinearRegression())
reg.fit(X_train, y_train)

# Report R^2 on the held-out split.
y_pred = reg.predict(X_test)
r2_test = np.round(r2_score(y_test, y_pred), 5)
print(f"The R^2 value in the test set is {r2_test}")

# Refit the same pipeline on the full dataset; this refitted `reg` is the
# object used from here on.
reg.fit(X, y)

y_pred_full = reg.predict(X)
r2_full = np.round(r2_score(y, y_pred_full), 5)
print(f"The R^2 value in the full dataset is {r2_full}")

### 1.3 optimization

In [None]:
# ---- Sets and parameters ----
B = 35           # total amount of avocado supply
peak_or_not = 1  # 1 if it is the peak season; 0 otherwise
year = 2022
c_waste = 0.1    # cost ($) of wasting an avocado

# Cost of transporting an avocado to each region, ordered like `regions`.
c_transport = pd.Series(
    {
        "Great_Lakes": 0.3,
        "Midsouth": 0.1,
        "Northeast": 0.4,
        "Northern_New_England": 0.5,
        "SouthCentral": 0.3,
        "Southeast": 0.2,
        "West": 0.2,
        "Plains": 0.2,
    },
    name="transport_cost",
).loc[regions]

a_min = 0  # minimum avocado price
a_max = 3  # maximum avocado price

# Per-region table: transport cost plus the smallest and largest observed
# units_sold, which serve as lower/upper bounds on the delivered quantity.
units_by_region = df.groupby("region")["units_sold"]
data = pd.concat(
    [
        c_transport,
        units_by_region.min().rename("min_delivery"),
        units_by_region.max().rename("max_delivery"),
    ],
    axis=1,
)

# Fixed (non-decision) regression features, one row per region.
feats = pd.DataFrame(
    {"year": year, "peak": peak_or_not, "region": regions},
    index=regions,
)

m = gp.Model("Avocado_Price_Allocation")

# ---- Decision variables (one per row of `data`) ----
# price of an avocado in each region
p = gppd.add_vars(m, data, name="price", lb=a_min, ub=a_max)
# number of avocados supplied to each region
x = gppd.add_vars(m, data, name="x", lb="min_delivery", ub="max_delivery")
# predicted amount of sales in each region for the given price
s = gppd.add_vars(m, data, name="s")
# excess wastage in each region
w = gppd.add_vars(m, data, name="w")
# regression output (demand); unbounded below
d = gppd.add_vars(m, data, lb=-gp.GRB.INFINITY, name="demand")

# ---- Constraints ----
m.addConstr(x.sum() == B)                     # all of the supply is allocated
gppd.add_constrs(m, s, gp.GRB.LESS_EQUAL, x)  # sales cannot exceed the allocation
gppd.add_constrs(m, s, gp.GRB.LESS_EQUAL, d)  # sales cannot exceed predicted demand
gppd.add_constrs(m, w, gp.GRB.EQUAL, x - s)   # waste is what was allocated but not sold
m.update()

In [None]:
# ----> regression constraint
# Regression inputs: the fixed features joined with the price variables,
# in the column order the pipeline was trained on.
m_feats = pd.concat([feats, p], axis=1)[["region", "price", "year", "peak"]]
pred_constr = add_predictor_constr(m, reg, m_feats, d)
pred_constr.print_stats()

# Objective: sales revenue minus waste cost minus transport cost.
revenue = (p * s).sum()
waste_cost = c_waste * w.sum()
transport_cost = (c_transport * x).sum()
m.setObjective(revenue - waste_cost - transport_cost, gp.GRB.MAXIMIZE)

# The revenue term multiplies two decision variables (p * s).
# NOTE(review): NonConvex = 2 is presumably what lets Gurobi accept that
# non-convex quadratic objective - confirm against the Gurobi parameter docs.
m.Params.NonConvex = 2
m.optimize()

### 1.4 show solution

In [None]:
### print solution
# Gather the solution values of every variable series, one row per region.
solution = pd.DataFrame(
    {
        "Price": p.gppd.X,
        "Allocated": x.gppd.X,
        "Sold": s.gppd.X,
        "Wasted": w.gppd.X,
        "Pred_demand": d.gppd.X,
    },
    index=regions,
)

opt_revenue = m.ObjVal
print("\n The optimal net revenue: $%f million" % opt_revenue)
solution.round(4)

### 1.5 develop codes to change model

In [None]:
# Swap the regression embedded in the optimization model: discard the
# predictor constraint currently attached to `m`, attach a new one, and
# solve again. Here the same `reg` pipeline is re-added, so this cell only
# exercises the remove/re-add mechanism.
pred_constr.remove()
pred_constr = add_predictor_constr(m, reg, m_feats, d)  # add the new constraint with the (updated) model
pred_constr.print_stats()
m.update()
m.optimize()

In [None]:
### print solution
# Rebuild the per-region solution table after re-optimizing.
solution = pd.DataFrame(
    {
        "Price": p.gppd.X,
        "Allocated": x.gppd.X,
        "Sold": s.gppd.X,
        "Wasted": w.gppd.X,
        "Pred_demand": d.gppd.X,
    },
    index=regions,
)

opt_revenue = m.ObjVal
print("\n The optimal net revenue: $%f million" % opt_revenue)
solution.round(4)

### 2. Develop a CUSTOM LINEAR REGRESSION USING THE LinearRegression CLASS AS BASE
In this base example, a new model class is derived from `LinearRegression` without any change to the training code. The first step is to test whether Gurobi accepts a "child model" created from the `LinearRegression` class.
It is trained with the same data.
So, if the code works, it should generate the same results.

### 2.1 Train custom linear regression

In [None]:
# # OLD
# class lr_custom_nochange(LinearRegression):
#     """
#     Clase linear regression with any change. Only test if guroby accept this model
#     """
#     def __init__(self):
#         super().__init__()
#         self.__class__ = LinearRegression  # change type of the object to conserve its original type: sklearn.linear_model._base.LinearRegression
            
#     def fit(self, X, y):
#         """
#         Method custom train
#         """
#         super().fit(X, y)
#         return self
    
#     def predict(self, X):
#         """
#         Method custom predict
#         """
#         y_pred = super().predict(X)
#         # one change in the code. No effect the prediction
#         y_pred_change = y_pred + 1
#         return y_pred


class lr_custom_nochange(LinearRegression):
    """Experimental ``LinearRegression`` subclass that disguises its type.

    The constructor rebinds the new instance's ``__class__`` to
    ``LinearRegression``, so ``type(lr_custom_nochange())`` is the plain
    scikit-learn class. Because method lookup follows ``__class__``, the
    ``fit`` and ``predict`` overrides defined here are NOT used by instances
    built through the constructor: they behave exactly like a stock
    ``LinearRegression`` (in particular the debug message is never printed).
    The class exists only to test whether Gurobi Machine Learning accepts
    such a model.
    """
    def __init__(self):
        super().__init__()
        # Change the type of the object back to the original
        # sklearn.linear_model._base.LinearRegression. This also discards the
        # method overrides below for this instance.
        self.__class__ = LinearRegression

    def fit(self, X, y, sample_weight=None):
        """Fit the linear model, printing a debug message first.

        Parameters
        ----------
        X, y : training data, forwarded unchanged to ``LinearRegression.fit``.
        sample_weight : optional per-sample weights, forwarded unchanged.

        Returns
        -------
        self : the fitted estimator, as the scikit-learn estimator API
            expects from ``fit``.
        """
        print('debugging - training')
        super().fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Return the unmodified ``LinearRegression`` predictions for ``X``."""
        y_pred = super().predict(X)
        # Extra calculation added on purpose; its result is intentionally
        # discarded so the prediction is not affected.
        y_pred_change = y_pred + 1
        return y_pred

In [None]:
### verify that the types of boths models are the same
model_a = lr_custom_nochange()
print('type lr modified: ', type(model_a))

model_b = LinearRegression()
print('type lr original: ', type(model_b))

In [None]:
# train custom lr model in the pipeline
reg_custom_nochange = make_pipeline(feat_transform, 
                                    lr_custom_nochange()  # generate new pipeline with lr updated
                                   ) 
reg_custom_nochange.fit(X, y)


# see the metrics
y_pred_custom_nochange = reg_custom_nochange.predict(X)
print(f"The R^2 value in the full dataset is {np.round(r2_score(y, y_pred_custom_nochange),5)}")

### 2.2 Update the Gurobi model and CHECK WHETHER A CUSTOM LR WORKS

In [None]:
# Replace the predictor constraint currently attached to `m` with one built
# from the pipeline that ends in the custom LR class, then solve again.
pred_constr.remove()
pred_constr = add_predictor_constr(m, reg_custom_nochange, m_feats, d)  # add the new constraint with the modified LR model
pred_constr.print_stats()
m.update()
m.optimize()

In [None]:
### print solution
solution = pd.DataFrame(index=regions)

solution["Price"] = p.gppd.X
solution["Allocated"] = x.gppd.X
solution["Sold"] = s.gppd.X
solution["Wasted"] = w.gppd.X
solution["Pred_demand"] = d.gppd.X

opt_revenue = m.ObjVal
print("\n The optimal net revenue: $%f million" % opt_revenue)
solution.round(4)

# IMPORTANT: THE MODIFIED CLASS DOES NOT SHOW THE print('debugging - training') OUTPUT
This happens because, by running this line of code

self.__class__ = LinearRegression

the instance is turned completely into a plain LinearRegression, so the fit and predict methods written in the custom class are never called.

In [None]:
class lr_custom_nochange_v2(LinearRegression):
    """``LinearRegression`` subclass with behavior-neutral overrides.

    Unlike the first experiment, instances keep their own type, so the
    ``fit`` and ``predict`` overrides below are really executed. They add a
    debug print and an unused side calculation, neither of which changes the
    fitted coefficients or the predictions. The class is used to test whether
    Gurobi Machine Learning accepts a custom subclass of ``LinearRegression``.

    No ``__init__`` is defined on purpose: scikit-learn discovers
    hyperparameters from the constructor signature, so inheriting
    ``LinearRegression.__init__`` keeps ``get_params``/``set_params``/``clone``
    working. ``lr_custom_nochange_v2()`` with no arguments still works.
    """

    def fit(self, X, y, sample_weight=None):
        """Fit the linear model, printing a debug message first.

        Parameters
        ----------
        X, y : training data, forwarded unchanged to ``LinearRegression.fit``.
        sample_weight : optional per-sample weights, forwarded unchanged.

        Returns
        -------
        self : the fitted estimator, as the scikit-learn estimator API
            expects from ``fit``.
        """
        print('debugging - training')
        super().fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Return the unmodified ``LinearRegression`` predictions for ``X``."""
        y_pred = super().predict(X)
        # Extra calculation added on purpose; its result is intentionally
        # discarded so the prediction is not affected.
        y_pred_change = y_pred + 1
        return y_pred

In [None]:
# The v2 class subclasses LinearRegression without changing __class__, so
# this isinstance check evaluates to True while the instance keeps its
# custom type.
model_z = lr_custom_nochange_v2()
isinstance(model_z, LinearRegression)
#isinstance(model_z, lr_custom_nochange)

In [None]:
# train custom lr model in the pipeline
reg_custom_nochange = make_pipeline(feat_transform, 
                                    lr_custom_nochange_v2()  # generate new pipeline with lr updated
                                   ) 
reg_custom_nochange.fit(X, y)


# see the metrics
y_pred_custom_nochange = reg_custom_nochange.predict(X)
print(f"The R^2 value in the full dataset is {np.round(r2_score(y, y_pred_custom_nochange),5)}")

In [None]:
# update gurobipy
pred_constr.remove()
pred_constr = add_predictor_constr(m, reg_custom_nochange, m_feats, d)  #### ADD NEW CONSTRAINT WITH MODEL LR MODIFIED
pred_constr.print_stats()
m.update()
m.optimize()