# Test to define how to add decision var with multiple sets Var(x,t) into a dataframe to connect to Machine Learning Model

Gurobi machine learning, how is natural, only accepts dataframe of 2 dimensions to connect to Machine Learning Models. **The columns are the features that the model recibe (always are the features of the ml model) and the rows represent one set of the data**, 

So, when the decision variables has multiple sets there are two aproaches that can be do depending of the nature of the optimization problem:
- Use a ML model transversal across only one of the sets (time for example) to achieve to generate the input dataframe: time, features
- Use a ML model with the input dataframe with multiindex to have a input dataframe with 2 or more sets: (product, time), feartures

## Root folder and read env variables

In [1]:
import os
# fix root path to save outputs
actual_path = os.path.abspath(os.getcwd())
list_root_path = actual_path.split('\\')[:-2]
root_path = '\\'.join(list_root_path)
os.chdir(root_path)
print('root path: ', root_path)

root path:  D:\github-mi-repo\Gurobi-ML-tips-modeling


In [2]:
import os
from dotenv import load_dotenv, find_dotenv # package used in jupyter notebook to read the variables in file .env

""" get env variable from .env """
load_dotenv(find_dotenv())

""" Read env variables and save it as python variable """
PROJECT_GCP = os.environ.get("PROJECT_GCP", "")

## RUN

In [3]:
import pickle
import pandas as pd
import numpy as np

#gurobi
import gurobipy_pandas as gppd
from gurobi_ml import add_predictor_constr
import gurobipy as gp

### 0. Load data
This data will be use to get values to generate a instance of the ml model

In [4]:
name_process = 'process_b'
# load X_test
path_X_test = f'artifacts/data_training/{name_process}/X_test.pkl'
X_test = pd.read_pickle(path_X_test)

# load y_test
path_y_test = f'artifacts/data_training/{name_process}/y_test.pkl'
y_test = pd.read_pickle(path_y_test)

### 1. Load Artifacts to connect ML to gurobi

#### 1.1 pkl model

In [5]:
path_model_to_test = f'artifacts/models/{name_process}/lr.pkl'
model_ml_to_test = pd.read_pickle(path_model_to_test)
model_ml_to_test

### 1.2 Define list of features and target for each model

In [6]:
X_test

Unnamed: 0,Z1,X2,O4,O5
521,125.827767,1.600197,4.554699,0.088144
737,86.789512,0.578117,7.962496,3.903218
740,91.836144,6.023679,0.332634,2.712363
660,85.987786,3.098621,3.222811,0.905589
411,85.749421,5.509697,3.307710,4.956510
...,...,...,...,...
408,110.079863,6.547218,9.923618,8.871421
332,110.343326,3.887887,5.537505,4.707576
208,94.335957,7.043643,3.328544,0.851833
613,118.819866,2.913836,4.735953,0.224784


In [7]:
######################## model  ########################

list_features = ['Z1', 'X2', 'O4', 'O5']

list_features_controlables = ['Z1', 'X2']

list_target = ['Y2']

### 1.3 Read master tag and sort features according its order

In [8]:
# read table master tag
path_list_features_target_to_optimization = f'config/config_ml_models_development/MasterTable_{name_process}.xlsx'
maestro_tags = pd.read_excel(path_list_features_target_to_optimization)

### sort list of features according the order in master table
list_features = [tag for tag in maestro_tags['TAG'].tolist() if tag in list_features]
list_features_controlables = [tag for tag in maestro_tags['TAG'].tolist() if tag in list_features_controlables]

## 2. Create gurobi model

In [9]:
# create model
m = gp.Model('modelo')

Restricted license - for non-production use only - expires 2025-11-24


### 3. Create decision variables
- Decision variables that are features in ml models
- Decicion variable that is the output in ml models

#### 3.1 define multiple sets

In [10]:
# define set time
list_set_time = ['t0', 't1', 't2', 't3', 't4', 't5', 't6']
index_set_time = pd.Index(list_set_time)
index_set_time

Index(['t0', 't1', 't2', 't3', 't4', 't5', 't6'], dtype='object')

In [11]:
# define set additional - second set defined to this example - of example, think the set represent the kind of product
list_set_product = ['w', 'x', 'y', 'z']
index_set_product = pd.Index(list_set_product)
index_set_product

Index(['w', 'x', 'y', 'z'], dtype='object')

In [12]:
############## multi set region&time - index pandas ##############
index_set_product_time = pd.MultiIndex.from_product((list_set_product, list_set_time), 
                                                    names = ('product', 'time')
                                                   )
index_set_product_time

MultiIndex([('w', 't0'),
            ('w', 't1'),
            ('w', 't2'),
            ('w', 't3'),
            ('w', 't4'),
            ('w', 't5'),
            ('w', 't6'),
            ('x', 't0'),
            ('x', 't1'),
            ('x', 't2'),
            ('x', 't3'),
            ('x', 't4'),
            ('x', 't5'),
            ('x', 't6'),
            ('y', 't0'),
            ('y', 't1'),
            ('y', 't2'),
            ('y', 't3'),
            ('y', 't4'),
            ('y', 't5'),
            ('y', 't6'),
            ('z', 't0'),
            ('z', 't1'),
            ('z', 't2'),
            ('z', 't3'),
            ('z', 't4'),
            ('z', 't5'),
            ('z', 't6')],
           names=['product', 'time'])

#### 3.2 create decision variables MULTIPLE INDEX

In [13]:
# create decision variables - features ml model
var_Z1 = gppd.add_vars(m, index_set_product_time, name = "decision variable Z1"
                                     )

var_X2 = gppd.add_vars(m, index_set_product_time, name = "decision variable X2"
                                     )

In [14]:
# crete decision variables - output ml model
var_Y2 = gppd.add_vars(m, index_set_product_time, name = "decision variable Y2"
                                     )

In [15]:
# "compile"
m.update()

In [16]:
# see decision var created
var_Y2

product  time
w        t0      <gurobi.Var decision variable Y2[w,t0]>
         t1      <gurobi.Var decision variable Y2[w,t1]>
         t2      <gurobi.Var decision variable Y2[w,t2]>
         t3      <gurobi.Var decision variable Y2[w,t3]>
         t4      <gurobi.Var decision variable Y2[w,t4]>
         t5      <gurobi.Var decision variable Y2[w,t5]>
         t6      <gurobi.Var decision variable Y2[w,t6]>
x        t0      <gurobi.Var decision variable Y2[x,t0]>
         t1      <gurobi.Var decision variable Y2[x,t1]>
         t2      <gurobi.Var decision variable Y2[x,t2]>
         t3      <gurobi.Var decision variable Y2[x,t3]>
         t4      <gurobi.Var decision variable Y2[x,t4]>
         t5      <gurobi.Var decision variable Y2[x,t5]>
         t6      <gurobi.Var decision variable Y2[x,t6]>
y        t0      <gurobi.Var decision variable Y2[y,t0]>
         t1      <gurobi.Var decision variable Y2[y,t1]>
         t2      <gurobi.Var decision variable Y2[y,t2]>
         t3      

## EXAMPLES ML MODELS WITH DECISION VAR WITH MULTIPLE SETS

### A. Instance Machine learning MULTIPLE INDEX
- Create instance of Machine learning model using decision var of gurobi (decision var in optimization)

- The observed variables has fixed values, so this values doesn't change across the time

In [17]:
######################## generate instance NO controlables features for model ########################

# list feature NC
list_features_no_vc = list(set(list_features) - set(list_features_controlables))

# generate dataframe with input values. In this example is the mean value
df_input_values = np.array(X_test[list_features_no_vc].mean().to_frame().T).tolist()

# generate dataframe instance_no_controlables with the time set - MULTIINDEX
instance_no_controlables = pd.DataFrame(df_input_values, index = index_set_product_time, columns = list_features_no_vc) # TODO: full index dataframe - multiindex
instance_no_controlables

Unnamed: 0_level_0,Unnamed: 1_level_0,O4,O5
product,time,Unnamed: 2_level_1,Unnamed: 3_level_1
w,t0,5.216518,4.53373
w,t1,5.216518,4.53373
w,t2,5.216518,4.53373
w,t3,5.216518,4.53373
w,t4,5.216518,4.53373
w,t5,5.216518,4.53373
w,t6,5.216518,4.53373
x,t0,5.216518,4.53373
x,t1,5.216518,4.53373
x,t2,5.216518,4.53373


In [18]:
######################## genrate instance - features no controlables + decision vars ########################

# create instance with controlables variables. sorted according the list of features. ES MUY IMPORTANTE QUE ESTÉ ORDENADO LAS VARIABLES DE DECUISIÓN DE ACUERDO A LA LISTA DE FEATURES
instance_controlables = pd.DataFrame([var_Z1, var_X2]).T # ADD DECISION VARIABLES
instance_controlables.columns = list_features_controlables # rename columns

# append features controlables with no controlables
instance = pd.concat([instance_no_controlables, instance_controlables], axis = 1)
instance = instance[list_features] # sort features

In [19]:
instance

Unnamed: 0_level_0,Unnamed: 1_level_0,Z1,X2,O4,O5
product,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
w,t0,"<gurobi.Var decision variable Z1[w,t0]>","<gurobi.Var decision variable X2[w,t0]>",5.216518,4.53373
w,t1,"<gurobi.Var decision variable Z1[w,t1]>","<gurobi.Var decision variable X2[w,t1]>",5.216518,4.53373
w,t2,"<gurobi.Var decision variable Z1[w,t2]>","<gurobi.Var decision variable X2[w,t2]>",5.216518,4.53373
w,t3,"<gurobi.Var decision variable Z1[w,t3]>","<gurobi.Var decision variable X2[w,t3]>",5.216518,4.53373
w,t4,"<gurobi.Var decision variable Z1[w,t4]>","<gurobi.Var decision variable X2[w,t4]>",5.216518,4.53373
w,t5,"<gurobi.Var decision variable Z1[w,t5]>","<gurobi.Var decision variable X2[w,t5]>",5.216518,4.53373
w,t6,"<gurobi.Var decision variable Z1[w,t6]>","<gurobi.Var decision variable X2[w,t6]>",5.216518,4.53373
x,t0,"<gurobi.Var decision variable Z1[x,t0]>","<gurobi.Var decision variable X2[x,t0]>",5.216518,4.53373
x,t1,"<gurobi.Var decision variable Z1[x,t1]>","<gurobi.Var decision variable X2[x,t1]>",5.216518,4.53373
x,t2,"<gurobi.Var decision variable Z1[x,t2]>","<gurobi.Var decision variable X2[x,t2]>",5.216518,4.53373


In [20]:
###### load ml constraint ######
pred_constr = add_predictor_constr(gp_model = m, 
                                                    predictor = model_ml_to_test, 
                                                    input_vars = instance, # instance pandas gurobi
                                                    output_vars = var_Y2, # target
                                                    name = f'model_predict'
                                                   )
pred_constr.print_stats()

Model for model_predict:
588 variables
140 constraints
420 quadratic constraints
Input has shape (28, 4)
Output has shape (28, 1)

Pipeline has 3 steps:

--------------------------------------------------------------------------------
Step            Output Shape    Variables              Constraints              
                                                Linear    Quadratic      General
std_scaler           (28, 4)          168          112            0            0

poly_feat           (28, 15)          420            0          420            0

lin_reg              (28, 1)            0           28            0            0

--------------------------------------------------------------------------------


### B. Instance Machine learning ONE INDEX
- Create instance of Machine learning model using decision var of gurobi (decision var in optimization)

- The observed variables has fixed values, so this values doesn't change across the time

- For this example, supose that in the set "product" the element "w" was selected, **so the machine learning model predict for the "product w" across all the "time t"**

In [21]:
######################## generate instance NO controlables features for model ########################

# list feature NC
list_features_no_vc = list(set(list_features) - set(list_features_controlables))

# generate dataframe with input values. In this example is the mean value
#df_input_values = X_test[list_features_no_vc].mean().to_frame().T
df_input_values = np.array(X_test[list_features_no_vc].mean().to_frame().T).tolist()

# generate dataframe instance_no_controlables with the time set - ONLY ONE INDEX
instance_no_controlables = pd.DataFrame(df_input_values, index = index_set_time, columns = list_features_no_vc) # index dataframe - multiindex
instance_no_controlables

Unnamed: 0,O4,O5
t0,5.216518,4.53373
t1,5.216518,4.53373
t2,5.216518,4.53373
t3,5.216518,4.53373
t4,5.216518,4.53373
t5,5.216518,4.53373
t6,5.216518,4.53373


In [22]:
######################## genrate instance - features no controlables + decision vars ########################

# create instance with controlables variables. sorted according the list of features. ES MUY IMPORTANTE QUE ESTÉ ORDENADO LAS VARIABLES DE DECUISIÓN DE ACUERDO A LA LISTA DE FEATURES
instance_controlables = pd.DataFrame([var_Z1['w'], var_X2['w']]).T # TODO: ADD DECISION VARIABLES WITH THE SELECTION OF THE SET
instance_controlables.columns = list_features_controlables # rename columns

# append features controlables with no controlables
instance = pd.concat([instance_no_controlables, instance_controlables], axis = 1)
instance = instance[list_features] # sort features

In [23]:
instance

Unnamed: 0,Z1,X2,O4,O5
t0,"<gurobi.Var decision variable Z1[w,t0]>","<gurobi.Var decision variable X2[w,t0]>",5.216518,4.53373
t1,"<gurobi.Var decision variable Z1[w,t1]>","<gurobi.Var decision variable X2[w,t1]>",5.216518,4.53373
t2,"<gurobi.Var decision variable Z1[w,t2]>","<gurobi.Var decision variable X2[w,t2]>",5.216518,4.53373
t3,"<gurobi.Var decision variable Z1[w,t3]>","<gurobi.Var decision variable X2[w,t3]>",5.216518,4.53373
t4,"<gurobi.Var decision variable Z1[w,t4]>","<gurobi.Var decision variable X2[w,t4]>",5.216518,4.53373
t5,"<gurobi.Var decision variable Z1[w,t5]>","<gurobi.Var decision variable X2[w,t5]>",5.216518,4.53373
t6,"<gurobi.Var decision variable Z1[w,t6]>","<gurobi.Var decision variable X2[w,t6]>",5.216518,4.53373


In [24]:
###### load ml constraint ######
pred_constr = add_predictor_constr(gp_model = m, 
                                                    predictor = model_ml_to_test, 
                                                    input_vars = instance, # instance pandas gurobi
                                                    output_vars = var_Y2['w'], # TODO: ADD DECISION VARIABLES WITH THE SELECTION OF THE SET
                                                    name = f'model_predict'
                                                   )
pred_constr.print_stats()

Model for model_predict0:
147 variables
35 constraints
105 quadratic constraints
Input has shape (7, 4)
Output has shape (7, 1)

Pipeline has 3 steps:

--------------------------------------------------------------------------------
Step            Output Shape    Variables              Constraints              
                                                Linear    Quadratic      General
std_scaler            (7, 4)           42           28            0            0

poly_feat            (7, 15)          105            0          105            0

lin_reg               (7, 1)            0            7            0            0

--------------------------------------------------------------------------------


In [30]:
stop - at this part you can now 2 ways to works with model with mutiple sets in gurobi machine learning

SyntaxError: invalid syntax (1085773698.py, line 1)

### 5. Define objective optimization
Objetive that no generate infeasibility

In [26]:
var_Y2.sum() # sum across time

<gurobi.LinExpr: decision variable Y2[w,t0] + decision variable Y2[w,t1] + decision variable Y2[w,t2] + decision variable Y2[w,t3] + decision variable Y2[w,t4] + decision variable Y2[w,t5] + decision variable Y2[w,t6] + decision variable Y2[x,t0] + decision variable Y2[x,t1] + decision variable Y2[x,t2] + decision variable Y2[x,t3] + decision variable Y2[x,t4] + decision variable Y2[x,t5] + decision variable Y2[x,t6] + decision variable Y2[y,t0] + decision variable Y2[y,t1] + decision variable Y2[y,t2] + decision variable Y2[y,t3] + decision variable Y2[y,t4] + decision variable Y2[y,t5] + decision variable Y2[y,t6] + decision variable Y2[z,t0] + decision variable Y2[z,t1] + decision variable Y2[z,t2] + decision variable Y2[z,t3] + decision variable Y2[z,t4] + decision variable Y2[z,t5] + decision variable Y2[z,t6]>

In [27]:
m.setObjective(var_Y2.sum(),
               gp.GRB.MINIMIZE)

#### 6. Optimize and get optimal values

In [28]:
# solve
m.optimize()

Gurobi Optimizer version 11.0.0 build v11.0.0rc2 (win64 - Windows 10.0 (19043.2))

CPU model: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads



GurobiError: Model too large for size-limited license; visit https://www.gurobi.com/free-trial for a full license

In [29]:
#### know the status of the model - 2 a optimal solution was founded
# docu: https://www.gurobi.com/documentation/current/refman/optimization_status_codes.html#sec:StatusCodes
m.Status

1

In [None]:
# get optimal values and save in a dataframe
######## create a dataframe with set as index
solution = pd.DataFrame(index = index_set_time)

######################## save optimal values - features of models (only the features) ########################

# model
solution["var_Z1"] = var_Z1.gppd.X
solution["var_X2"] = var_X2.gppd.X


######################## save optimal values - targets of models (some targets are features of the model of the next step) ########################
solution["var_Y2"] = var_Y2.gppd.X  # model


######################## # get value objetive function ########################
opt_objetive_function = m.ObjVal

In [None]:
# show value objetive function
opt_objetive_function

In [None]:
# show value decision variables
solution