In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, plot_tree


from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import itertools

from QuadraticConstraintModel import train_COF_on_leaves, get_h_from_COF

from QuadraticConstraintModel import get_feature_bounds_from_COF, predict_from_COF, get_elevated_vertices

from Helping_Code.HelpingFunctions import load_dataset, normalized_root_mean_square_error
from Helping_Code.CustomHyperrectangle import CustomHyperrectangle
from Helping_Code.plot.CustomPlotClass import CustomPlotClass



data_Directory = "Dataset/"

X,y = load_dataset(data_Directory + "SteamGovernor/data_SteamGovernor_10000000.csv",num_attributes = 3, num_classes = 3 )
print(f"Size of the Data Set\n Shape of X = {X.shape} \n Shape of y = {y.shape}")

X_train, X_test, y_train, y_test = train_test_split(X, y,  test_size=0.1)
print(f" Shape of X_Training = {X_train.shape} \n Shape of X_Testing = {X_test.shape}")

# X_train = X
# y_train = y


Size of the Data Set
 Shape of X = (10000000, 3) 
 Shape of y = (10000000, 3)
 Shape of X_Training = (9000000, 3) 
 Shape of X_Testing = (1000000, 3)


In [15]:
from Helping_Code.Optimizers.COFLeafOptimizer import COFLeafOptimizer

In [5]:
from sklearn.preprocessing import PolynomialFeatures

In [6]:
poly = PolynomialFeatures(degree=3, include_bias=False)

In [9]:
X_train_poly = poly.fit_transform(X_train)
X_train_poly[:2]

array([[ 0.63378894, -0.9446858 ,  0.2744053 ,  0.40168842, -0.59873141,
         0.17391504,  0.89243126, -0.25922679,  0.07529827,  0.25458568,
        -0.37946935,  0.11022543,  0.56561306, -0.16429507,  0.04772321,
        -0.84306714,  0.24488787, -0.07113321,  0.02066224],
       [ 0.773327  , -0.7954329 ,  0.32454443,  0.59803465, -0.61512974,
         0.25097897,  0.6327135 , -0.25815332,  0.10532909,  0.46247634,
        -0.47569644,  0.19408881,  0.48929443, -0.19963693,  0.08145383,
        -0.50328113,  0.20534364, -0.08378222,  0.03418397]])

In [10]:
X_test_poly = poly.transform(X_test)
X_test_poly[:2]

array([[-0.11777723, -0.6516137 , -0.12528431,  0.01387148,  0.07674526,
         0.01475564,  0.42460041,  0.08163697,  0.01569616, -0.00163374,
        -0.00903884, -0.00173788, -0.05000826, -0.00961498, -0.00184865,
        -0.27667545, -0.05319577, -0.01022783, -0.00196648],
       [ 0.8685726 ,  0.6736992 , -0.7164793 ,  0.75441836,  0.58515667,
        -0.62231429,  0.45387061, -0.48269153,  0.51334259,  0.65526712,
         0.50825105, -0.54052514,  0.39421958, -0.41925264,  0.44587531,
         0.30577227, -0.3251889 ,  0.34583849, -0.36779934]])

In [11]:
from sklearn.preprocessing import StandardScaler

In [12]:
# --- StandardScaler for X ---
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train_poly)   # fit+transform on training
X_test_scaled = scaler_X.transform(X_test_poly)         # transform only on test

# --- StandardScaler for y ---
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train)   # fit+transform on training
y_test_scaled = scaler_y.transform(y_test)         # transform only on test

In [13]:
X_test_scaled[:2]

array([[-0.18579939, -1.02608892, -0.19820678, -1.07962968,  0.19053822,
         0.03676098,  0.05953098,  0.20243792, -1.07426247, -0.00325345,
        -0.02619224, -0.00577821, -0.14577419, -0.03767171, -0.0054104 ,
        -0.54984493, -0.15506178, -0.02958341, -0.00471408],
       [ 1.36723168,  1.06128666, -1.12907696,  0.97306098,  1.45159622,
        -1.54293642,  0.14065529, -1.19684656,  0.30480514,  1.30241973,
         1.47981035, -1.5739783 ,  1.14749816, -1.63783777,  1.29737245,
         0.60813966, -0.94651931,  1.00648779, -0.73170326]])

In [None]:
tree = DecisionTreeRegressor(random_state=42, min_samples_leaf=1000000)
tree.fit(X_train_scaled, y_train_scaled)

In [16]:
cof_model = COFLeafOptimizer(
            tree=tree,
            optimizer="gurobi",
            h_max=2.0,
            random_state=42,
            scaler_X=scaler_X,
            n_jobs=5
        )

In [17]:
cof_model.fit(X_train_scaled, y_train_scaled)

Optimization on Leaf ID = 648
Restricted license - for non-production use only - expires 2026-11-23
Optimization on Leaf ID = 588
Restricted license - for non-production use only - expires 2026-11-23
Optimization on Leaf ID = 217
Restricted license - for non-production use only - expires 2026-11-23
Optimization on Leaf ID = 652
Restricted license - for non-production use only - expires 2026-11-23
Optimization on Leaf ID = 568
Restricted license - for non-production use only - expires 2026-11-23


In [18]:
print("Number of modeled leaves:", len(cof_model.leaf_models))
for lid, leaf in list(cof_model.leaf_models.items()):
    print("H = ", leaf.h)

Number of modeled leaves: 671
H =  3.418398572601158e-06
H =  0.024720795188590383
H =  inf
H =  inf
H =  inf
H =  5.59077589657083e-05
H =  inf
H =  0.00018359850871866104
H =  inf
H =  inf
H =  inf
H =  inf
H =  4.1101195868828255e-05
H =  7.133355950353925e-06
H =  4.946556221732556e-05
H =  inf
H =  inf
H =  4.2350738038259236e-05
H =  inf
H =  2.3012077047434095e-07
H =  4.197914527974021e-05
H =  3.368309142423639e-05
H =  inf
H =  inf
H =  0.0036643289353173007
H =  inf
H =  inf
H =  6.994449661361715e-06
H =  inf
H =  1.803864438583488e-05
H =  5.7803318464064895e-05
H =  inf
H =  inf
H =  2.372460110698543e-06
H =  inf
H =  9.884858348374305e-06
H =  4.784408891167846e-05
H =  0.0025475931227910698
H =  2.3146470326172914e-05
H =  inf
H =  inf
H =  inf
H =  inf
H =  inf
H =  inf
H =  inf
H =  inf
H =  0.0005005842070020337
H =  inf
H =  inf
H =  inf
H =  inf
H =  inf
H =  1.9430873103336437e-06
H =  inf
H =  inf
H =  9.524855004703118e-08
H =  inf
H =  0.000269410454508223
H =