## Testing MGBoost on a synthetically generated multiobjective dataset

Create a synthetic multiobjective dataset to evaluate MGBoost performance.

**Note: it is important to run this notebook inside the ModXGBoost venv.**

**Also note that the core.py file must be modified to load multiobj_data/**

In [2]:
# Imports
import numpy as np
from xgboost import XGBRegressor
from xgboost.sklearn import fobj
from sklearn.metrics import mean_squared_error

In [3]:
# Seed NumPy's global RNG so every run draws the same data
# (a dedicated RandomState object would be a possible alternative)
np.random.seed(1)

# Size of the generated problem:
#   m -> training examples, n -> features, k -> distinct objectives
m, n, k = 10000, 3, 4

In [4]:
# Generate features randomly (standard gaussian), one row per training example
X = np.random.normal(loc=0, scale=1, size=(m, n))

# Generate random integer weights in [-10, 9] (randint's `high` is exclusive).
# Shape is (n + 1, k): row 0 holds the bias weights, rows 1..n hold the feature
# weights, and each column belongs to one objective. Drawing W as (k, n + 1)
# only multiplied correctly below when k happened to equal n + 1; this shape
# consumes the same random draws and works for any k.
W = np.random.randint(low=-10, high=10, size=(n + 1, k))

# Prepend a column of ones so the first row of W acts as a bias term, then
# compute one linear label per objective: y has shape (m, k)
X_with_bias = np.hstack((np.ones((m, 1)), X))
y = np.matmul(X_with_bias, W)

In [5]:
# Display the randomly generated integer weight matrix used to build y
W

array([[ -6,  -2,   4,  -3],
       [ -6,  -2,  -1,   7],
       [  4,  -5,   4,  -4],
       [-10,   8,   8,  -6]])

In [6]:
# Sanity-check the dimensions of the features, labels and weights
X.shape, y.shape, W.shape

((10000, 3), (10000, 4), (4, 4))

In [None]:
""" ONLY RUN ONCE """
# Persist the generated features, labels and weights as .npy files.
# The calls are left commented out, presumably so that re-running the
# notebook does not overwrite previously saved data; uncomment to write them.
# NOTE(review): these look like the files the modified core.py is expected to
# load (see the note at the top of the notebook) -- confirm the target path.
# np.save('X_multiobj.npy', X)
# np.save('y_multiobj.npy', y)
# np.save('W_multiobj.npy', W)

In [7]:
# Train one mgboost regressor with the custom objective `fobj`;
# the regression target is the per-example mean across all label columns
xgbr = XGBRegressor(objective=fobj, n_estimators=100, max_depth=6)
xgbr.fit(X, y.mean(axis=1))


Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  1.5903e-05 -1.0000e+00  1e+00  2e-16  3e+00
 1:  1.5900e-05 -1.0033e-02  1e-02  6e-17  3e-02
 2:  1.5583e-05 -1.3157e-04  1e-04  8e-17  4e-04
 3:  9.5074e-06 -2.9641e-06  1e-05  1e-16  2e-05
 4:  8.0092e-06  6.8547e-06  1e-06  1e-16  1e-06
 5:  7.8424e-06  7.7532e-06  9e-08  1e-16  8e-21
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.

Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function

Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  9.0567e-06 -1.0000e+00  1e+00  3e-16  3e+00
 1:  9.0536e-06 -1.0039e-02  1e-02  8e-17  3e-02
 2:  8.7578e-06 -1.3774e-04  1e-04  3e-17  4e-04
 3:  3.4370e-06 -8.2270e-06  1e-05  1e-16  2e-05
 4:  2.4366e-06  1.6019e-06  8e-07  1e-16  8e-07
 5:  2.3832e-06  2.3597e-06  2e-08  6e-17  2e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.

Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function e

Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.6910e-06 -1.0000e+00  1e+00  6e-17  3e+00
 1:  7.6878e-06 -1.0047e-02  1e-02  1e-16  3e-02
 2:  7.3832e-06 -1.4572e-04  2e-04  1e-16  4e-04
 3:  2.1558e-06 -1.0228e-05  1e-05  1e-16  2e-05
 4:  1.2976e-06  5.4418e-07  8e-07  1e-16  7e-07
 5:  1.2629e-06  1.2460e-06  2e-08  1e-16  5e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.

Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.6433e-06

Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.3565e-06 -1.0000e+00  1e+00  2e-16  3e+00
 1:  7.3533e-06 -1.0050e-02  1e-02  8e-17  3e-02
 2:  7.0443e-06 -1.4830e-04  2e-04  1e-16  4e-04
 3:  1.8315e-06 -1.0804e-05  1e-05  2e-16  2e-05
 4:  1.0315e-06  3.1480e-07  7e-07  6e-17  7e-07
 5:  1.0027e-06  9.8796e-07  1e-08  8e-17  6e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.

Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function e


Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.2697e-06 -1.0001e+00  1e+00  3e-17  3e+00
 1:  7.2665e-06 -1.0051e-02  1e-02  1e-16  3e-02
 2:  6.9569e-06 -1.4923e-04  2e-04  7e-17  4e-04
 3:  1.7681e-06 -1.0966e-05  1e-05  8e-17  2e-05
 4:  9.9900e-07  3.0258e-07  7e-07  1e-16  7e-07
 5:  9.7290e-07  9.5928e-07  1e-08  8e-17  6e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.

Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function

Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.2858e-06 -1.0001e+00  1e+00  1e-16  3e+00
 1:  7.2826e-06 -1.0051e-02  1e-02  8e-17  3e-02
 2:  6.9747e-06 -1.4930e-04  2e-04  1e-16  4e-04
 3:  1.8231e-06 -1.0924e-05  1e-05  1e-16  2e-05
 4:  1.0720e-06  3.8959e-07  7e-07  1e-16  7e-07
 5:  1.0476e-06  1.0346e-06  1e-08  1e-16  6e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.

Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function e

Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.4193e-06 -1.0001e+00  1e+00  3e-17  3e+00
 1:  7.4161e-06 -1.0050e-02  1e-02  1e-16  3e-02
 2:  7.1111e-06 -1.4886e-04  2e-04  1e-16  4e-04
 3:  1.9962e-06 -1.0721e-05  1e-05  6e-17  2e-05
 4:  1.2491e-06  5.6888e-07  7e-07  8e-17  7e-07
 5:  1.2249e-06  1.2120e-06  1e-08  3e-17  6e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.

Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.4353e-06 -1.0001e+00  1e+00  3e-17  3e+00
 1

Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.5517e-06 -1.0000e+00  1e+00  1e-16  3e+00
 1:  7.5486e-06 -1.0050e-02  1e-02  1e-16  3e-02
 2:  7.2465e-06 -1.4829e-04  2e-04  1e-16  4e-04
 3:  2.1675e-06 -1.0501e-05  1e-05  9e-17  2e-05
 4:  1.4217e-06  7.4293e-07  7e-07  1e-16  7e-07
 5:  1.3976e-06  1.3848e-06  1e-08  1e-16  6e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.

Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.5680e-06 -1.0000e+00  1e+00  1e-16  3e+00
 1

Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.7367e-06 -1.0000e+00  1e+00  2e-16  3e+00
 1:  7.7335e-06 -1.0049e-02  1e-02  8e-17  3e-02
 2:  7.4335e-06 -1.4768e-04  2e-04  1e-16  4e-04
 3:  2.3738e-06 -1.0259e-05  1e-05  8e-17  2e-05
 4:  1.6266e-06  9.4920e-07  7e-07  6e-17  7e-07
 5:  1.6026e-06  1.5898e-06  1e-08  1e-16  6e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.

Inside Booster.update(), Neil's code
Custom objective function executing
Custom objective function executing
Custom objective function executing
Custom objective function e

Custom objective function executing
Custom objective function executing
     pcost       dcost       gap    pres   dres
 0:  7.9820e-06 -1.0000e+00  1e+00  0e+00  3e+00
 1:  7.9789e-06 -1.0048e-02  1e-02  7e-17  3e-02
 2:  7.6810e-06 -1.4695e-04  2e-04  1e-16  4e-04
 3:  2.6384e-06 -9.9559e-06  1e-05  8e-17  2e-05
 4:  1.8871e-06  1.2086e-06  7e-07  2e-16  7e-07
 5:  1.8629e-06  1.8501e-06  1e-08  1e-16  6e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.0000e-04 -9.9990e-01  1e+00  0e+00  2e+00
 1:  1.0000e-04 -9.9000e-03  1e-02  2e-16  2e-02
 2:  1.0000e-04 -1.3417e-18  1e-04  9e-17  2e-04
 3:  1.0000e-04  9.9000e-05  1e-06  1e-16  3e-06
 4:  1.0000e-04  9.9990e-05  1e-08  8e-17  3e-08
Optimal solution found.


XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=6, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective=<function fobj at 0x1a1b33cc20>,
             random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             seed=None, silent=None, subsample=1, verbosity=1)

In [8]:
# Compare mgboost performance vs each label individually.
# NOTE: predictions are made on X, the same data the model was fit on,
# so the numbers below are training errors.
y_pred = xgbr.predict(X)

print('obj\tRMSE\tmean(y - y_pred)')
# Loop over however many objectives y holds rather than a hard-coded 4;
# the same y_pred vector is scored against every label column
for i in range(y.shape[1]):
    y_true = y[:, i]
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mean_residual = np.mean(y_true - y_pred)
    print('%d\t%.2f\t%.2f' % (i + 1, rmse, mean_residual))

obj	RMSE	mean(y - y_pred)
1	15.22	-9.04
2	11.12	-5.13
3	9.03	0.93
4	11.97	-6.10


In [9]:
# Understand shape of data to put results in context
print('Statistics on Labels\n\nobj\tmean\tstd\tmin\tmax')
# Loop over however many objectives y holds rather than a hard-coded 4
for i in range(y.shape[1]):
    y_true = y[:, i]
    # Objectives are numbered from 1 so rows line up with the RMSE tables
    print('%d\t%.2f\t%.2f\t%.2f\t%.2f'
          % (i + 1, np.mean(y_true), np.std(y_true), np.min(y_true), np.max(y_true)))

Statistics on Labels

obj	mean	std	min	max
0	-5.95	12.28	-55.77	39.13
1	-2.05	9.64	-39.43	33.81
2	4.01	8.99	-28.60	41.01
3	-3.02	10.15	-40.77	35.15


In [11]:
# Baseline: RMSE of a constant predictor that always outputs the label mean.
# This equals the standard deviation of each label column, so a model whose
# RMSE is above this value does worse than simply predicting the mean.
print('obj\tRMSE vs mean')
# Loop over however many objectives y holds rather than a hard-coded 4
for i in range(y.shape[1]):
    y_true = y[:, i]
    baseline_pred = np.full(shape=y_true.shape, fill_value=np.mean(y_true))
    # mean_squared_error already returns a scalar, so no extra averaging is needed
    rmse_vs_mean = np.sqrt(mean_squared_error(y_true, baseline_pred))
    print('%d\t%.2f' % (i + 1, rmse_vs_mean))

obj	RMSE vs mean
1	12.28
2	9.64
3	8.99
4	10.15
