In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import xgboost as xgb

from typing import Tuple

from utils import methods, math_expressions as me

In [2]:
train_df = pd.read_csv('../../data/gen_train_v3.csv')
test_df = pd.read_csv('../../data/gen_test_v3.csv')

In [3]:
train_df.head()

h = 1 / 15
c = 25

In [4]:
X_train = train_df[['N', 'n', 'mean_n', 'std_n', 'alpha_hat', 'beta_hat', 'u_star_hat']]
y_train = train_df['u']

X_test = test_df[['N', 'n', 'mean_n', 'std_n', 'alpha_hat', 'beta_hat', 'u_star_hat']]
y_test = test_df['u']

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [5]:
from sympy import diff, symbols, lambdify
d = symbols('d')
expr = me.cus_cost_expr_1(h, c, d)
expr_grad = diff(expr, d)
expr_hess = diff(expr_grad, d)

display(expr)
display(expr_grad)
display(expr_hess)


if True:
    f = lambdify(d, expr)
    f_grad = lambdify(d, expr_grad)
    f_hess = lambdify(d, expr_hess)

else:
    f = lambda x: expr.subs(d, x).evalf(5)
    f_grad = lambda x: expr_grad.subs(d, x).evalf(5)
    f_hess = lambda x: expr_hess.subs(d, x).evalf(5)

0


Piecewise((0.0666666666666667*(-d)**1.25, d <= 0), (0.0666666666666667*d**4, True))

Piecewise((0.0833333333333333*(-d)**1.25/d, d <= 0), (0.266666666666667*d**3, True))

Piecewise((0.0208333333333333*(-d)**1.25/d**2, d <= 0), (0.8*d**2, True))

In [6]:
def custom_cost(pred: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:
    y = dtrain.get_label()
    diff = pred - y
    grad = np.array([f_grad(x) for x in diff])
    hess = np.array([f_hess(x) for x in diff])
    return grad, hess

def custom_metric(pred: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
    y = dtrain.get_label()
    diff = pred - y
    error = np.array([f(x) for x in diff])
    return 'CusCost', float(np.sum(error) / len(y))


In [7]:
results = []
model = xgb.train({'tree_method': 'hist', 'seed': 1994, 'eta': 0.5, 'disable_default_eval_metric': 1, 'nthread':7},  # any other tree method is fine.
           dtrain=dtrain,
           num_boost_round=1000,
           obj=custom_cost,
           custom_metric=custom_metric,
           early_stopping_rounds=100,
           evals=[(dtrain, 'train'), (dtest, 'validation')])


  return select([less_equal(d, 0),True], [0.0666666666666667*(-d)**1.25,0.0666666666666667*d**4], default=nan)


[0]	train-CusCost:374928094.20311	validation-CusCost:434651301.98683


  return select([less_equal(d, 0),True], [0.0833333333333333*(-d)**1.25/d,0.266666666666667*d**3], default=nan)
  return select([less_equal(d, 0),True], [0.0208333333333333*(-d)**1.25/d**2,0.8*d**2], default=nan)


[1]	train-CusCost:185299353.03042	validation-CusCost:212488070.37722
[2]	train-CusCost:92202486.77312	validation-CusCost:104468571.73460
[3]	train-CusCost:46241076.09110	validation-CusCost:51744658.83559
[4]	train-CusCost:23395976.80782	validation-CusCost:25846724.17483
[5]	train-CusCost:11945281.99920	validation-CusCost:12993444.46132
[6]	train-CusCost:6158254.70495	validation-CusCost:6609590.15413
[7]	train-CusCost:3203646.36257	validation-CusCost:3390387.68864
[8]	train-CusCost:1680233.25280	validation-CusCost:1753804.76622
[9]	train-CusCost:887444.33820	validation-CusCost:915452.39497
[10]	train-CusCost:472078.83047	validation-CusCost:485622.81817
[11]	train-CusCost:252800.69737	validation-CusCost:261295.28236
[12]	train-CusCost:136202.86985	validation-CusCost:143382.37540
[13]	train-CusCost:73687.58748	validation-CusCost:80512.07273
[14]	train-CusCost:40040.76954	validation-CusCost:46338.57440
[15]	train-CusCost:21803.17883	validation-CusCost:27699.69994
[16]	train-CusCost:11863.3

In [8]:
test_df['predicted_u_star'] = model.predict(dtest)
test_df['actual_cost'] = test_df.apply(lambda row: methods.cal_cost(row['c'], row['h'], row['u'], row['predicted_u_star']), axis=1)
print(f'Actual Mean cost: {test_df['actual_cost'].mean():.2f}, Actual Median cost: {test_df['actual_cost'].median():.2f}')
print(f'Optimal Mean cost: {test_df['optimal_cost'].mean():.2f}, Optimal Median cost: {test_df['optimal_cost'].median():.2f}')

Actual Mean cost: 9.82, Actual Median cost: 8.08
Optimal Mean cost: 4.36, Optimal Median cost: 3.17


In [9]:
display(model.get_score(importance_type='gain'))

# Get the best score and iteration
best_score = model.best_score
best_iteration = model.best_iteration

# Display the best score and the corresponding iteration
print(f"Best score: {best_score}")
print(f"Best iteration: {best_iteration}")

{'N': 1768524.375,
 'mean_n': 2959855.5,
 'std_n': 2356887.5,
 'alpha_hat': 11132263.0,
 'beta_hat': 3524461.5,
 'u_star_hat': 1260329984.0}

Best score: 2204.714582
Best iteration: 33


In [10]:
test_df.head(50)

Unnamed: 0,alpha,beta,h,c,N,n,mean_n,std_n,alpha_hat,beta_hat,intervals_str,u,u_star,u_star_hat,z,optimal_cost,actual_cost,predicted_u_star
0,5,2.5,0.066667,25,29,5,12.932681,4.299833,9.046363,1.4296,9.873611453932869_12.181558790848907_20.439509...,373.970646,308.401152,330.455982,0.933259,4.3713,10.553606,215.66655
1,7,1.5,0.066667,25,16,5,9.336215,3.195497,8.536206,1.09372,8.266907462181138_9.999238032738244_14.2029088...,161.389327,135.114076,121.730806,1.109942,1.751683,5.106355,84.793999
2,7,1.0,0.066667,25,21,5,9.326094,4.260114,4.792446,1.945999,9.216064224518064_8.939377602897661_6.74126765...,143.176863,120.497418,157.110477,0.76696,1.511963,1.175022,125.551529
3,3,2.0,0.066667,25,16,5,6.171769,2.741844,5.066802,1.21808,4.268162630400795_5.646163787763241_10.9732434...,108.552055,67.076424,74.632356,0.898758,2.765042,4.053335,47.752026
4,3,1.0,0.066667,25,30,5,3.408672,3.727784,0.836121,4.07677,2.7139503189213166_1.8650227067252927_9.985086...,82.809339,68.600863,63.23327,1.084886,0.947232,1.069021,66.774025
5,3,1.5,0.066667,25,20,5,3.769411,2.560751,2.166767,1.739648,3.11735382807551_1.9783134491147916_0.91413133...,87.63787,64.876472,50.806139,1.276942,1.517427,1.906561,59.039448
6,3,2.5,0.066667,25,15,5,9.931571,2.022021,24.124854,0.411674,10.139845582668807_13.268196488069144_8.596625...,106.97037,78.709432,130.343916,0.60386,1.884062,25.0,128.066864
7,3,3.0,0.066667,25,36,5,7.128026,5.330372,1.788231,3.986078,3.956626898229583_1.8282626746323363_13.739765...,334.042238,268.574966,200.318648,1.340739,4.364485,12.258198,150.169266
8,5,3.0,0.066667,25,16,5,18.323201,6.923162,7.004767,2.615819,21.25558763838263_19.034362899181332_8.6877522...,233.670311,190.557184,242.398413,0.786132,2.874208,3.747201,177.462296
9,3,1.5,0.066667,25,15,5,5.778133,2.196956,6.917228,0.835325,8.134566061803032_6.967224733616182_2.94335748...,53.291864,45.443092,67.092193,0.677323,0.523251,0.719642,42.497227
