In [None]:
!pip install bayesian-optimization

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import pickle
import io
import xgboost as xgb
from sklearn.model_selection import train_test_split
import csv
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import KFold
from bayes_opt import BayesianOptimization
import sklearn.metrics as metrics
from google.colab import files
from random import randint

In [None]:
def compute_score(model, input):
    """Score ``model`` against the un-normalised ``Target`` table.

    Predictions are mapped back to original units by undoing the
    ``(x - mean) / std`` scaling with the column-wise statistics of the
    module-level ``Target`` DataFrame, then compared with ``Target`` itself.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    input : feature matrix passed straight to ``model.predict``; it must have
        one row per row of ``Target``.

    Returns
    -------
    tuple
        ``(r2, mse, mae, mape)``, each rounded to 4 decimals. The same values
        are also printed.
    """
    # NOTE: depends on the global `Target`; the data-loading cell must have run.
    y_true = Target.to_numpy()
    target_std = Target.std().to_numpy()
    target_mean = Target.mean().to_numpy()

    # Plain NumPy broadcasting: the per-column std/mean vectors are applied to
    # every prediction row. No TensorFlow round-trip is needed for this.
    y_score = np.asarray(model.predict(input)) * target_std + target_mean

    r2 = round(metrics.r2_score(y_true, y_score), 4)
    mse = round(metrics.mean_squared_error(y_true, y_score), 4)
    mae = round(metrics.mean_absolute_error(y_true, y_score), 4)
    mape = round(metrics.mean_absolute_percentage_error(y_true, y_score), 4)

    print('r2: ', r2, ', mse: ', mse, ', mae: ', mae, ', mape: ', mape)

    return r2, mse, mae, mape

### XGB Transfer Learning + Monotonic from Global Model

In [None]:
# Prompt for the dataset through Colab's upload widget. `files` is already
# imported in the notebook's import cell, so it is not re-imported here.
# `uploaded` maps each uploaded file name to its raw bytes.
uploaded = files.upload()

Saving Indian Data.csv to Indian Data.csv


In [None]:
# Parse the uploaded CSV bytes into a DataFrame (first row is the header).
df = pd.read_csv(io.StringIO(uploaded['Indian Data.csv'].decode('utf-8')), header=0)

# Columns 0-5 are the model inputs; columns 6-34 are the regression targets.
Inputs = df.iloc[:, 0:6]
Target = df.iloc[:, 6:35]

# Z-score every column with statistics taken over the full table.
# NOTE(review): the statistics are computed before the split below, so the
# held-out rows contribute to the scaling - confirm this is intended.
Input_norm = (Inputs - Inputs.mean()) / Inputs.std()
Target_norm = (Target - Target.mean()) / Target.std()

# Fixed random_state so the 85/15 split is identical on every run
# (same value as the `seed = 1` used by the tuning cell).
train_x, test_x, train_y, test_y = train_test_split(
    Input_norm, Target_norm, test_size=0.15, random_state=1
)

# Full normalised data set as a DMatrix; feature names are passed as a plain list.
data = xgb.DMatrix(Input_norm, Target_norm, feature_names=list(Input_norm.columns))

In [None]:
# Single seed shared by the CV routine, the Bayesian optimiser and the final model.
seed = 1

# XGBoost monotonicity flags, one per input feature in column order:
# +1 = prediction must not decrease with the feature, -1 = must not increase,
# 0 = unconstrained.
monotonic_constraints = (1, -1, -1, 1, 1, 0)
def xgb_cv(max_depth, learning_rate, gamma, min_child_weight, subsample, colsample_bytree):
    """Objective for the Bayesian search: negative 5-fold CV RMSE of XGBoost.

    The optimiser proposes continuous values; ``max_depth`` and
    ``min_child_weight`` are truncated to integers before being handed to
    XGBoost. Reads the module-level ``Input_norm``, ``Target_norm``, ``seed``
    and ``monotonic_constraints``.

    Returns
    -------
    float
        Minus the mean test RMSE at the last round of the CV history, so that
        maximising this value minimises the error.
    """
    params = {
        'max_depth': int(max_depth),
        'learning_rate': learning_rate,
        'gamma': gamma,
        'min_child_weight': int(min_child_weight),
        'subsample': subsample,
        'colsample_bytree': colsample_bytree,
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'seed': seed,
        'monotone_constraints': monotonic_constraints
    }

    # xgb.cv does the 5-fold splitting itself, so one call on the full data is
    # enough. Wrapping it in an outer KFold loop whose indices are never used
    # only repeats the same seeded computation.
    dtrain = xgb.DMatrix(Input_norm, Target_norm)
    cv_result = xgb.cv(params, dtrain, num_boost_round=100, early_stopping_rounds=5,
                       nfold=5, metrics='rmse', seed=seed, stratified=False, shuffle=True)

    # With early stopping, the last row of the history is the best iteration.
    return -float(cv_result['test-rmse-mean'].iloc[-1])

# Search space for the Bayesian optimiser: (lower, upper) bound per hyperparameter.
pbounds = {
    'max_depth': (2, 6),
    'learning_rate': (0.01, 0.3),
    'gamma': (0, 1),
    'min_child_weight': (1, 5),
    'subsample': (0.5, 1),
    'colsample_bytree': (0.5, 1),
}
optimizer1 = BayesianOptimization(f=xgb_cv, pbounds=pbounds, random_state=seed)

# 10 objective evaluations in total: 5 initial points + 5 optimisation iterations.
optimizer1.maximize(init_points=5, n_iter=5)

# Best point found. The optimiser works in continuous space, so the two
# integer-valued hyperparameters are truncated exactly as xgb_cv does.
best_params1 = optimizer1.max['params']
best_params1['max_depth'] = int(best_params1['max_depth'])
best_params1['min_child_weight'] = int(best_params1['min_child_weight'])

# Final model: tuned hyperparameters plus the same monotonic constraints used
# during the search (shared constant, so the two cannot drift apart).
xgb_model = xgb.XGBRegressor(monotone_constraints=monotonic_constraints, **best_params1, random_state=seed)

model_with_constraints = xgb_model.fit(Input_norm, Target_norm)
# NOTE: scored on the same rows the model was fitted on, so these are
# in-sample metrics, not an estimate of generalisation error.
R2, MSE, MAE, MAPE = compute_score(model_with_constraints, Input_norm)

|   iter    |  target   | colsam... |   gamma   | learni... | max_depth | min_ch... | subsample |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m-0.7197  [0m | [0m0.7085   [0m | [0m0.7203   [0m | [0m0.01003  [0m | [0m3.209    [0m | [0m1.587    [0m | [0m0.5462   [0m |
| [95m2        [0m | [95m-0.4989  [0m | [95m0.5931   [0m | [95m0.3456   [0m | [95m0.1251   [0m | [95m4.155    [0m | [95m2.677    [0m | [95m0.8426   [0m |
| [0m3        [0m | [0m-0.6333  [0m | [0m0.6022   [0m | [0m0.8781   [0m | [0m0.01794  [0m | [0m4.682    [0m | [0m2.669    [0m | [0m0.7793   [0m |
| [0m4        [0m | [0m-0.5002  [0m | [0m0.5702   [0m | [0m0.1981   [0m | [0m0.2422   [0m | [0m5.873    [0m | [0m2.254    [0m | [0m0.8462   [0m |
| [0m5        [0m | [0m-0.5595  [0m | [0m0.9382   [0m | [0m0.8946   [0m | [0m0.03466  [0m | [0m2.156    [0m | [0m1.679    [0m | [0m0.

In [None]:
# Display the tuned hyperparameters selected by the Bayesian search.
best_params1

{'colsample_bytree': 0.7807144402786754,
 'gamma': 0.37537397610054724,
 'learning_rate': 0.14888013919148696,
 'max_depth': 4,
 'min_child_weight': 1,
 'subsample': 0.822258152747434}