In [1]:
# imports
import warnings
warnings.filterwarnings('ignore')
import os
import math
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.dpi'] = 125

# machine learning
from sklearn.model_selection import KFold,cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, log_loss


import xgboost as xgb

# hyper param tuning
import optuna
from optuna.samplers import TPESampler
from optuna.integration import XGBoostPruningCallback

In [2]:
train_df=pd.read_csv('train_clean.csv')
test_df=pd.read_csv('test_clean.csv')
train_df.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Family_size,Is_alone,Survived
0,3,1,1,1,0,0,1,2,0,0
1,1,0,2,1,0,3,0,2,0,1
2,3,0,1,0,0,1,1,1,1,1
3,1,0,2,1,0,3,1,2,0,1
4,3,1,2,0,0,1,1,1,1,0


In [3]:
# split the target in train
y=train_df.pop("Survived").to_numpy()
X=train_df.to_numpy()
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

X shape: (891, 9)
y shape: (891,)


In [4]:
# train test split
X_train,X_test,y_train,y_test=train_test_split(X,y,
                                               test_size=0.25, 
                                               random_state=42,
                                               shuffle=True)

In [5]:
def objective(trial,X_train,y_train,X_test,y_test):
#     pruning_callback = XGBoostPruningCallback(trial, "validation_0-rmse")
    params = {
            "verbosity": 0,  # 0 (silent) - 3 (debug)
            'objective': 'binary:logistic',
            "n_estimators": 10000,
            "max_depth": trial.suggest_int("max_depth", 4, 12),
            "learning_rate": trial.suggest_loguniform("learning_rate", 0.005, 0.05),
            "colsample_bytree": trial.suggest_loguniform("colsample_bytree", 0.2, 0.6),
            "subsample": trial.suggest_loguniform("subsample", 0.4, 0.8),
            "alpha": trial.suggest_loguniform("alpha", 0.01, 10.0),
            "lambda": trial.suggest_loguniform("lambda", 1e-8, 10.0),
            "gamma": trial.suggest_loguniform("lambda", 1e-8, 10.0),
            "min_child_weight": trial.suggest_loguniform("min_child_weight", 10, 1000),
            "seed": 42,
            "n_jobs": -1,
        }

   
    model = xgb.XGBClassifier(params,random_state=42)  
    model.fit(X_train, y_train,eval_set=[(X_test,y_test)], early_stopping_rounds=150,verbose=False)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return acc

In [6]:
## optuna xgb
func = lambda trial: objective(trial, X_train,y_train,X_test,y_test)
# Pass func to Optuna studies
study = optuna.create_study(study_name='xgb',direction='maximize')
study.optimize(func,n_trials=500,n_jobs=-1)
# best params dict
xgb_best_params = study.best_trial.params

# best sgd model
print("*"*20,"Best Params","*"*20)
for key, value in xgb_best_params.items():
    print(f"{key:>20s} : {value}")
    
print(f"{'best objective value':>20s} : {study.best_value}")

[32m[I 2022-08-16 17:09:27,143][0m A new study created in memory with name: xgb[0m
[33m[W 2022-08-16 17:09:27,263][0m Trial 0 failed because of the following error: XGBoostError("[17:09:27] ../src/objective/objective.cc:26: Unknown objective function: `{'verbosity': 0, 'objective': 'binary:logistic', 'n_estimators': 10000, 'max_depth': 6, 'learning_rate': 0.007858708827990647, 'colsample_bytree': 0.5606846392320929, 'subsample': 0.5460275846495002, 'alpha': 2.3529608262620405, 'lambda': 0.00015452807110850452, 'gamma': 0.00015452807110850452, 'min_child_weight': 494.6068380255749, 'seed': 42, 'n_jobs': -1}`\nObjective candidate: survival:aft\nObjective candidate: binary:hinge\nObjective candidate: multi:softmax\nObjective candidate: multi:softprob\nObjective candidate: rank:pairwise\nObjective candidate: rank:ndcg\nObjective candidate: rank:map\nObjective candidate: reg:squarederror\nObjective candidate: reg:squaredlogerror\nObjective candidate: reg:logistic\nObjective candidate: 

[33m[W 2022-08-16 17:09:27,284][0m Trial 1 failed because of the following error: XGBoostError("[17:09:27] ../src/objective/objective.cc:26: Unknown objective function: `{'verbosity': 0, 'objective': 'binary:logistic', 'n_estimators': 10000, 'max_depth': 6, 'learning_rate': 0.008050131457387966, 'colsample_bytree': 0.34795508126873637, 'subsample': 0.5780206436687471, 'alpha': 0.15449042577626462, 'lambda': 0.0062857428693211224, 'gamma': 0.0062857428693211224, 'min_child_weight': 31.686004712361303, 'seed': 42, 'n_jobs': -1}`\nObjective candidate: survival:aft\nObjective candidate: binary:hinge\nObjective candidate: multi:softmax\nObjective candidate: multi:softprob\nObjective candidate: rank:pairwise\nObjective candidate: rank:ndcg\nObjective candidate: rank:map\nObjective candidate: reg:squarederror\nObjective candidate: reg:squaredlogerror\nObjective candidate: reg:logistic\nObjective candidate: reg:pseudohubererror\nObjective candidate: binary:logistic\nObjective candidate: bina

[33m[W 2022-08-16 17:09:27,285][0m Trial 11 failed because of the following error: XGBoostError("[17:09:27] ../src/objective/objective.cc:26: Unknown objective function: `{'verbosity': 0, 'objective': 'binary:logistic', 'n_estimators': 10000, 'max_depth': 12, 'learning_rate': 0.007686854666852161, 'colsample_bytree': 0.23540510352813368, 'subsample': 0.5314397021580918, 'alpha': 0.15271385386397124, 'lambda': 0.012381130458544774, 'gamma': 0.012381130458544774, 'min_child_weight': 80.20026570367422, 'seed': 42, 'n_jobs': -1}`\nObjective candidate: survival:aft\nObjective candidate: binary:hinge\nObjective candidate: multi:softmax\nObjective candidate: multi:softprob\nObjective candidate: rank:pairwise\nObjective candidate: rank:ndcg\nObjective candidate: rank:map\nObjective candidate: reg:squarederror\nObjective candidate: reg:squaredlogerror\nObjective candidate: reg:logistic\nObjective candidate: reg:pseudohubererror\nObjective candidate: binary:logistic\nObjective candidate: binar

[33m[W 2022-08-16 17:09:27,286][0m Trial 14 failed because of the following error: XGBoostError("[17:09:27] ../src/objective/objective.cc:26: Unknown objective function: `{'verbosity': 0, 'objective': 'binary:logistic', 'n_estimators': 10000, 'max_depth': 12, 'learning_rate': 0.020099567384940193, 'colsample_bytree': 0.5575327294411685, 'subsample': 0.6864849738188915, 'alpha': 0.2072563599941802, 'lambda': 1.8876750140574453e-07, 'gamma': 1.8876750140574453e-07, 'min_child_weight': 268.30090898557756, 'seed': 42, 'n_jobs': -1}`\nObjective candidate: survival:aft\nObjective candidate: binary:hinge\nObjective candidate: multi:softmax\nObjective candidate: multi:softprob\nObjective candidate: rank:pairwise\nObjective candidate: rank:ndcg\nObjective candidate: rank:map\nObjective candidate: reg:squarederror\nObjective candidate: reg:squaredlogerror\nObjective candidate: reg:logistic\nObjective candidate: reg:pseudohubererror\nObjective candidate: binary:logistic\nObjective candidate: bi

[33m[W 2022-08-16 17:09:27,288][0m Trial 5 failed because of the following error: XGBoostError("[17:09:27] ../src/objective/objective.cc:26: Unknown objective function: `{'verbosity': 0, 'objective': 'binary:logistic', 'n_estimators': 10000, 'max_depth': 8, 'learning_rate': 0.006245202726854972, 'colsample_bytree': 0.43289083150905544, 'subsample': 0.649547262373748, 'alpha': 0.148666974465535, 'lambda': 1.953352364970492e-08, 'gamma': 1.953352364970492e-08, 'min_child_weight': 16.54583720649165, 'seed': 42, 'n_jobs': -1}`\nObjective candidate: survival:aft\nObjective candidate: binary:hinge\nObjective candidate: multi:softmax\nObjective candidate: multi:softprob\nObjective candidate: rank:pairwise\nObjective candidate: rank:ndcg\nObjective candidate: rank:map\nObjective candidate: reg:squarederror\nObjective candidate: reg:squaredlogerror\nObjective candidate: reg:logistic\nObjective candidate: reg:pseudohubererror\nObjective candidate: binary:logistic\nObjective candidate: binary:l

[33m[W 2022-08-16 17:09:27,291][0m Trial 13 failed because of the following error: XGBoostError("[17:09:27] ../src/objective/objective.cc:26: Unknown objective function: `{'verbosity': 0, 'objective': 'binary:logistic', 'n_estimators': 10000, 'max_depth': 4, 'learning_rate': 0.007904759497654668, 'colsample_bytree': 0.3387012114895767, 'subsample': 0.6884719582521531, 'alpha': 0.5131711436616413, 'lambda': 0.03853065683900717, 'gamma': 0.03853065683900717, 'min_child_weight': 10.681051925676718, 'seed': 42, 'n_jobs': -1}`\nObjective candidate: survival:aft\nObjective candidate: binary:hinge\nObjective candidate: multi:softmax\nObjective candidate: multi:softprob\nObjective candidate: rank:pairwise\nObjective candidate: rank:ndcg\nObjective candidate: rank:map\nObjective candidate: reg:squarederror\nObjective candidate: reg:squaredlogerror\nObjective candidate: reg:logistic\nObjective candidate: reg:pseudohubererror\nObjective candidate: binary:logistic\nObjective candidate: binary:lo

[33m[W 2022-08-16 17:09:27,294][0m Trial 12 failed because of the following error: XGBoostError("[17:09:27] ../src/objective/objective.cc:26: Unknown objective function: `{'verbosity': 0, 'objective': 'binary:logistic', 'n_estimators': 10000, 'max_depth': 10, 'learning_rate': 0.011603459208539169, 'colsample_bytree': 0.33036410750196843, 'subsample': 0.7290196426890265, 'alpha': 4.043778501743034, 'lambda': 4.67022976083663e-06, 'gamma': 4.67022976083663e-06, 'min_child_weight': 11.02860754658192, 'seed': 42, 'n_jobs': -1}`\nObjective candidate: survival:aft\nObjective candidate: binary:hinge\nObjective candidate: multi:softmax\nObjective candidate: multi:softprob\nObjective candidate: rank:pairwise\nObjective candidate: rank:ndcg\nObjective candidate: rank:map\nObjective candidate: reg:squarederror\nObjective candidate: reg:squaredlogerror\nObjective candidate: reg:logistic\nObjective candidate: reg:pseudohubererror\nObjective candidate: binary:logistic\nObjective candidate: binary:

[33m[W 2022-08-16 17:09:27,296][0m Trial 4 failed because of the following error: XGBoostError("[17:09:27] ../src/objective/objective.cc:26: Unknown objective function: `{'verbosity': 0, 'objective': 'binary:logistic', 'n_estimators': 10000, 'max_depth': 7, 'learning_rate': 0.029709239640572035, 'colsample_bytree': 0.261821976987339, 'subsample': 0.6507111726635402, 'alpha': 0.010351953791785122, 'lambda': 0.0003305522096703232, 'gamma': 0.0003305522096703232, 'min_child_weight': 524.721452063603, 'seed': 42, 'n_jobs': -1}`\nObjective candidate: survival:aft\nObjective candidate: binary:hinge\nObjective candidate: multi:softmax\nObjective candidate: multi:softprob\nObjective candidate: rank:pairwise\nObjective candidate: rank:ndcg\nObjective candidate: rank:map\nObjective candidate: reg:squarederror\nObjective candidate: reg:squaredlogerror\nObjective candidate: reg:logistic\nObjective candidate: reg:pseudohubererror\nObjective candidate: binary:logistic\nObjective candidate: binary:

XGBoostError: [17:09:27] ../src/objective/objective.cc:26: Unknown objective function: `{'verbosity': 0, 'objective': 'binary:logistic', 'n_estimators': 10000, 'max_depth': 6, 'learning_rate': 0.007858708827990647, 'colsample_bytree': 0.5606846392320929, 'subsample': 0.5460275846495002, 'alpha': 2.3529608262620405, 'lambda': 0.00015452807110850452, 'gamma': 0.00015452807110850452, 'min_child_weight': 494.6068380255749, 'seed': 42, 'n_jobs': -1}`
Objective candidate: survival:aft
Objective candidate: binary:hinge
Objective candidate: multi:softmax
Objective candidate: multi:softprob
Objective candidate: rank:pairwise
Objective candidate: rank:ndcg
Objective candidate: rank:map
Objective candidate: reg:squarederror
Objective candidate: reg:squaredlogerror
Objective candidate: reg:logistic
Objective candidate: reg:pseudohubererror
Objective candidate: binary:logistic
Objective candidate: binary:logitraw
Objective candidate: reg:linear
Objective candidate: count:poisson
Objective candidate: survival:cox
Objective candidate: reg:gamma
Objective candidate: reg:tweedie

Stack trace:
  [bt] (0) /home/ramch/anaconda3/envs/venv/lib/python3.6/site-packages/xgboost/lib/libxgboost.so(+0x21da6d) [0x7fe64595ba6d]
  [bt] (1) /home/ramch/anaconda3/envs/venv/lib/python3.6/site-packages/xgboost/lib/libxgboost.so(+0x21e0c9) [0x7fe64595c0c9]
  [bt] (2) /home/ramch/anaconda3/envs/venv/lib/python3.6/site-packages/xgboost/lib/libxgboost.so(+0x1b2522) [0x7fe6458f0522]
  [bt] (3) /home/ramch/anaconda3/envs/venv/lib/python3.6/site-packages/xgboost/lib/libxgboost.so(+0x1b9e5d) [0x7fe6458f7e5d]
  [bt] (4) /home/ramch/anaconda3/envs/venv/lib/python3.6/site-packages/xgboost/lib/libxgboost.so(XGBoosterBoostedRounds+0x31) [0x7fe6457d66e1]
  [bt] (5) /home/ramch/anaconda3/envs/venv/lib/python3.6/lib-dynload/../../libffi.so.7(+0x69dd) [0x7fe70e1d49dd]
  [bt] (6) /home/ramch/anaconda3/envs/venv/lib/python3.6/lib-dynload/../../libffi.so.7(+0x6067) [0x7fe70e1d4067]
  [bt] (7) /home/ramch/anaconda3/envs/venv/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so(_ctypes_callproc+0x2ce) [0x7fe70d302ede]
  [bt] (8) /home/ramch/anaconda3/envs/venv/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so(+0x13915) [0x7fe70d303915]

