In [167]:
from collections import Counter

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_curve
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss
from imblearn.combine import SMOTEENN
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.simplefilter(action ="ignore")

In [3]:
df = pd.read_csv("../data/fetal_health.csv")

In [132]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [183]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, test_size=0.2)

scaler = StandardScaler()
scaler.fit_transform(X_train)

classes_weights = class_weight.compute_sample_weight(
    class_weight='balanced',
    y=y_train.values
)

model = XGBClassifier(booster='gbtree', colsample_bytree=0.5, gamma=0, max_depth=3,
              reg_lambda=0, subsample=0.5)
model.fit(X_train, y_train, sample_weight=classes_weights)

scaler.fit(X_valid)
print(classification_report(y_valid, model.predict(X_valid)))
print(confusion_matrix(y_valid, model.predict(X_valid)))

              precision    recall  f1-score   support

         1.0       0.99      0.97      0.98       344
         2.0       0.89      0.91      0.90        46
         3.0       0.87      0.94      0.91        36

    accuracy                           0.96       426
   macro avg       0.92      0.94      0.93       426
weighted avg       0.97      0.96      0.97       426

[[335   4   5]
 [  4  42   0]
 [  1   1  34]]


In [164]:
param_grid = {
    "max_depth": [3, 5, 7],
    "learning_rate": [0.3, 0.1],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10],
    "scale_pos_weight": [1, 3, 5],
    "subsample": [0.5, 0.8],
    "colsample_bytree": [0.5, 0.9],
}

model = XGBClassifier()
grid_cv = GridSearchCV(model, param_grid, n_jobs=-1, cv=3, scoring="recall")
grid_cv.fit(X_train, y_train)

Parameters: { "scale_pos_weight" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




GridSearchCV(cv=3,
             estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs...
                                     num_parallel_tree=None, random_state=None,
                                     reg_alpha=None, reg_lambda=None,
                                     scale_pos_weight=None, subsample=None,
                                     tree_method=None, va

In [151]:
grid_cv.best_estimator_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.5, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.3, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=4, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=0, scale_pos_weight=1, subsample=0.5,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [184]:
model.save_model("model.json")