### `xgboost` XGBoost

In [1]:
!pip install XGBoost



In [22]:
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
from sklearn import tree
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

from sklearn.tree import plot_tree
from sklearn.metrics import roc_auc_score,accuracy_score,r2_score
from sklearn.model_selection import train_test_split, GridSearchCV,StratifiedKFold
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_transformer 
from sklearn.compose import make_column_selector
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.naive_bayes import GaussianNB



import warnings
warnings.filterwarnings('ignore')


In [23]:
cancer = pd.read_csv('BreastCancer.csv').set_index('Code')

In [4]:
X = cancer.drop('Class',axis=1)
y = cancer['Class']
le = LabelEncoder()
y = le.fit_transform(y)
dict(zip(list(np.unique(y)),le.classes_))

{0: 'Benign', 1: 'Malignant'}

In [5]:
 X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=24, test_size=0.3,stratify=y)

In [10]:
xgbm = XGBClassifier(random_state=24)
# xgbm.fit(X_train, y_train)
# y_pred = xgbm.predict(X_test)
# print(accuracy_score(y_test,y_pred))

In [11]:
kFold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
params = {'max_depth' : [2,3,4], 'n_estimators':[10,50],'learning_rate':np.linspace(0.001,1,5)}
gcv = GridSearchCV(xgbm,param_grid=params,cv=kFold,scoring='roc_auc',verbose=3)

In [12]:
gcv.fit(X,y)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.961 total time=   0.0s
[CV 2/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.941 total time=   0.0s
[CV 3/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.954 total time=   0.0s
[CV 4/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.952 total time=   0.0s
[CV 5/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.937 total time=   0.0s
[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.961 total time=   0.0s
[CV 2/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.943 total time=   0.0s
[CV 3/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.962 total time=   0.0s
[CV 4/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.952 total time=   0.0s
[CV 5/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.9

In [13]:
print(gcv.best_params_)
print(gcv.best_score_)

{'learning_rate': 0.25075, 'max_depth': 4, 'n_estimators': 10}
0.9920563686535292


### `lightgbm` Boosting

In [None]:
!pip install Lightgbm

In [18]:
lgbm= LGBMClassifier(random_state=24)
kFold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
params = {'max_depth' : [2,3,4], 'n_estimators':[10,50],'learning_rate':np.linspace(0.001,1,5)}
gcv = GridSearchCV(lgbm,param_grid=params,cv=kFold,scoring='roc_auc',verbose=3)

In [19]:
gcv.fit(X,y)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[LightGBM] [Info] Number of positive: 193, number of negative: 366
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 96
[LightGBM] [Info] Number of data points in the train set: 559, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.345259 -> initscore=-0.639943
[LightGBM] [Info] Start training from score -0.639943
[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.970 total time=   0.0s
[LightGBM] [Info] Number of positive: 193, number of negative: 366
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000175 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 96
[LightGBM] [Info] Number of data points in the train set: 559, number of used features: 9
[Light

In [20]:
print(gcv.best_params_)
print(gcv.best_score_)

{'learning_rate': 1.0, 'max_depth': 3, 'n_estimators': 50}
0.9916129770111581


### `catboost` Boosting

In [21]:
!pip install CatBoost

Collecting CatBoost
  Downloading catboost-1.2.7-cp312-cp312-win_amd64.whl.metadata (1.2 kB)
Collecting graphviz (from CatBoost)
  Downloading graphviz-0.20.3-py3-none-any.whl.metadata (12 kB)
Downloading catboost-1.2.7-cp312-cp312-win_amd64.whl (101.7 MB)
   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.1/101.7 MB 1.3 MB/s eta 0:01:18
   ---------------------------------------- 0.3/101.7 MB 2.6 MB/s eta 0:00:39
   ---------------------------------------- 0.8/101.7 MB 4.6 MB/s eta 0:00:22
    --------------------------------------- 1.4/101.7 MB 6.2 MB/s eta 0:00:17
   - -------------------------------------- 2.6/101.7 MB 9.8 MB/s eta 0:00:11
   - -------------------------------------- 4.3/101.7 MB 13.6 MB/s eta 0:00:08
   -- ------------------------------------- 6.9/101.7 MB 19.2 MB/s eta 0:00:05
   ---- ----------------------------------- 10.9/101.

In [24]:
cgbm= CatBoostClassifier(random_state=24)
kFold = StratifiedKFold(n_splits=5,shuffle=True,random_state=24)
params = {'max_depth' : [2,3,4], 'n_estimators':[10,50],'learning_rate':np.linspace(0.001,1,5)}
gcv = GridSearchCV(cgbm,param_grid=params,cv=kFold,scoring='roc_auc',verbose=3)

In [25]:
gcv.fit(X,y)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
0:	learn: 0.6924045	total: 152ms	remaining: 1.37s
1:	learn: 0.6916960	total: 153ms	remaining: 612ms
2:	learn: 0.6909517	total: 154ms	remaining: 360ms
3:	learn: 0.6902190	total: 155ms	remaining: 233ms
4:	learn: 0.6894729	total: 156ms	remaining: 156ms
5:	learn: 0.6887781	total: 157ms	remaining: 105ms
6:	learn: 0.6880340	total: 158ms	remaining: 67.9ms
7:	learn: 0.6872802	total: 160ms	remaining: 39.9ms
8:	learn: 0.6865487	total: 161ms	remaining: 17.8ms
9:	learn: 0.6858238	total: 162ms	remaining: 0us
[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.995 total time=   0.1s
0:	learn: 0.6924105	total: 669us	remaining: 6.02ms
1:	learn: 0.6916905	total: 1.18ms	remaining: 4.71ms
2:	learn: 0.6909278	total: 1.96ms	remaining: 4.57ms
3:	learn: 0.6901781	total: 2.84ms	remaining: 4.26ms
4:	learn: 0.6894156	total: 3.52ms	remaining: 3.52ms
5:	learn: 0.6887138	total: 4.07ms	remaining: 2.72ms
6:	learn: 0.6879476	total: 4.6

In [26]:
print(gcv.best_params_)
print(gcv.best_score_)

{'learning_rate': 1.0, 'max_depth': 2, 'n_estimators': 10}
0.9926941034416632
