In [1]:
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
from sklearn import tree
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

from sklearn.tree import plot_tree
from sklearn.metrics import roc_auc_score,accuracy_score,r2_score
from sklearn.model_selection import train_test_split, GridSearchCV,StratifiedKFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer 
from sklearn.compose import make_column_selector
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.naive_bayes import GaussianNB



import warnings
# Suppress every Python warning for the rest of the session. This keeps cell
# output short, but it also hides deprecation notices raised by the libraries
# imported above.
warnings.filterwarnings('ignore')


In [2]:
# Read the raw table from the working directory, then promote the 'Code'
# column to the row index so it is not treated as a predictor later on.
cancer_raw = pd.read_csv('BreastCancer.csv')
cancer = cancer_raw.set_index('Code')

In [3]:
# Target vector: the 'Class' column.
y = cancer['Class']
# Feature matrix: every remaining column of the frame.
X = cancer.drop(columns='Class')

In [4]:
# Stratified 70/30 hold-out split; the fixed seed keeps the partition reproducible.
# NOTE(review): none of the cells visible here use these splits — the grid
# search below is fitted on the full X, y. Confirm whether that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [5]:
# Disabled (not referenced by any cell shown here):
# dtc = DecisionTreeClassifier(random_state=24)

In [6]:
# Gradient boosting classifier: only the seed is fixed here; every other
# hyperparameter is left at the library default and tuned by the grid search.
gbm = GradientBoostingClassifier(random_state=24)

In [7]:
# Display the estimator's full hyperparameter dictionary (useful for picking
# the keys that go into the search grid).
gbm.get_params()

{'ccp_alpha': 0.0,
 'criterion': 'friedman_mse',
 'init': None,
 'learning_rate': 0.1,
 'loss': 'log_loss',
 'max_depth': 3,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_iter_no_change': None,
 'random_state': 24,
 'subsample': 1.0,
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [8]:
# 5-fold stratified cross-validation with shuffling; the seed pins the fold assignment.
kFold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=24,
)

# Search space: 3 depths x 2 ensemble sizes x 5 learning rates = 30 candidates.
params = {
    'max_depth': [2, 3, 4],
    'n_estimators': [10, 50],
    'learning_rate': np.linspace(0.001, 1, 5),
}

# Exhaustive search scored by ROC AUC; verbose=3 prints one line per fold fit
# including its score and timing.
gcv = GridSearchCV(
    estimator=gbm,
    param_grid=params,
    scoring='roc_auc',
    cv=kFold,
    verbose=3,
)

In [9]:
# Run the cross-validated grid search over the complete dataset.
# NOTE(review): this fits on X, y rather than the X_train / y_train split
# created earlier, so X_test is not an untouched hold-out for the selected
# model — confirm that relying on the CV score alone is intended.
gcv.fit(X,y)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.952 total time=   0.0s
[CV 2/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.940 total time=   0.0s
[CV 3/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.961 total time=   0.0s
[CV 4/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.952 total time=   0.0s
[CV 5/5] END learning_rate=0.001, max_depth=2, n_estimators=10;, score=0.936 total time=   0.0s
[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.973 total time=   0.0s
[CV 2/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.940 total time=   0.0s
[CV 3/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.962 total time=   0.0s
[CV 4/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.952 total time=   0.0s
[CV 5/5] END learning_rate=0.001, max_depth=2, n_estimators=50;, score=0.9

In [10]:
# Hyperparameter combination that achieved the best mean cross-validated ROC AUC.
gcv.best_params_

{'learning_rate': 0.5005, 'max_depth': 2, 'n_estimators': 10}

In [11]:
# Mean cross-validated ROC AUC obtained by the best hyperparameter combination.
gcv.best_score_

0.990956792117229