In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot
import seaborn as sns

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score

%matplotlib inline

### Data

In [2]:
data = pd.read_csv('pima-indians-diabetes.csv')

In [3]:
x = data.drop('diabetes', axis = 1)
y = data['diabetes']

In [4]:
y.value_counts()

0    500
1    268
Name: diabetes, dtype: int64

### GradientBoosting

In [5]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier

In [6]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.30, random_state = 101)

In [7]:
gboost = GradientBoostingClassifier(learning_rate = 0.01, n_estimators = 100)

In [8]:
gboost.fit(xtrain, ytrain)

GradientBoostingClassifier(learning_rate=0.01)

In [9]:
pred = gboost.predict(xtest)

In [10]:
confusion_matrix(ytest, pred)

array([[139,  11],
       [ 41,  40]])

In [11]:
accuracy_score(ytest, pred)

0.7748917748917749

### AdaBoost

In [12]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

In [13]:
dtree = DecisionTreeClassifier()
ada = AdaBoostClassifier(base_estimator = dtree, learning_rate = 0.01, n_estimators = 100, algorithm = 'SAMME')

In [14]:
ada.fit(xtrain, ytrain)

AdaBoostClassifier(algorithm='SAMME', base_estimator=DecisionTreeClassifier(),
                   learning_rate=0.01, n_estimators=100)

In [15]:
pred_ada = ada.predict(xtest)

In [16]:
accuracy_score(ytest, pred_ada)

0.7056277056277056

### XGBoost

In [17]:
from xgboost import XGBClassifier

In [18]:
xbst = XGBClassifier(learning_rate = 0.01, n_estimators = 100)

In [19]:
xbst.fit(xtrain, ytrain)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.01, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [20]:
pred_xbst = xbst.predict(xtest)

In [21]:
confusion_matrix(ytest, pred_xbst)

array([[121,  29],
       [ 27,  54]])

In [22]:
accuracy_score(ytest, pred_xbst)

0.7575757575757576