In [72]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import metrics

In [73]:
data = pd.read_csv("ILPD.csv")
data = data.dropna()
data

Unnamed: 0,Age,Gender,TB,DB,AP,SGPT,SGOT,TP,ALB,A/G,Target
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.90,1
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.00,1
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.40,1
...,...,...,...,...,...,...,...,...,...,...,...
578,60,Male,0.5,0.1,500,20,34,5.9,1.6,0.37,2
579,40,Male,0.6,0.1,98,35,31,6.0,3.2,1.10,1
580,52,Male,0.8,0.2,245,48,49,6.4,3.2,1.00,1
581,31,Male,1.3,0.5,184,29,32,6.8,3.4,1.00,1


In [74]:
data.Gender = data.Gender.astype('category').cat.codes
data

Unnamed: 0,Age,Gender,TB,DB,AP,SGPT,SGOT,TP,ALB,A/G,Target
0,65,0,0.7,0.1,187,16,18,6.8,3.3,0.90,1
1,62,1,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,1,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,1,1.0,0.4,182,14,20,6.8,3.4,1.00,1
4,72,1,3.9,2.0,195,27,59,7.3,2.4,0.40,1
...,...,...,...,...,...,...,...,...,...,...,...
578,60,1,0.5,0.1,500,20,34,5.9,1.6,0.37,2
579,40,1,0.6,0.1,98,35,31,6.0,3.2,1.10,1
580,52,1,0.8,0.2,245,48,49,6.4,3.2,1.00,1
581,31,1,1.3,0.5,184,29,32,6.8,3.4,1.00,1


In [75]:
X, Y = data.iloc[:, :-1], data.iloc[:, -1]

In [76]:
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.33, random_state=7, shuffle=True)

In [77]:
lr_list = [0.05, 0.075, 0.1, 0.2, 0.5, 0.75, 1]

for learning_rate in lr_list:
    gb_clf = GradientBoostingClassifier(
        n_estimators=50, learning_rate=learning_rate, max_features=10, max_depth=1, random_state=7)
    gb_clf.fit(X_train, y_train)

    print("Learning rate: ", learning_rate)
    print("Accuracy score (training): {0:.3f}".format(
        gb_clf.score(X_train, y_train)))
    print("Accuracy score (validation): {0:.3f}".format(
        gb_clf.score(X_test, y_test)))
    
    cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=7)
    scores = cross_val_score(gb_clf, X, Y, scoring='accuracy', cv=cv)
    print('%s : %f' % ("Stratified K fold", np.mean(scores)))

    print('\n')


Learning rate:  0.05
Accuracy score (training): 0.718
Accuracy score (validation): 0.766
Stratified K fold : 0.718480


Learning rate:  0.075
Accuracy score (training): 0.736
Accuracy score (validation): 0.740
Stratified K fold : 0.721934


Learning rate:  0.1
Accuracy score (training): 0.742
Accuracy score (validation): 0.745
Stratified K fold : 0.713299


Learning rate:  0.2
Accuracy score (training): 0.780
Accuracy score (validation): 0.745
Stratified K fold : 0.696028


Learning rate:  0.5
Accuracy score (training): 0.793
Accuracy score (validation): 0.719
Stratified K fold : 0.704663


Learning rate:  0.75
Accuracy score (training): 0.819
Accuracy score (validation): 0.729
Stratified K fold : 0.701209


Learning rate:  1
Accuracy score (training): 0.850
Accuracy score (validation): 0.703
Stratified K fold : 0.711572


