In [1]:
from sklearn.datasets import load_digits
from sklearn.ensemble import GradientBoostingClassifier
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

import numpy as np

In [2]:
data = load_digits()
X = data.data/255
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=.3)

In [5]:
#model 1: Grid Search Gradient Boost
np.random.seed(6)
learning_rate = np.logspace(-3,3,10)
grid_para = dict(learning_rate = learning_rate)

cv_model = GridSearchCV(GradientBoostingClassifier(),grid_para)
cv_model.fit(X_train, y_train)

np.random.seed(6)
clf1 = GradientBoostingClassifier(learning_rate = cv_model.best_params_['learning_rate'])
clf1.fit(X_train, y_train)
y_predict = clf1.predict(X_test)




In [6]:
print('accuracy:', clf1.score(X_test, y_test))
print(confusion_matrix(y_test, y_predict))

accuracy: 0.9685185185185186
[[52  0  0  0  0  0  0  0  0  0]
 [ 0 47  2  0  0  0  0  0  0  0]
 [ 0  1 55  0  0  0  0  0  0  0]
 [ 0  0  0 53  0  0  0  1  2  0]
 [ 0  0  0  0 49  0  0  1  0  0]
 [ 0  0  0  0  0 53  0  0  1  0]
 [ 0  0  0  0  0  1 64  0  1  0]
 [ 1  0  0  0  1  0  0 49  1  1]
 [ 0  1  0  0  0  0  0  0 43  0]
 [ 1  0  0  0  0  0  0  0  1 58]]


In [7]:
#model 2: XG boost
np.random.seed(6)
clf2 = XGBClassifier(learning_rate = .1)
clf2.fit(X_train, y_train)
y_predict2 = clf2.predict(X_test)
print('accuracy:',clf2.score(X_test, y_test))
print(confusion_matrix(y_test, y_predict2))


accuracy: 0.9629629629629629
[[52  0  0  0  0  0  0  0  0  0]
 [ 0 47  1  0  0  0  0  1  0  0]
 [ 0  1 55  0  0  0  0  0  0  0]
 [ 0  0  0 54  0  0  0  1  1  0]
 [ 0  0  0  0 49  0  0  1  0  0]
 [ 0  0  0  0  0 52  0  0  1  1]
 [ 0  0  0  0  0  1 64  0  1  0]
 [ 0  0  0  0  1  0  0 50  1  1]
 [ 0  2  0  0  0  0  0  2 40  0]
 [ 0  0  0  1  0  0  0  0  2 57]]


In [11]:
# model3: Decision tree with ada boost
np.random.seed(6)
clf3 = AdaBoostClassifier(DecisionTreeClassifier())
clf3.fit(X_train, y_train)
print('accuracy:', clf3.score(X_test, y_test))

accuracy: 0.8444444444444444


In [10]:
# model4: ada boost alone
np.random.seed(6)
clf3 = AdaBoostClassifier()
clf3.fit(X_train, y_train)
print('accuracy:',clf3.score(X_test, y_test))

accuracy: 0.2574074074074074
