# Adaboost对mnist数据集进行分类

##### Copyright © 2020 by Wangchuwen，2018202114. All rights reserved.

## 一.装载数据集并且显示一个样板

In [1]:
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import classification_report,confusion_matrix  
from sklearn import datasets, svm, tree
from sklearn.ensemble import AdaBoostClassifier  
import matplotlib.pyplot as plt


# Load the digit data
digits = datasets.load_digits()
# View the features of the first observation
print(digits.data[0:1])

print(digits.target[0:1])


[[ 0.  0.  5. 13.  9.  1.  0.  0.  0.  0. 13. 15. 10. 15.  5.  0.  0.  3.
  15.  2.  0. 11.  8.  0.  0.  4. 12.  0.  0.  8.  8.  0.  0.  5.  8.  0.
   0.  9.  8.  0.  0.  4. 11.  0.  1. 12.  7.  0.  0.  2. 14.  5. 10. 12.
   0.  0.  0.  0.  6. 13. 10.  0.  0.  0.]]
[0]


## 二.切割训练集、测试集（固定比例7:3）

In [2]:
X = np.array(digits.data)
Y = digits.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30)  

## 三.用scikit learn的DecisionTreeClassifier进行分类模型训练

In [3]:
tclf = tree.DecisionTreeClassifier()
tclf = tclf.fit(X_train, Y_train)


## 四.测试集上的分类效果，即每个类别的precision, recall, f1

In [4]:
Y_pred = tclf.predict(X_test)
print(confusion_matrix(Y_test, Y_pred))  
print(classification_report(Y_test, Y_pred))

[[48  0  0  0  0  1  0  0  0  0]
 [ 0 37  3  1  1  0  0  0  3  1]
 [ 1  3 37  2  0  2  0  1  4  0]
 [ 0  2  1 47  2  0  0  0  0  2]
 [ 1  3  1  0 52  0  2  2  3  1]
 [ 0  1  1  2  0 49  1  0  4  2]
 [ 0  1  1  0  1  0 38  0  0  0]
 [ 0  0  0  2  9  0  0 53  2  1]
 [ 0  4  3  5  0  1  0  2 43  1]
 [ 0  3  1  1  1  0  0  0  1 42]]
              precision    recall  f1-score   support

           0       0.96      0.98      0.97        49
           1       0.69      0.80      0.74        46
           2       0.77      0.74      0.76        50
           3       0.78      0.87      0.82        54
           4       0.79      0.80      0.79        65
           5       0.92      0.82      0.87        60
           6       0.93      0.93      0.93        41
           7       0.91      0.79      0.85        67
           8       0.72      0.73      0.72        59
           9       0.84      0.86      0.85        49

    accuracy                           0.83       540
   macro avg       

## 五.用AdaBoostClassifier包装DecisionTreeClassifier，完成

### 5.1分类模型训练

In [9]:
bdt = AdaBoostClassifier(tree.DecisionTreeClassifier(max_depth=8, min_samples_split=20, min_samples_leaf=5), algorithm="SAMME", n_estimators=200, learning_rate=0.8)
bdt.fit(X_train,Y_train ) 


AdaBoostClassifier(algorithm='SAMME',
                   base_estimator=DecisionTreeClassifier(class_weight=None,
                                                         criterion='gini',
                                                         max_depth=8,
                                                         max_features=None,
                                                         max_leaf_nodes=None,
                                                         min_impurity_decrease=0.0,
                                                         min_impurity_split=None,
                                                         min_samples_leaf=5,
                                                         min_samples_split=20,
                                                         min_weight_fraction_leaf=0.0,
                                                         presort=False,
                                                         random_state=None,
                              

### 5.2显示每个类别的precision, recall, f1

In [10]:
Z = bdt.predict(X_test)
print(confusion_matrix(Y_test, Z))  
print(classification_report(Y_test, Z))

[[60  0  0  0  0  0  0  0  0  0]
 [ 0 52  0  0  0  0  0  0  0  0]
 [ 0  1 51  0  0  0  0  0  0  0]
 [ 0  0  0 57  0  0  0  0  0  0]
 [ 0  0  0  0 53  0  0  1  0  0]
 [ 0  0  0  0  0 46  1  0  0  0]
 [ 0  0  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  0  0  0 52  0  2]
 [ 0  3  0  0  0  0  0  0 50  0]
 [ 0  0  0  1  0  0  0  0  2 55]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        60
           1       0.93      1.00      0.96        52
           2       1.00      0.98      0.99        52
           3       0.98      1.00      0.99        57
           4       1.00      0.98      0.99        54
           5       1.00      0.98      0.99        47
           6       0.98      1.00      0.99        53
           7       0.98      0.96      0.97        54
           8       0.96      0.94      0.95        53
           9       0.96      0.95      0.96        58

    accuracy                           0.98       540
   macro avg       

##### 尝试不同数量的决策树,观察AdaBoostClassifier中决策树数目不同对评测精度的影响,给出你的观察.