# MNIST 数据集
- 训练集：60000
- 测试集：10000
- 图片：28x28x1 $\rightarrow$ 784

In [5]:
import numpy as np
from tqdm import tqdm

In [2]:
def load_data(fileName):
    """
    Load an MNIST-style CSV file and binarize its pixel values.

    Every non-blank line is parsed as ``label,pixel,pixel,...``. A pixel
    strictly greater than 128 becomes 1, every other pixel becomes 0.

    Args:
        fileName: Path of the CSV file to read.

    Returns:
        A ``(dataArr, labelArr)`` tuple. ``dataArr`` is a list with one list
        of 0/1 ints per sample; ``labelArr`` is the list of integer labels in
        the same order.

    Raises:
        ValueError: If a non-blank line contains a non-integer field.
    """
    dataArr = []
    labelArr = []

    # The context manager guarantees the file is closed, even when a line
    # fails to parse.
    with open(fileName, 'r') as fr:
        for line in tqdm(fr.readlines()):
            stripped = line.strip()
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file) instead of crashing on int('').
            if not stripped:
                continue
            currentLine = stripped.split(',')
            # Binarize the pixel columns; column 0 is the label.
            dataArr.append([int(int(num) > 128) for num in currentLine[1:]])
            labelArr.append(int(currentLine[0]))

    return dataArr, labelArr

# 决策树 ID3

In [12]:
def majorClass(labelArr):
    """
    Return the label that occurs most often in ``labelArr``.

    When several labels share the highest count, the one that appears
    first in ``labelArr`` is returned.
    """
    # Tally the occurrences of every label, keeping first-seen order.
    counts = {}
    for label in labelArr:
        counts[label] = counts.get(label, 0) + 1

    # A stable ascending sort on the negated count ranks the labels from
    # most to least frequent without disturbing the order of ties.
    ranked = sorted(counts.items(), key=lambda item: -item[1])

    # The first entry is the (label, count) pair with the highest count.
    return ranked[0][0]

def calc_H_D(trainLabelArr):
    """
    Compute the empirical entropy H(D) of a label array, in bits.

    Args:
        trainLabelArr: NumPy array of labels.

    Returns:
        The sum of ``-p * log2(p)`` over the distinct labels, where ``p`` is
        the fraction of entries equal to that label. An array with no
        entries yields 0.
    """
    entropy = 0

    # Visit each distinct label once.
    for label in set(trainLabelArr):
        # Fraction of the samples carrying this label.
        p = np.count_nonzero(trainLabelArr == label) / trainLabelArr.size
        entropy += -p * np.log2(p)

    return entropy

def calc_H_D_A(trainDataArr_DevFeature, trainLabelArr):
    """
    Compute the empirical conditional entropy H(D|A) for one feature.

    Args:
        trainDataArr_DevFeature: NumPy array holding the value of a single
            feature for every sample.
        trainLabelArr: NumPy array of labels aligned with the feature array.

    Returns:
        The entropy of the labels within each feature value, weighted by the
        fraction of samples taking that value. Empty input yields 0.
    """
    conditional_entropy = 0

    # Visit each distinct value the feature takes.
    for value in set(trainDataArr_DevFeature):
        # Boolean mask selecting the samples whose feature equals `value`.
        mask = trainDataArr_DevFeature == value
        weight = trainDataArr_DevFeature[mask].size / trainDataArr_DevFeature.size
        conditional_entropy += weight * calc_H_D(trainLabelArr[mask])

    return conditional_entropy

def calc_BestFeature(trainDataList, trainLabelList):
    """
    Find the feature with the largest information gain.

    Args:
        trainDataList: Samples as a list of equal-length feature lists.
        trainLabelList: Labels aligned with ``trainDataList``.

    Returns:
        A ``(feature_index, information_gain)`` tuple. When several features
        share the largest gain, the lowest index is returned.
    """
    # Work on arrays so that columns can be sliced and masked.
    data = np.array(trainDataList)
    labels = np.array(trainLabelList)

    featureNum = data.shape[1]
    print("featureNum: ", featureNum)

    # Entropy of the whole data set; the gain of each feature is measured
    # against this baseline.
    base_entropy = calc_H_D(labels)

    # Best candidate so far; -1 marks "nothing selected yet".
    best_feature, best_gain = -1, -1
    for feature in range(featureNum):
        # One-dimensional copy of this feature's column.
        column = data[:, feature].flatten()
        gain = base_entropy - calc_H_D_A(column, labels)

        # Strict comparison keeps the earliest feature on ties.
        if gain > best_gain:
            best_feature, best_gain = feature, gain

    return best_feature, best_gain

def getSubDataArr(trainDataArr, trainLabelArr, A, a):
    """
    Select the samples whose feature ``A`` equals ``a`` and drop that feature.

    Args:
        trainDataArr: Samples as a list of feature lists.
        trainLabelArr: Labels aligned with ``trainDataArr``.
        A: Index of the feature to split on.
        a: Feature value a sample must have to be kept.

    Returns:
        A ``(retDataArr, retLabelArr)`` tuple holding the kept samples, with
        column ``A`` removed, and their labels.
    """
    # Positions of the samples that take value `a` on feature `A`.
    kept = [idx for idx, row in enumerate(trainDataArr) if row[A] == a]

    # Rebuild each kept sample without column A.
    retDataArr = [trainDataArr[idx][:A] + trainDataArr[idx][A + 1:] for idx in kept]
    retLabelArr = [trainLabelArr[idx] for idx in kept]

    return retDataArr, retLabelArr

def createTree(*dataSet):
    """
    Recursively build an ID3 decision tree over 0/1-valued features.

    Args:
        *dataSet: The first positional argument must be a
            ``(trainDataList, trainLabelList)`` pair.

    Returns:
        Either a leaf label, or a dict of the form
        ``{feature_index: {0: subtree, 1: subtree}}``. Because the chosen
        feature is removed before recursing, each ``feature_index`` refers to
        the sample with all previously used features already deleted.
    """
    # Minimum information gain required to keep splitting.
    Epsilon = 0.1

    samples = dataSet[0][0]
    labels = dataSet[0][1]

    print('Start a node', len(samples[0]), len(labels))

    # Every sample has the same label: the node is a leaf.
    if len(set(labels)) == 1:
        return labels[0]

    # No features left to split on: fall back to the majority label.
    if len(samples[0]) == 0:
        return majorClass(labels)

    Ag, EpsilonGet = calc_BestFeature(samples, labels)

    # The best split gains too little: stop and use the majority label.
    if EpsilonGet < Epsilon:
        return majorClass(labels)

    # Grow one subtree per feature value, 0 first and then 1.
    branches = {}
    for value in (0, 1):
        branches[value] = createTree(getSubDataArr(samples, labels, Ag, value))

    return {Ag: branches}


def predict(testDataList, tree):
    """
    Predict the label of one sample by walking the decision tree.

    Args:
        testDataList: Feature values of a single sample. It is not modified.
        tree: Either a bare leaf label, or a nested dict of the form
            ``{feature_index: {feature_value: subtree_or_label}}``. Each
            ``feature_index`` refers to the sample with the features used by
            the ancestor nodes already removed.

    Returns:
        The label stored at the leaf reached by the sample.

    Raises:
        KeyError: If the sample's value for a split feature has no branch.
        IndexError: If a feature index is out of range for the sample.
    """
    # Work on a copy: consumed features are deleted during the descent, and
    # doing that on the caller's list would corrupt the sample for any later
    # use (for example predicting the same sample twice).
    features = list(testDataList)

    # Anything that is not a dict is a leaf label.
    while isinstance(tree, dict):
        # An internal node holds exactly one feature index.
        (key, value), = tree.items()
        if not isinstance(value, dict):
            return value

        dataVal = features[key]
        # Drop the consumed feature so that the indices stored in deeper
        # nodes line up with the shortened sample.
        del features[key]
        tree = value[dataVal]

    return tree


In [6]:
# Load the training and test splits from their CSV files; load_data
# binarizes the pixels and returns (samples, labels) for each split.
trainDataList, trainLabelList = load_data('./mnist/mnist_train.csv')
testDataList, testLabelList = load_data('./mnist/mnist_test.csv')

100%|███████████████████████████████████| 60000/60000 [00:12<00:00, 4945.01it/s]
100%|███████████████████████████████████| 10000/10000 [00:01<00:00, 5120.26it/s]


In [13]:
# Build the decision tree from the full training set (createTree expects the
# data and labels packed into a single tuple), then print the nested dict.
tree = createTree((trainDataList, trainLabelList))
print("tree is ", tree)

Start a node 784 60000
featureNum:  784
Start a node 783 33587
featureNum:  783
Start a node 782 23938
featureNum:  782
Start a node 781 18700
featureNum:  781
Start a node 780 11336
featureNum:  780
Start a node 779 8724
featureNum:  779
Start a node 778 7677
featureNum:  778
Start a node 777 6831
featureNum:  777
Start a node 776 6242
featureNum:  776
Start a node 775 5840
featureNum:  775
Start a node 775 402
featureNum:  775
Start a node 774 220
featureNum:  774
Start a node 773 171
featureNum:  773
Start a node 772 142
featureNum:  772
Start a node 771 133
featureNum:  771
Start a node 770 128
featureNum:  770
Start a node 769 125
featureNum:  769
Start a node 768 123
featureNum:  768
Start a node 768 2
featureNum:  768
Start a node 767 1
Start a node 767 1
Start a node 769 3
featureNum:  769
Start a node 768 1
Start a node 768 2
Start a node 770 5
featureNum:  770
Start a node 769 2
Start a node 769 3
featureNum:  769
Start a node 768 2
Start a node 768 1
Start a node 771 9
featu

Start a node 770 1
Start a node 770 1
Start a node 773 7
featureNum:  773
Start a node 772 5
Start a node 772 2
featureNum:  772
Start a node 771 1
Start a node 771 1
Start a node 774 16
featureNum:  774
Start a node 773 3
featureNum:  773
Start a node 772 2
Start a node 772 1
Start a node 773 13
Start a node 777 846
featureNum:  777
Start a node 776 479
featureNum:  776
Start a node 775 369
featureNum:  775
Start a node 774 302
featureNum:  774
Start a node 773 253
featureNum:  773
Start a node 772 244
featureNum:  772
Start a node 771 231
featureNum:  771
Start a node 770 224
featureNum:  770
Start a node 769 216
featureNum:  769
Start a node 769 8
featureNum:  769
Start a node 768 3
featureNum:  768
Start a node 767 2
Start a node 767 1
Start a node 768 5
Start a node 770 7
featureNum:  770
Start a node 769 4
featureNum:  769
Start a node 768 3
Start a node 768 1
Start a node 769 3
featureNum:  769
Start a node 768 1
Start a node 768 2
featureNum:  768
Start a node 767 1
Start a nod

featureNum:  775
Start a node 774 164
featureNum:  774
Start a node 773 75
featureNum:  773
Start a node 772 48
featureNum:  772
Start a node 771 39
featureNum:  771
Start a node 770 21
featureNum:  770
Start a node 769 20
Start a node 769 1
Start a node 770 18
featureNum:  770
Start a node 769 3
Start a node 769 15
featureNum:  769
Start a node 768 10
Start a node 768 5
featureNum:  768
Start a node 767 3
Start a node 767 2
Start a node 771 9
featureNum:  771
Start a node 770 8
Start a node 770 1
Start a node 772 27
featureNum:  772
Start a node 771 3
featureNum:  771
Start a node 770 2
featureNum:  770
Start a node 769 1
Start a node 769 1
Start a node 770 1
Start a node 771 24
Start a node 773 89
featureNum:  773
Start a node 772 87
featureNum:  772
Start a node 771 84
Start a node 771 3
featureNum:  771
Start a node 770 2
Start a node 770 1
Start a node 772 2
Start a node 774 22
featureNum:  774
Start a node 773 5
featureNum:  773
Start a node 772 3
featureNum:  772
Start a node 77

Start a node 773 5
Start a node 773 1
Start a node 778 1289
featureNum:  778
Start a node 777 702
featureNum:  777
Start a node 776 606
featureNum:  776
Start a node 775 529
featureNum:  775
Start a node 774 510
featureNum:  774
Start a node 774 19
featureNum:  774
Start a node 773 12
featureNum:  773
Start a node 772 5
featureNum:  772
Start a node 771 4
Start a node 771 1
Start a node 772 7
Start a node 773 7
featureNum:  773
Start a node 772 5
Start a node 772 2
featureNum:  772
Start a node 771 1
Start a node 771 1
Start a node 775 77
featureNum:  775
Start a node 774 43
featureNum:  774
Start a node 773 15
featureNum:  773
Start a node 772 11
featureNum:  772
Start a node 771 10
Start a node 771 1
Start a node 772 4
featureNum:  772
Start a node 771 2
Start a node 771 2
Start a node 773 28
featureNum:  773
Start a node 772 24
featureNum:  772
Start a node 771 23
Start a node 771 1
Start a node 772 4
featureNum:  772
Start a node 771 3
Start a node 771 1
Start a node 774 34
feature

Start a node 775 277
featureNum:  775
Start a node 774 8
featureNum:  774
Start a node 773 4
featureNum:  773
Start a node 772 3
Start a node 772 1
Start a node 773 4
featureNum:  773
Start a node 772 2
Start a node 772 2
Start a node 774 269
featureNum:  774
Start a node 773 265
featureNum:  773
Start a node 773 4
Start a node 775 26
featureNum:  775
Start a node 774 11
featureNum:  774
Start a node 773 4
featureNum:  773
Start a node 772 2
featureNum:  772
Start a node 771 1
Start a node 771 1
Start a node 772 2
Start a node 773 7
Start a node 774 15
featureNum:  774
Start a node 773 13
Start a node 773 2
featureNum:  773
Start a node 772 1
Start a node 772 1
Start a node 778 663
featureNum:  778
Start a node 777 270
featureNum:  777
Start a node 776 205
featureNum:  776
Start a node 775 174
featureNum:  775
Start a node 774 46
featureNum:  774
Start a node 773 22
featureNum:  773
Start a node 772 13
featureNum:  772
Start a node 771 8
featureNum:  771
Start a node 770 6
featureNum: 

Start a node 769 1
Start a node 769 2
Start a node 770 4
Start a node 774 204
featureNum:  774
Start a node 773 195
featureNum:  773
Start a node 773 9
featureNum:  773
Start a node 772 4
featureNum:  772
Start a node 771 2
Start a node 771 2
featureNum:  771
Start a node 770 1
Start a node 770 1
Start a node 772 5
Start a node 775 131
featureNum:  775
Start a node 774 74
featureNum:  774
Start a node 773 23
featureNum:  773
Start a node 772 9
featureNum:  772
Start a node 771 5
featureNum:  771
Start a node 770 3
Start a node 770 2
Start a node 771 4
featureNum:  771
Start a node 770 3
Start a node 770 1
Start a node 772 14
featureNum:  772
Start a node 771 2
featureNum:  771
Start a node 770 1
Start a node 770 1
Start a node 771 12
featureNum:  771
Start a node 770 11
Start a node 770 1
Start a node 773 51
featureNum:  773
Start a node 772 26
featureNum:  772
Start a node 771 23
featureNum:  771
Start a node 770 11
featureNum:  770
Start a node 769 7
featureNum:  769
Start a node 768

Start a node 771 13
featureNum:  771
Start a node 770 6
Start a node 770 7
featureNum:  770
Start a node 769 4
Start a node 769 3
featureNum:  769
Start a node 768 1
Start a node 768 2
Start a node 771 36
Start a node 772 3
Start a node 773 156
featureNum:  773
Start a node 772 51
featureNum:  772
Start a node 771 47
featureNum:  771
Start a node 770 17
featureNum:  770
Start a node 769 10
Start a node 769 7
featureNum:  769
Start a node 768 3
Start a node 768 4
featureNum:  768
Start a node 767 3
Start a node 767 1
Start a node 770 30
featureNum:  770
Start a node 769 26
featureNum:  769
Start a node 768 1
Start a node 768 25
Start a node 769 4
featureNum:  769
Start a node 768 3
Start a node 768 1
Start a node 771 4
featureNum:  771
Start a node 770 2
featureNum:  770
Start a node 769 1
Start a node 769 1
Start a node 770 2
Start a node 772 105
featureNum:  772
Start a node 771 96
featureNum:  771
Start a node 770 92
featureNum:  770
Start a node 769 11
featureNum:  769
Start a node 

Start a node 771 2
Start a node 771 3
featureNum:  771
Start a node 770 1
Start a node 770 2
Start a node 773 5
featureNum:  773
Start a node 772 2
Start a node 772 3
Start a node 774 4
featureNum:  774
Start a node 773 2
featureNum:  773
Start a node 772 1
Start a node 772 1
Start a node 773 2
Start a node 777 226
featureNum:  777
Start a node 776 178
featureNum:  776
Start a node 775 165
featureNum:  775
Start a node 774 152
featureNum:  774
Start a node 773 145
featureNum:  773
Start a node 773 7
featureNum:  773
Start a node 772 6
Start a node 772 1
Start a node 774 13
featureNum:  774
Start a node 773 7
featureNum:  773
Start a node 772 4
Start a node 772 3
featureNum:  772
Start a node 771 2
Start a node 771 1
Start a node 773 6
Start a node 775 13
featureNum:  775
Start a node 774 10
featureNum:  774
Start a node 773 9
Start a node 773 1
Start a node 774 3
featureNum:  774
Start a node 773 1
Start a node 773 2
Start a node 776 48
featureNum:  776
Start a node 775 21
featureNum: 

Start a node 774 15
featureNum:  774
Start a node 773 9
featureNum:  773
Start a node 772 5
featureNum:  772
Start a node 771 4
Start a node 771 1
Start a node 772 4
featureNum:  772
Start a node 771 3
Start a node 771 1
Start a node 773 6
featureNum:  773
Start a node 772 3
Start a node 772 3
featureNum:  772
Start a node 771 2
Start a node 771 1
Start a node 774 20
Start a node 775 68
featureNum:  775
Start a node 774 54
featureNum:  774
Start a node 773 23
featureNum:  773
Start a node 772 13
featureNum:  772
Start a node 771 4
featureNum:  771
Start a node 770 3
Start a node 770 1
Start a node 771 9
Start a node 772 10
featureNum:  772
Start a node 771 7
featureNum:  771
Start a node 770 6
Start a node 770 1
Start a node 771 3
Start a node 773 31
featureNum:  773
Start a node 772 30
Start a node 772 1
Start a node 774 14
featureNum:  774
Start a node 773 7
featureNum:  773
Start a node 772 6
featureNum:  772
Start a node 771 5
Start a node 771 1
Start a node 772 1
Start a node 773 

featureNum:  782
Start a node 781 5875
featureNum:  781
Start a node 780 2790
featureNum:  780
Start a node 779 1642
featureNum:  779
Start a node 778 1334
featureNum:  778
Start a node 777 711
featureNum:  777
Start a node 776 558
featureNum:  776
Start a node 775 195
featureNum:  775
Start a node 774 156
featureNum:  774
Start a node 773 126
featureNum:  773
Start a node 772 98
featureNum:  772
Start a node 771 65
featureNum:  771
Start a node 770 42
featureNum:  770
Start a node 769 27
featureNum:  769
Start a node 768 21
featureNum:  768
Start a node 767 19
featureNum:  767
Start a node 766 16
Start a node 766 3
featureNum:  766
Start a node 765 2
featureNum:  765
Start a node 764 1
Start a node 764 1
Start a node 765 1
Start a node 767 2
Start a node 768 6
featureNum:  768
Start a node 767 1
Start a node 767 5
Start a node 769 15
featureNum:  769
Start a node 768 10
featureNum:  768
Start a node 767 8
featureNum:  767
Start a node 766 7
Start a node 766 1
Start a node 767 2
Start 

Start a node 770 1
Start a node 770 1
Start a node 773 8
featureNum:  773
Start a node 772 7
Start a node 772 1
Start a node 774 13
featureNum:  774
Start a node 773 12
Start a node 773 1
Start a node 775 43
featureNum:  775
Start a node 774 12
featureNum:  774
Start a node 773 8
Start a node 773 4
featureNum:  773
Start a node 772 3
Start a node 772 1
Start a node 774 31
featureNum:  774
Start a node 773 28
Start a node 773 3
featureNum:  773
Start a node 772 2
featureNum:  772
Start a node 771 1
Start a node 771 1
Start a node 772 1
Start a node 776 144
featureNum:  776
Start a node 775 31
featureNum:  775
Start a node 774 10
featureNum:  774
Start a node 773 3
featureNum:  773
Start a node 772 2
featureNum:  772
Start a node 771 1
Start a node 771 1
Start a node 772 1
Start a node 773 7
Start a node 774 21
featureNum:  774
Start a node 773 20
Start a node 773 1
Start a node 775 113
featureNum:  775
Start a node 774 38
featureNum:  774
Start a node 773 6
featureNum:  773
Start a node

featureNum:  778
Start a node 777 328
featureNum:  777
Start a node 776 83
featureNum:  776
Start a node 775 31
featureNum:  775
Start a node 774 11
featureNum:  774
Start a node 773 7
Start a node 773 4
featureNum:  773
Start a node 772 3
Start a node 772 1
Start a node 774 20
featureNum:  774
Start a node 773 16
Start a node 773 4
Start a node 775 52
featureNum:  775
Start a node 774 15
featureNum:  774
Start a node 773 8
featureNum:  773
Start a node 772 5
Start a node 772 3
featureNum:  772
Start a node 771 2
featureNum:  771
Start a node 770 1
Start a node 770 1
Start a node 771 1
Start a node 773 7
Start a node 774 37
featureNum:  774
Start a node 773 34
featureNum:  773
Start a node 772 32
featureNum:  772
Start a node 771 29
Start a node 771 3
featureNum:  771
Start a node 770 2
Start a node 770 1
Start a node 772 2
featureNum:  772
Start a node 771 1
Start a node 771 1
Start a node 773 3
Start a node 776 245
featureNum:  776
Start a node 777 67
featureNum:  777
Start a node 77

Start a node 776 887
featureNum:  776
Start a node 775 339
featureNum:  775
Start a node 774 246
featureNum:  774
Start a node 773 116
featureNum:  773
Start a node 772 70
featureNum:  772
Start a node 771 48
featureNum:  771
Start a node 770 32
featureNum:  770
Start a node 769 12
featureNum:  769
Start a node 768 9
featureNum:  768
Start a node 767 8
featureNum:  767
Start a node 766 7
Start a node 766 1
Start a node 767 1
Start a node 768 3
Start a node 769 20
featureNum:  769
Start a node 768 18
featureNum:  768
Start a node 767 17
Start a node 767 1
Start a node 768 2
featureNum:  768
Start a node 767 1
Start a node 767 1
Start a node 770 16
featureNum:  770
Start a node 769 8
featureNum:  769
Start a node 768 4
featureNum:  768
Start a node 767 2
Start a node 767 2
Start a node 768 4
featureNum:  768
Start a node 767 2
Start a node 767 2
Start a node 769 8
featureNum:  769
Start a node 768 6
Start a node 768 2
Start a node 771 22
featureNum:  771
Start a node 770 20
featureNum:  

Start a node 771 2
featureNum:  771
Start a node 770 1
Start a node 770 1
Start a node 771 4
Start a node 773 6
featureNum:  773
Start a node 772 5
Start a node 772 1
Start a node 774 71
featureNum:  774
Start a node 773 65
featureNum:  773
Start a node 772 40
featureNum:  772
Start a node 771 38
featureNum:  771
Start a node 770 14
featureNum:  770
Start a node 769 3
Start a node 769 11
featureNum:  769
Start a node 768 7
Start a node 768 4
featureNum:  768
Start a node 767 2
Start a node 767 2
Start a node 770 24
Start a node 771 2
Start a node 772 25
featureNum:  772
Start a node 771 19
featureNum:  771
Start a node 770 7
featureNum:  770
Start a node 769 5
featureNum:  769
Start a node 768 3
featureNum:  768
Start a node 767 2
Start a node 767 1
Start a node 768 2
Start a node 769 2
featureNum:  769
Start a node 768 1
Start a node 768 1
Start a node 770 12
featureNum:  770
Start a node 769 2
Start a node 769 10
Start a node 771 6
Start a node 773 6
featureNum:  773
Start a node 772

Start a node 778 444
featureNum:  778
Start a node 777 327
featureNum:  777
Start a node 776 291
featureNum:  776
Start a node 775 160
featureNum:  775
Start a node 774 48
featureNum:  774
Start a node 773 41
featureNum:  773
Start a node 772 35
featureNum:  772
Start a node 771 30
Start a node 771 5
featureNum:  771
Start a node 770 3
Start a node 770 2
Start a node 772 6
featureNum:  772
Start a node 771 4
Start a node 771 2
Start a node 773 7
featureNum:  773
Start a node 772 4
Start a node 772 3
featureNum:  772
Start a node 771 2
Start a node 771 1
Start a node 774 112
featureNum:  774
Start a node 773 84
featureNum:  773
Start a node 772 76
featureNum:  772
Start a node 771 72
featureNum:  771
Start a node 770 31
featureNum:  770
Start a node 769 15
Start a node 769 16
featureNum:  769
Start a node 768 9
featureNum:  768
Start a node 767 1
Start a node 767 8
Start a node 768 7
Start a node 770 41
featureNum:  770
Start a node 769 40
Start a node 769 1
Start a node 771 4
featureNu

Start a node 771 2
Start a node 771 1
Start a node 774 21
featureNum:  774
Start a node 773 18
featureNum:  773
Start a node 772 17
featureNum:  772
Start a node 771 1
Start a node 771 16
Start a node 772 1
Start a node 773 3
Start a node 779 2500
featureNum:  779
Start a node 778 2235
featureNum:  778
Start a node 777 2156
featureNum:  777
Start a node 777 79
featureNum:  777
Start a node 776 11
featureNum:  776
Start a node 775 6
featureNum:  775
Start a node 774 2
Start a node 774 4
Start a node 775 5
Start a node 776 68
featureNum:  776
Start a node 775 64
featureNum:  775
Start a node 774 12
featureNum:  774
Start a node 773 7
Start a node 773 5
featureNum:  773
Start a node 772 4
Start a node 772 1
Start a node 774 52
Start a node 775 4
Start a node 778 265
featureNum:  778
Start a node 777 94
featureNum:  777
Start a node 776 48
featureNum:  776
Start a node 775 25
featureNum:  775
Start a node 774 10
featureNum:  774
Start a node 773 8
featureNum:  773
Start a node 772 6
Start 

Start a node 775 51
featureNum:  775
Start a node 774 40
featureNum:  774
Start a node 773 39
Start a node 773 1
Start a node 774 11
featureNum:  774
Start a node 773 10
Start a node 773 1
Start a node 775 22
Start a node 776 27
featureNum:  776
Start a node 775 5
featureNum:  775
Start a node 774 1
Start a node 774 4
Start a node 775 22
featureNum:  775
Start a node 774 11
featureNum:  774
Start a node 773 10
Start a node 773 1
Start a node 774 11
featureNum:  774
Start a node 773 2
featureNum:  773
Start a node 772 1
Start a node 772 1
Start a node 773 9
Start a node 779 725
featureNum:  779
Start a node 778 307
featureNum:  778
Start a node 777 211
featureNum:  777
Start a node 776 168
featureNum:  776
Start a node 775 147
featureNum:  775
Start a node 774 136
featureNum:  774
Start a node 773 132
featureNum:  773
Start a node 772 2
featureNum:  772
Start a node 771 1
Start a node 771 1
Start a node 772 130
featureNum:  772
Start a node 771 128
Start a node 771 2
Start a node 773 4


Start a node 772 39
featureNum:  772
Start a node 771 25
featureNum:  771
Start a node 770 15
featureNum:  770
Start a node 769 11
featureNum:  769
Start a node 768 9
Start a node 768 2
featureNum:  768
Start a node 767 1
Start a node 767 1
Start a node 769 4
Start a node 770 10
featureNum:  770
Start a node 769 5
featureNum:  769
Start a node 768 1
Start a node 768 4
Start a node 769 5
featureNum:  769
Start a node 768 4
Start a node 768 1
Start a node 771 14
featureNum:  771
Start a node 770 1
Start a node 770 13
Start a node 772 8
featureNum:  772
Start a node 771 7
Start a node 771 1
Start a node 774 11
featureNum:  774
Start a node 773 10
Start a node 773 1
Start a node 775 24
featureNum:  775
Start a node 774 23
Start a node 774 1
Start a node 776 189
featureNum:  776
Start a node 775 175
featureNum:  775
Start a node 774 171
featureNum:  774
Start a node 773 28
featureNum:  773
Start a node 772 10
featureNum:  772
Start a node 771 2
Start a node 771 8
featureNum:  771
Start a no

Start a node 770 1
Start a node 770 1
Start a node 771 1
Start a node 774 36
featureNum:  774
Start a node 773 23
featureNum:  773
Start a node 772 6
featureNum:  772
Start a node 771 3
featureNum:  771
Start a node 770 2
Start a node 770 1
Start a node 771 3
Start a node 772 17
featureNum:  772
Start a node 771 2
featureNum:  771
Start a node 770 1
Start a node 770 1
Start a node 771 15
Start a node 773 13
featureNum:  773
Start a node 772 5
Start a node 772 8
Start a node 775 135
featureNum:  775
Start a node 774 29
featureNum:  774
Start a node 773 19
featureNum:  773
Start a node 772 18
Start a node 772 1
Start a node 773 10
featureNum:  773
Start a node 772 4
featureNum:  772
Start a node 771 1
Start a node 771 3
Start a node 772 6
Start a node 774 106
featureNum:  774
Start a node 773 99
featureNum:  773
Start a node 772 10
featureNum:  772
Start a node 771 6
featureNum:  771
Start a node 770 4
Start a node 770 2
featureNum:  770
Start a node 769 1
Start a node 769 1
Start a node

Start a node 775 2
featureNum:  775
Start a node 774 1
Start a node 774 1
Start a node 776 8
featureNum:  776
Start a node 775 4
featureNum:  775
Start a node 774 1
Start a node 774 3
Start a node 775 4
featureNum:  775
Start a node 774 3
Start a node 774 1
Start a node 779 286
featureNum:  779
Start a node 778 96
featureNum:  778
Start a node 777 83
featureNum:  777
Start a node 776 70
featureNum:  776
Start a node 775 58
Start a node 775 12
featureNum:  775
Start a node 774 9
featureNum:  774
Start a node 773 8
Start a node 773 1
Start a node 774 3
Start a node 776 13
featureNum:  776
Start a node 775 10
Start a node 775 3
featureNum:  775
Start a node 774 2
Start a node 774 1
Start a node 777 13
featureNum:  777
Start a node 776 8
featureNum:  776
Start a node 775 7
featureNum:  775
Start a node 774 6
Start a node 774 1
Start a node 775 1
Start a node 776 5
featureNum:  776
Start a node 775 3
featureNum:  775
Start a node 774 2
featureNum:  774
Start a node 773 1
Start a node 773 1


Start a node 771 2
featureNum:  771
Start a node 770 1
Start a node 770 1
Start a node 771 1
Start a node 774 21
featureNum:  774
Start a node 773 2
Start a node 773 19
Start a node 775 22
featureNum:  775
Start a node 774 13
Start a node 774 9
featureNum:  774
Start a node 773 2
featureNum:  773
Start a node 772 1
Start a node 772 1
Start a node 773 7
Start a node 777 61
featureNum:  777
Start a node 776 37
featureNum:  776
Start a node 775 18
featureNum:  775
Start a node 774 3
Start a node 774 15
featureNum:  774
Start a node 773 14
Start a node 773 1
Start a node 775 19
featureNum:  775
Start a node 774 16
featureNum:  774
Start a node 773 12
featureNum:  773
Start a node 772 11
Start a node 772 1
Start a node 773 4
featureNum:  773
Start a node 772 3
Start a node 772 1
Start a node 774 3
featureNum:  774
Start a node 773 1
Start a node 773 2
Start a node 776 24
featureNum:  776
Start a node 775 5
featureNum:  775
Start a node 774 2
featureNum:  774
Start a node 773 1
Start a node 

In [16]:
# Evaluate the model on the test set
# Number of misclassified test samples
error_count = 0
# Predicted labels, in test-set order
y_predcit = []
for i, sample in enumerate(testDataList):
    y_pre = predict(sample, tree)
    y_predcit.append(y_pre)
    # Count every prediction that disagrees with the true label.
    if testLabelList[i] != y_pre:
        error_count += 1
# Accuracy is one minus the error rate.
acc = 1 - error_count / len(testDataList)


In [17]:
acc

0.8588