In [1]:
from __future__ import division, print_function
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_circles
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, BaggingRegressor
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier,AdaBoostRegressor
import seaborn as sns
from matplotlib import pyplot as plt

In [2]:
# Make every bare expression statement in a cell echo its value, not only the
# last one. Later cells rely on this to show several variables from one cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" 

# 读取数据

In [3]:
def loadDataSet(fileName):
    """Load a tab-separated sample file into feature rows and labels.

    Each non-blank line must hold at least three tab-separated numeric
    fields: the first two form one feature row, the third is the label.
    Blank or whitespace-only lines are ignored.

    Parameters
    ----------
    fileName : str
        Path of the text file to read.

    Returns
    -------
    tuple
        ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        ``[float, float]`` rows and ``labelMat`` is the parallel list of
        float labels.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a field cannot be parsed as a float.
    IndexError
        If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the handle even when parsing raises.
    with open(fileName) as fr:
        # Iterate the file lazily instead of materialising all lines first.
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # A blank line carries no sample; float('') would raise.
                continue
            lineArr = stripped.split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat

In [4]:
# Parse the tab-separated sample file into feature rows and their labels.
dataMat, labelMat = loadDataSet("3_data.txt")

In [5]:
# Echo the parsed feature rows and then the labels. Both are shown because
# ast_node_interactivity was set to "all" earlier in the notebook.
dataMat
labelMat

[[3.542485, 1.977398],
 [3.018896, 2.556416],
 [7.55151, -1.58003],
 [2.114999, -0.004466],
 [8.127113, 1.274372],
 [7.108772, -0.986906],
 [8.610639, 2.046708],
 [2.326297, 0.265213],
 [3.634009, 1.730537],
 [0.341367, -0.894998],
 [3.125951, 0.293251],
 [2.123252, -0.783563],
 [0.887835, -2.797792],
 [7.139979, -2.329896],
 [1.696414, -1.212496],
 [8.117032, 0.623493],
 [8.497162, -0.266649],
 [4.658191, 3.507396],
 [8.197181, 1.545132],
 [1.208047, 0.2131],
 [1.928486, -0.32187],
 [2.175808, -0.014527],
 [7.886608, 0.461755],
 [3.223038, -0.552392],
 [3.628502, 2.190585],
 [7.40786, -0.121961],
 [7.286357, 0.251077],
 [2.301095, -0.533988],
 [-0.232542, -0.54769],
 [3.457096, -0.082216],
 [3.023938, -0.057392],
 [8.015003, 0.885325],
 [8.991748, 0.923154],
 [7.916831, -1.781735],
 [7.616862, -0.217958],
 [2.450939, 0.744967],
 [7.270337, -2.507834],
 [1.749721, -0.961902],
 [1.803111, -0.176349],
 [8.804461, 3.044301],
 [1.231257, -0.568573],
 [2.074915, 1.41055],
 [-0.743036, -1.73

[-1.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0]

# 数据准备

In [6]:
# Bind the loaded samples to the conventional feature/target names.
X, y = dataMat, labelMat

In [7]:
# Hold out 30% of the samples for evaluation. Fixing random_state makes the
# shuffle deterministic, so a fresh kernel run reproduces the same partition
# instead of a different one on every execution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, random_state=42
)

In [8]:
# Echo the four pieces of the split: training features, training labels,
# test features, test labels (one output per expression statement).
X_train
y_train
X_test
y_test

[[3.542485, 1.977398],
 [7.27946, -0.189572],
 [8.610639, 2.046708],
 [6.016004, -3.753712],
 [5.286862, -2.358286],
 [8.117032, 0.623493],
 [3.457096, -0.082216],
 [-0.232542, -0.54769],
 [7.556201, 0.241185],
 [7.886242, 0.191813],
 [6.960661, -0.245353],
 [1.749721, -0.961902],
 [3.263585, 1.367898],
 [7.270337, -2.507834],
 [8.410143, 0.025606],
 [7.916831, -1.781735],
 [1.727631, -0.453143],
 [7.886608, 0.461755],
 [9.295969, 1.346332],
 [1.966279, -1.840439],
 [2.037772, 0.021919],
 [0.870296, -0.109952],
 [8.015003, 0.885325],
 [2.609525, 0.902979],
 [3.107511, 0.758367],
 [7.382988, -0.478764],
 [8.266085, -0.230977],
 [1.696414, -1.212496],
 [1.339746, -0.291183],
 [2.893743, -1.643468],
 [8.168618, -0.903835],
 [1.928486, -0.32187],
 [1.363782, -0.254082],
 [2.369345, 1.375695],
 [2.074915, 1.41055],
 [7.921057, -1.327587],
 [9.854303, 1.365116],
 [8.500757, 1.492372],
 [8.23446, 0.701868],
 [1.896005, 0.51508],
 [7.40786, -0.121961],
 [8.398012, 1.584918],
 [7.139979, -2.329

[-1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0]

[[7.286357, 0.251077],
 [8.197181, 1.545132],
 [6.080573, 0.418886],
 [2.114999, -0.004466],
 [1.870457, -1.04042],
 [2.326297, 0.265213],
 [2.404234, 0.570278],
 [8.127113, 1.274372],
 [8.112593, 0.295839],
 [2.301095, -0.533988],
 [9.803425, 1.495167],
 [1.963874, -0.365233],
 [8.092288, -1.372433],
 [1.208047, 0.2131],
 [1.667645, 0.239204],
 [3.628502, 2.190585],
 [1.979395, -0.050773],
 [9.229518, 2.066088],
 [3.634009, 1.730537],
 [2.529893, 0.662657],
 [8.102154, -0.603875],
 [1.497407, -0.552916],
 [8.132048, 0.785914],
 [6.543888, 0.433164],
 [7.108772, -0.986906],
 [8.54562, 2.788799],
 [7.55151, -1.58003],
 [1.336267, -1.632889],
 [3.023938, -0.057392],
 [1.534187, -0.622492]]

[1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 -1.0]

# AdaBoost

In [9]:
# Boost decision trees with AdaBoost and report accuracy on the held-out set.
# random_state seeds the ensemble so the fitted model is reproducible.
# The base estimator is passed positionally on purpose: its keyword name
# differs between scikit-learn releases.
# NOTE(review): an unpruned DecisionTreeClassifier will usually fit the
# training set perfectly, in which case boosting may stop after the first
# round -- consider a shallow tree (e.g. max_depth=1); confirm before changing.
ADB_clfTree = AdaBoostClassifier(
    DecisionTreeClassifier(),
    random_state=42,
).fit(X_train, y_train)
ADB_clfTree.score(X_test, y_test)

1.0

# Bagging

In [10]:
# Bag decision trees and report accuracy on the held-out set.
# Bagging resamples the training data at random, so random_state is fixed to
# make the fitted ensemble reproducible across runs.
# The base estimator is passed positionally on purpose: its keyword name
# differs between scikit-learn releases.
B_clfTree = BaggingClassifier(
    DecisionTreeClassifier(),
    random_state=42,
).fit(X_train, y_train)
B_clfTree.score(X_test, y_test)

1.0