# バギング

In [0]:
import pandas as pd

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

from sklearn.datasets import load_breast_cancer 

In [0]:
# Load the breast cancer dataset via sklearn.datasets; its `data` and `target`
# attributes feed the train/test split in the next cell.
cancer = load_breast_cancer()

In [0]:
# Hold-out split of the features and labels. Stratifying on the target keeps
# the class proportions equal in both parts; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=42,
)

In [0]:
# Configure a single decision tree (baseline) and a bagging ensemble of 100
# decision trees. Both are given a fixed seed so that re-running the notebook
# reproduces the same scores; the baseline tree was previously unseeded,
# unlike the baseline trees used in the boosting section.
models = {
    'tree': DecisionTreeClassifier(random_state=0),
    'bagging': BaggingClassifier(DecisionTreeClassifier(), n_estimators=100, random_state=0)
}

In [0]:
# Fit each model on the training split, then record its `.score()` on the
# training and test splits under a (model name, split label) key.
scores = dict()
for model_name, model in models.items():
    model.fit(X_train, y_train)
    scores.update({
        (model_name, 'train_score'): model.score(X_train, y_train),
        (model_name, 'test_score'): model.score(X_test, y_test),
    })

# Pivot the tuple-keyed scores into a table: one row per model, one column
# per split label.
pd.Series(scores).unstack()

# ブースティング

In [0]:
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

from sklearn.datasets import load_breast_cancer 

In [0]:
# Load the breast cancer dataset via sklearn.datasets. This rebinds `cancer`,
# which the bagging section above also assigns.
cancer = load_breast_cancer()

In [0]:
# Stratified hold-out split with a fixed seed, using the same arguments as
# the split in the bagging section.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=42,
)

In [0]:
# Candidate models for the AdaBoost comparison: a seeded single decision tree
# as the baseline, and AdaBoost built on a decision-tree base learner.
# NOTE(review): the base tree passed to AdaBoost has no depth limit, so it
# likely fits the training data perfectly, in which case boosting may stop
# after very few rounds and behave like a single tree -- confirm whether a
# shallow base learner was intended.
models = {}
models['tree'] = DecisionTreeClassifier(random_state=0)
models['AdaBoost'] = AdaBoostClassifier(DecisionTreeClassifier(), random_state=0)

In [0]:
# Train every model, then store its `.score()` on the training and test
# splits, keyed by (model name, split label).
scores = dict()
for model_name, model in models.items():
    model.fit(X_train, y_train)
    scores.update({
        (model_name, 'train_score'): model.score(X_train, y_train),
        (model_name, 'test_score'): model.score(X_test, y_test),
    })

# Display as a table: rows are models, columns are split labels.
pd.Series(scores).unstack()

Unnamed: 0,test_score,train_score
AdaBoost,0.923077,1.0
tree,0.937063,1.0


In [0]:
from sklearn.ensemble import GradientBoostingClassifier

In [0]:
# Candidate models for the gradient boosting comparison: the same seeded
# decision-tree baseline, and a gradient boosting classifier that keeps its
# other settings at their defaults.
models = {}
models['tree'] = DecisionTreeClassifier(random_state=0)
models['GradientBoost'] = GradientBoostingClassifier(random_state=0)

In [0]:
# Fit each model on the training data and collect its `.score()` for the
# training and test splits under a (model name, split label) key.
scores = dict()
for model_name, model in models.items():
    model.fit(X_train, y_train)
    scores.update({
        (model_name, 'train_score'): model.score(X_train, y_train),
        (model_name, 'test_score'): model.score(X_test, y_test),
    })

# Reshape into a model-by-split table for display.
pd.Series(scores).unstack()

Unnamed: 0,test_score,train_score
GradientBoost,0.958042,1.0
tree,0.937063,1.0
