In [13]:
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier

In [2]:
from sklearn.ensemble import BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

In [3]:
iris = load_iris()
wine = load_wine()
cancer = load_breast_cancer()

In [4]:
# Baseline classifier: standardize the features, then k-nearest neighbours.
base_model = make_pipeline(
  StandardScaler(),
  KNeighborsClassifier()
)

# Bagging ensemble of 10 copies of the baseline, each fitted on a random half
# of the samples and a random half of the features. The sampling is random,
# so the ensemble is seeded to make the cross-validation score reproducible.
bagging_model = BaggingClassifier(
  base_model,
  n_estimators=10,
  max_samples=0.5,
  max_features=0.5,
  random_state=42
)

In [7]:
# 5-fold cross-validation of the current `base_model` on the iris data.
cross_val = cross_validate(base_model, iris.data, iris.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.96 +/- 0.02


In [8]:
# 5-fold cross-validation of the current `bagging_model` on the iris data.
cross_val = cross_validate(bagging_model, iris.data, iris.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.94 +/- 0.03


In [10]:
from sklearn.datasets import load_diabetes

# Regression dataset used by the regressor experiments below.
diabetes = load_diabetes()

# Baseline regressor: standardize the features, then k-nearest neighbours.
base_model = make_pipeline(
  StandardScaler(),
  KNeighborsRegressor()
)

# Bagging ensemble of 10 copies of the baseline, each fitted on a random half
# of the samples and features. Seeded so the reported score is reproducible.
bagging_model = BaggingRegressor(
  base_model,
  n_estimators=10,
  max_samples=.5,
  max_features=.5,
  random_state=42
)

In [11]:
# 5-fold cross-validation of the current `base_model` on the diabetes data.
cross_val = cross_validate(base_model, diabetes.data, diabetes.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.37 +/- 0.04


In [12]:
# 5-fold cross-validation of the current `bagging_model` on the diabetes data.
cross_val = cross_validate(bagging_model, diabetes.data, diabetes.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.40 +/- 0.05


In [14]:
# Baseline regressor: standardize the features, then a support vector regressor.
base_model = make_pipeline(
  StandardScaler(),
  SVR()
)

# Bagging ensemble of 10 copies of the baseline, each fitted on a random half
# of the samples and features. Seeded so the reported score is reproducible.
bagging_model = BaggingRegressor(
  base_model,
  n_estimators=10,
  max_samples=.5,
  max_features=.5,
  random_state=42
)

In [15]:
# Evaluate the current `base_model` with 5-fold cross-validation (diabetes).
cross_val = cross_validate(base_model, X=diabetes.data, y=diabetes.target, cv=5)

# Summarize the per-fold test scores as mean +/- standard deviation.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.15 +/- 0.02


In [16]:
# Evaluate the current `bagging_model` with 5-fold cross-validation (diabetes).
cross_val = cross_validate(bagging_model, X=diabetes.data, y=diabetes.target, cv=5)

# Summarize the per-fold test scores as mean +/- standard deviation.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.06 +/- 0.03


In [17]:
# Baseline regressor: standardize the features, then a single decision tree.
# The tree is seeded because its fitting uses a random number generator;
# without a seed the baseline score changes from run to run.
base_model = make_pipeline(
  StandardScaler(),
  DecisionTreeRegressor(random_state=42)
)

# Bagging ensemble of 10 copies of the baseline, each fitted on a random half
# of the samples and features. Seeded so the reported score is reproducible.
bagging_model = BaggingRegressor(
  base_model,
  n_estimators=10,
  max_samples=.5,
  max_features=.5,
  random_state=42
)

In [18]:
# 5-fold cross-validation of the current `base_model` on the diabetes data.
cross_val = cross_validate(base_model, diabetes.data, diabetes.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : -0.17 +/- 0.12


In [19]:
# 5-fold cross-validation of the current `bagging_model` on the diabetes data.
cross_val = cross_validate(bagging_model, diabetes.data, diabetes.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.36 +/- 0.10


Bagging은 DecisionTree처럼 분산이 큰 모델과 특히 잘 맞음 (위 diabetes 결과: 단일 트리 -0.17 → bagging 0.36).

### Random Forest

In [21]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

In [22]:
# Random forest classifier behind a standard scaler. The forest is seeded so
# that repeated runs of the notebook give the same cross-validation scores.
model = make_pipeline(
  StandardScaler(),
  RandomForestClassifier(random_state=42)
)

# 5-fold cross-validation on the iris data.
cross_val = cross_validate(
  estimator=model,
  X=iris.data, y=iris.target,
  cv=5
)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")


avg test score : 0.97 +/- 0.02


In [24]:
# 5-fold cross-validation of the current `model` on the wine data.
cross_val = cross_validate(model, wine.data, wine.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")


avg test score : 0.98 +/- 0.02


In [26]:
# 5-fold cross-validation of the current `model` on the breast cancer data.
cross_val = cross_validate(model, cancer.data, cancer.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.96 +/- 0.02


In [27]:
# Random forest regressor behind a standard scaler. Seeded so that the
# cross-validation score computed from this model is reproducible.
model = make_pipeline(
  StandardScaler(),
  RandomForestRegressor(random_state=42)
)

In [28]:
# Evaluate the current `model` with 5-fold cross-validation (diabetes).
cross_val = cross_validate(model, X=diabetes.data, y=diabetes.target, cv=5)

# Summarize the per-fold test scores as mean +/- standard deviation.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.43 +/- 0.05


In [29]:
# Extremely randomized trees classifier behind a standard scaler. Seeded so
# that the cross-validation scores computed from this model are reproducible.
model = make_pipeline(
  StandardScaler(),
  ExtraTreesClassifier(random_state=42)
)

In [30]:
# 5-fold cross-validation of the current `model` on the wine data.
cross_val = cross_validate(model, wine.data, wine.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.99 +/- 0.01


In [32]:
# 5-fold cross-validation of the current `model` on the breast cancer data.
cross_val = cross_validate(model, cancer.data, cancer.target, cv=5)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.97 +/- 0.02


In [33]:
# Extremely randomized trees regressor behind a standard scaler. Seeded so
# that the cross-validation score computed from this model is reproducible.
model = make_pipeline(
  StandardScaler(),
  ExtraTreesRegressor(random_state=42)
)

In [34]:
# Evaluate the current `model` with 5-fold cross-validation (diabetes).
cross_val = cross_validate(model, X=diabetes.data, y=diabetes.target, cv=5)

# Summarize the per-fold test scores as mean +/- standard deviation.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.45 +/- 0.03


### AdaBoost

In [35]:
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor

In [36]:
# AdaBoost classifier behind a standard scaler. Seeded so that repeated runs
# of the notebook give the same cross-validation score.
model = make_pipeline(
  StandardScaler(),
  AdaBoostClassifier(random_state=42)
)

# 5-fold cross-validation on the iris data.
cross_val = cross_validate(
  estimator=model,
  X=iris.data, y=iris.target,
  cv=5
)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.95 +/- 0.03


In [37]:
# AdaBoost regressor behind a standard scaler. Seeded so that repeated runs
# of the notebook give the same cross-validation score.
model = make_pipeline(
  StandardScaler(),
  AdaBoostRegressor(random_state=42)
)

# 5-fold cross-validation on the diabetes data.
cross_val = cross_validate(
  estimator=model,
  X=diabetes.data, y=diabetes.target,
  cv=5
)

# Mean and standard deviation of the per-fold test scores.
print(f"avg test score : {cross_val['test_score'].mean():.2f} +/- {cross_val['test_score'].std():.2f}")

avg test score : 0.39 +/- 0.04


### Gradient Tree Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor

### Voting Classifier
- hard voting(가장 많이 예측된 클래스를 정답으로 채택)
- soft voting(각 모델이 예측한 클래스 확률의 (가중) 평균이 가장 높은 클래스를 채택)

In [39]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.model_selection import cross_val_score

In [41]:
# Three heterogeneous classifiers combined by majority (hard) voting.
model1 = SVC()
model2 = GaussianNB()
# The forest is the only randomized member here; seed it so the voting
# result is reproducible across runs.
model3 = RandomForestClassifier(random_state=42)
vote_model = VotingClassifier(
  # NOTE(review): the label 'navie' looks like a typo for 'naive'. It is kept
  # unchanged because estimator labels act as parameter keys, and renaming it
  # could break code outside this cell that refers to it.
  estimators=[('svc', model1), ('navie', model2), ('forest', model3)],
  voting='hard'
)

In [43]:
# 5-fold cross-validation scores of the voting ensemble on the iris data.
score = cross_val_score(estimator=vote_model, X=iris.data, y=iris.target, cv=5)

# Average score over the folds.
print(score.mean())

0.9666666666666668


#### 스택 일반화
- 예측 모델의 결과 값을 다시 모델에 입력으로 사용

In [47]:
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import StackingClassifier, StackingRegressor, GradientBoostingClassifier, GradientBoostingRegressor

In [48]:
# First-level regressors whose predictions become the inputs of the
# final estimator.
estimators = [('ridge', Ridge()), ('lasso', Lasso()), ('svr', SVR())]

# Stacked regressor behind a standard scaler. The gradient-boosting final
# estimator is seeded so the reported score is reproducible.
reg = make_pipeline(
  StandardScaler(),
  StackingRegressor(
    estimators=estimators,
    final_estimator=GradientBoostingRegressor(random_state=42)
  )
)

In [49]:
# 5-fold cross-validation scores of the stacked regressor on the diabetes data.
score = cross_val_score(estimator=reg, X=diabetes.data, y=diabetes.target, cv=5)

# Average score over the folds.
print(score.mean())

0.37187218379812004
