In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn import datasets
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
dt_clf = DecisionTreeClassifier()
log_clf = LogisticRegression()
svm_clf = SVC()

In [4]:
voting_clf = VotingClassifier(estimators = [
    ('lr',log_clf),
    ('dt',dt_clf),
    ('svc',svm_clf)
], voting='hard')

In [5]:
bc=datasets.load_breast_cancer()

In [6]:
X,y=bc.data, bc.target

In [7]:
len(X)

569

In [8]:
X_train,X_test,y_train,y_test=train_test_split(X,y, test_size=0.3, random_state=1)

In [9]:
transformer = Normalizer().fit(X_train)

In [10]:
X_train_normalized = transformer.transform(X_train)
X_test_normalized = transformer.transform(X_test)

In [12]:
for clf in (dt_clf, log_clf, svm_clf, voting_clf):
    clf.fit(X_train,y_train)
    y_pred=clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

DecisionTreeClassifier 0.9532163742690059
LogisticRegression 0.9532163742690059
SVC 0.631578947368421
VotingClassifier 0.9590643274853801




In [13]:
from sklearn.ensemble import BaggingClassifier

In [19]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(),
                           n_estimators=500,
                           max_samples=100,
                           bootstrap=True,
                           n_jobs=-1)#코어전부사용

In [20]:
bag_clf.fit(X_train_normalized, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None,
                                                        criterion='gini',
                                                        max_depth=None,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                        min_weight_fraction_leaf=0.0,
                                                        presort=False,
                                                        random_state=None,
                                                        splitter='best'),
    

In [21]:
y_pred = bag_clf.predict(X_test_normalized)

In [22]:
accuracy_score(y_test, y_pred)

0.9239766081871345

In [23]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(),
                           n_estimators=500,
                           max_samples=100,
                           bootstrap=False,
                           n_jobs=-1)#코어전부사용      ##random

In [24]:
bag_clf.fit(X_train_normalized, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None,
                                                        criterion='gini',
                                                        max_depth=None,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                        min_weight_fraction_leaf=0.0,
                                                        presort=False,
                                                        random_state=None,
                                                        splitter='best'),
    

In [25]:
y_pred = bag_clf.predict(X_test_normalized)

In [26]:
accuracy_score(y_test, y_pred)

0.9298245614035088

In [28]:
patches_clf = BaggingClassifier(DecisionTreeClassifier(),
                           n_estimators=500,
                           max_samples=100,
                           bootstrap=True,
                           bootstrap_features = True,
                           max_features=0.8,
                           n_jobs=-1)#코어전부사용

In [29]:
subspaces_clf = BaggingClassifier(DecisionTreeClassifier(),
                           n_estimators=500,
                           max_samples=100,
                           bootstrap=False,
                           bootstrap_features = True,
                           max_features=0.8,
                           n_jobs=-1)#코어전부사용

In [30]:
from sklearn.ensemble import RandomForestClassifier

In [32]:
rnd_clf = RandomForestClassifier(n_estimators=10, n_jobs=-1)

In [33]:
rnd_clf.fit(X_train_normalized, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
                       oob_score=False, random_state=None, verbose=0,
                       warm_start=False)

In [34]:
accuracy_score(y_test, rnd_clf.predict(X_test_normalized))

0.9415204678362573

In [35]:
from sklearn.ensemble import AdaBoostClassifier

In [36]:
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(),
                            n_estimators=100)

In [37]:
ada_clf.fit(X_train_normalized, y_train)

AdaBoostClassifier(algorithm='SAMME.R',
                   base_estimator=DecisionTreeClassifier(class_weight=None,
                                                         criterion='gini',
                                                         max_depth=None,
                                                         max_features=None,
                                                         max_leaf_nodes=None,
                                                         min_impurity_decrease=0.0,
                                                         min_impurity_split=None,
                                                         min_samples_leaf=1,
                                                         min_samples_split=2,
                                                         min_weight_fraction_leaf=0.0,
                                                         presort=False,
                                                         random_state=None,
                          

In [38]:
accuracy_score(y_test, ada_clf.predict(X_test_normalized))

0.9473684210526315

In [39]:
from sklearn.ensemble import GradientBoostingClassifier

In [44]:
gb_clf = GradientBoostingClassifier(DecisionTreeClassifier(),
                            n_estimators=100)

In [45]:
gb_clf.fit(X_train_normalized, y_train)

TypeError: unsupported format string passed to DecisionTreeClassifier.__format__

In [43]:
accuracy_score(y_test, ada_clf.predict(X_test_normalized))

0.9473684210526315