## Ensemble Methods With Sklearn

Ensemble SKlearn

### Content

- [Packages](#pack)
- [Voting Hard And Soft](#sklearn1)

## <a id = 'pack'>Packages </a>

In [13]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

## <a id = 'sklearn1'>  Hard Voting and Soft Voting </a>

In [4]:
data, target = load_breast_cancer().data, load_breast_cancer().target

In [5]:
data.shape, target.shape

((569, 30), (569,))

In [7]:
pd.DataFrame(data).isna().sum()

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
28    0
29    0
dtype: int64

In [12]:
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size = 0.2)

In [24]:
log_clf = LogisticRegression(C = 1, solver = 'liblinear')
svc_clf = SVC(C=1, kernel= 'linear', gamma = 'auto', probability = True)
naive_clf = GaussianNB()

In [25]:
for clf in (log_clf,svc_clf,naive_clf):
    clf.fit(X_train,y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test,y_pred))

LogisticRegression 0.9385964912280702
SVC 0.9385964912280702
GaussianNB 0.9298245614035088


In [26]:
clf_vote = VotingClassifier(estimators= [('lr',log_clf),
                                         ('svc',svc_clf),
                                          ('naive',naive_clf)],
                voting='hard',
                weights=None,
                n_jobs=5,
                flatten_transform=True,
                verbose=True,)
clf_vote.fit(X_train,y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1, solver='liblinear')),
                             ('svc',
                              SVC(C=1, gamma='auto', kernel='linear',
                                  probability=True)),
                             ('naive', GaussianNB())],
                 n_jobs=5, verbose=True)

In [27]:
clf_vote.estimators

[('lr', LogisticRegression(C=1, solver='liblinear')),
 ('svc', SVC(C=1, gamma='auto', kernel='linear', probability=True)),
 ('naive', GaussianNB())]

In [28]:
accuracy_score(y_test,clf_vote.predict(X_test))

0.9385964912280702

In [31]:
clf_vote = VotingClassifier(estimators= [('lr',log_clf),
                                         ('svc',svc_clf),
                                          ('naive',naive_clf)],
                voting='soft',
                weights=[0.4,0.3,0.3],
                n_jobs=5,
                flatten_transform=True,
                verbose=True,)
clf_vote.fit(X_train,y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1, solver='liblinear')),
                             ('svc',
                              SVC(C=1, gamma='auto', kernel='linear',
                                  probability=True)),
                             ('naive', GaussianNB())],
                 n_jobs=5, verbose=True, voting='soft',
                 weights=[0.4, 0.3, 0.3])

In [32]:
accuracy_score(y_test,clf_vote.predict(X_test))

0.9298245614035088