### Import Libraries

In [19]:
import warnings

import numpy as np
import pandas as pd
from IPython.display import HTML
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn2pmml import sklearn2pmml, make_pmml_pipeline

### Turn off Notebook Warnings

The JavaScript/HTML snippet below adds a link that toggles the visibility of the red warning (stderr) boxes that appear in cell output.

In [20]:
# Render a script plus a link that toggles the visibility of stderr output
# boxes (elements matching `div.output_stderr`). `HTML` is already imported in
# the imports cell at the top of the notebook.
#
# The script keeps the desired visibility in `code_show_err` (false = hidden)
# and separates "apply the current state" from "flip the state": on page ready
# the state is only applied, so the boxes start hidden, and re-running this
# cell always produces the same result instead of flipping visibility.
# NOTE: the script relies on a global jQuery `$` being available in the page.
HTML('''<script>
code_show_err=false; 
function code_apply_err() {
 $('div.output_stderr').toggle(code_show_err);
}
function code_toggle_err() {
 code_show_err = !code_show_err;
 code_apply_err();
} 
$( document ).ready(code_apply_err);
</script>
To toggle on/off output_stderr, click <a href="javascript:code_toggle_err()">here</a>.''')

### Init `LogisticRegression`, `RandomForestClassifier`, `GaussianNB` Objects

In [21]:
# Base classifiers that the voting ensembles below combine. The logistic
# regression and the random forest get a fixed `random_state` so repeated runs
# use the same seed.
clf1 = LogisticRegression(
    solver='lbfgs',
    multi_class='multinomial',
    random_state=1,
)
clf2 = RandomForestClassifier(
    n_estimators=50,
    random_state=1,
)
clf3 = GaussianNB()

### Load data

In [22]:
# Read the tab-separated data file (relative path, resolved against the
# kernel's working directory) and preview the first rows.
df = pd.read_csv(
    'credit_card_data-headers.txt',
    sep='\t',
)
df.head()

Unnamed: 0,A1,A2,A3,A8,A9,A10,A11,A12,A14,A15,R1
0,1,30.83,0.0,1.25,1,0,1,1,202,0,1
1,0,58.67,4.46,3.04,1,0,6,1,43,560,1
2,0,24.5,0.5,1.5,1,1,0,1,280,824,1
3,1,27.83,1.54,3.75,1,0,5,0,100,3,1
4,1,20.17,5.625,1.71,1,1,0,1,120,0,1


### Pick target and features

In [43]:
# `R1` is the column to predict; every other column of `df` is a feature.
target = 'R1'
y = df[target]
X = df.drop(columns=target)

### Init `VotingClassifier` Object

For this example we'll use `voting='hard'`.

With `voting='hard'`, the ensemble predicts the class label chosen by the majority of the individual classifiers (majority-rule voting).

In [37]:
# Hard-voting ensemble over the three base classifiers; the string in each
# pair is the name under which that estimator can be looked up later.
eclf1 = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting='hard',
)

### Fit `VotingClassifier` w/ `voting='hard'`

In [38]:
# Fit the hard-voting ensemble on the full feature matrix and target.
#
# Warnings emitted while fitting are silenced in Python, and only for the
# duration of this block: the previous filter state is restored on exit, so
# warnings from other cells are unaffected. This replaces a pasted JavaScript
# snippet that re-initialised the page-wide stderr toggle every time the cell
# ran.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    eclf1 = eclf1.fit(X, y)



### Print the accuracy of the `VotingClassifier` on the training data

In [39]:
# Score the hard-voting ensemble on the same X, y it is fitted on in this
# notebook, so this is a training-set figure rather than a held-out estimate.
hard_vote_score = eclf1.score(X, y)
print(hard_vote_score)

0.9281345565749235


We can use either attribute (dot) notation or dictionary-style (bracket) notation to access the individual fitted classifiers inside our `VotingClassifier`; both return the same estimator.

In [28]:
# Look up the fitted logistic regression both ways and confirm that the two
# lookups produce identical predictions.
preds_via_attribute = eclf1.named_estimators_.lr.predict(X)
preds_via_key = eclf1.named_estimators_['lr'].predict(X)
np.array_equal(preds_via_attribute, preds_via_key)

True

### Init `VotingClassifier` Object

For this example we'll use `voting='soft'`.

With `voting='soft'`, the ensemble predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers.

In [30]:
# Soft-voting ensemble over the same three named base classifiers.
eclf2 = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting='soft',
)

### Fit `VotingClassifier` w/ `voting='soft'`

In [31]:
# Fit the soft-voting ensemble on the full feature matrix and target.
#
# Warnings emitted while fitting are silenced in Python, and only for the
# duration of this block: the previous filter state is restored on exit, so
# warnings from other cells are unaffected. This replaces a pasted JavaScript
# snippet that re-initialised the page-wide stderr toggle every time the cell
# ran.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    eclf2 = eclf2.fit(X, y)



### Print the accuracy of the soft-voting `VotingClassifier` on the training data

In [32]:
# Score the soft-voting ensemble on the same X, y it is fitted on in this
# notebook, so this is a training-set figure rather than a held-out estimate.
soft_vote_score = eclf2.score(X, y)
print(soft_vote_score)

0.8853211009174312


### Init `VotingClassifier` Object

For this example we'll use `voting='soft'` and will manually assign weights to each classifier.

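With `voting='soft'`, the ensemble predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers. The `weights` argument scales each classifier's predicted probabilities before they are combined.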

In [33]:
# Soft-voting ensemble with manual weights; the weights are listed in the
# same order as the estimators, so 'lr' gets 2 and 'rf' / 'gnb' get 1 each.
eclf3 = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting='soft',
    weights=[2, 1, 1],
    flatten_transform=True,
)

### Fit `VotingClassifier` w/ `voting='soft'` and manual weights assigned

In [34]:
# Fit the weighted soft-voting ensemble on the full feature matrix and target.
#
# Warnings emitted while fitting are silenced in Python, and only for the
# duration of this block: the previous filter state is restored on exit, so
# warnings from other cells are unaffected. This replaces a pasted JavaScript
# snippet that re-initialised the page-wide stderr toggle every time the cell
# ran.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    eclf3 = eclf3.fit(X, y)



### Print the accuracy of the weighted soft-voting `VotingClassifier` on the training data

In [35]:
# Score the weighted soft-voting ensemble on the same X, y it is fitted on in
# this notebook, so this is a training-set figure rather than a held-out one.
weighted_soft_vote_score = eclf3.score(X, y)
print(weighted_soft_vote_score)

0.8975535168195719


### Make PMML pipeline from `VotingClassifier`

PMML (Predictive Model Markup Language) is a portable model format, which allows us to port our model to other tools such as SAS.

In [47]:
# Wrap the hard-voting ensemble (`eclf1`) in a PMML pipeline, naming the
# input fields after the feature columns and the output field after `target`.
pmml_pipeline = make_pmml_pipeline(
    eclf1,
    active_fields=X.columns,
    target_fields=[target],
)

### Export PMML pipeline to .pmml file

In [48]:
# Write the PMML pipeline to `VotingClassifier.pmml` (relative path, resolved
# against the kernel's working directory).
sklearn2pmml(
    pmml_pipeline,
    'VotingClassifier.pmml',
)

### Fin!