## Coding Exercise #0313

### 1. Voting Ensemble:

In [1]:
import numpy as np
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn import metrics
from sklearn.datasets import load_breast_cancer
warnings.filterwarnings(action='ignore')                  # Turn off the warnings.

#### 1.1. Read in data:

In [2]:
# Load data.
data = load_breast_cancer()

In [3]:
# Explanatory variables.
X = data['data']
print(data['feature_names'])

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [4]:
X.shape

(569, 30)

In [5]:
# Response variable.
# Relabel such that 0 = 'benign' and 1 = malignant.
Y = 1 - data['target']
label = list(data['target_names'])
label.reverse()
print(label)

['benign', 'malignant']


In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1234)

#### 1.2. Predicting with individual estimator:

In [7]:
# Classification Tree.
DTC = DecisionTreeClassifier(max_depth=10)
DTC.fit(X_train, Y_train)
Y_pred = DTC.predict(X_test)
print( "Tree accuracy : " + str(np.round(metrics.accuracy_score(Y_test,Y_pred),3)))

Tree accuracy : 0.953


In [8]:
# Classification with KNN
KNN = KNeighborsClassifier(n_neighbors=5)
KNN.fit(X_train, Y_train)
Y_pred = KNN.predict(X_test)
print( "KNN accuracy : " + str(np.round(metrics.accuracy_score(Y_test,Y_pred),3)))

KNN accuracy : 0.936


In [9]:
# Classification with Logistic Regression.
LR = LogisticRegression()
LR.fit(X_train, Y_train)
Y_pred = LR.predict(X_test)
print( "Logistic regression accuracy : " + str(np.round(metrics.accuracy_score(Y_test,Y_pred),3)))

Logistic regression accuracy : 0.918


#### 1.3. Predicting with a voting ensemble:

In [10]:
VC = VotingClassifier(estimators=[('Tree',DTC),('knn',KNN),('Logistic',LR)],voting='hard')             # voting = 'hard'.
VC.fit(X_train, Y_train)
Y_pred = VC.predict(X_test)
print( "Voting Classifier Accuracy : " + str(np.round(metrics.accuracy_score(Y_test,Y_pred),3)))

Voting Classifier Accuracy : 0.953


In [11]:
VC = VotingClassifier(estimators=[('Tree',DTC),('knn',KNN),('Logistic',LR)],voting='soft')             # voting = 'soft'.
VC.fit(X_train, Y_train)
Y_pred = VC.predict(X_test)
print( "Voting Classifier Accuracy : " + str(np.round(metrics.accuracy_score(Y_test,Y_pred),3)))

Voting Classifier Accuracy : 0.953
