This notebook compares the test-set accuracy of several scikit-learn classifiers (decision tree, random forest, AdaBoost and SVM) on the `wine` dataset.

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and use mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
%%javascript
// Replace the output area's scroll check with one that always answers "no",
// regardless of how many lines the output has.
// NOTE(review): relies on the global `IPython` object of the classic Notebook
// front end - confirm it exists in the front end you use.
IPython.OutputArea.prototype._should_scroll = (_lines) => false;

<IPython.core.display.Javascript object>

## Imports

In [3]:
import numpy as np
from matplotlib import pyplot as plt
import scipy as sp
import pandas as pd

from sklearn import datasets 
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

## Load data

In [4]:
# Load the wine dataset; as_frame=True asks scikit-learn for pandas objects
# instead of plain NumPy arrays.
d = datasets.load_wine(as_frame=True)

In [5]:
# Feature table (X) and class labels (y) taken from the loaded dataset.
X, y = d.data, d.target

## Split data

In [6]:
# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [7]:
# Sanity check: (rows, columns) of the training features.
X_train.shape

(133, 13)

In [8]:
# Sanity check: (rows, columns) of the held-out test features.
X_test.shape

(45, 13)

## Several estimators

In [9]:
# Classifiers to compare, keyed by the label printed in the report.
# Every estimator is constructed with random_state=24 and otherwise default
# hyper-parameters.
# NOTE(review): the SVC is fit on the raw, unscaled features. SVMs are
# sensitive to feature scale, which may explain its lower accuracy in the
# report - consider wrapping it in a StandardScaler pipeline.
estimators = dict(
    decision_tree=DecisionTreeClassifier(random_state=24),
    random_forest=RandomForestClassifier(random_state=24),
    adaboost=AdaBoostClassifier(random_state=24),
    svm=SVC(random_state=24),
)

In [10]:
# Size of the held-out set, used as the denominator in the report.
n_total = y_test.shape[0]

# Fit each candidate on the training split, predict the test split and print
# a three-line report: label, match count with accuracy, blank separator.
for k, estimator in estimators.items():
    # fit() returns the fitted estimator, so prediction can be chained.
    y_pred = estimator.fit(X_train, y_train).predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    # Count of test samples whose predicted label equals the true label.
    n_matches = (y_test == y_pred).sum()

    print(
        k,
        'Number of matches: {} out of {} (accuracy = {:.3f})'.format(n_matches, n_total, acc),
        '',
        sep='\n',
    )

decision_tree
Number of matches: 43 out of 45 (accuracy = 0.956)

random_forest
Number of matches: 45 out of 45 (accuracy = 1.000)

adaboost
Number of matches: 41 out of 45 (accuracy = 0.911)

svm
Number of matches: 32 out of 45 (accuracy = 0.711)

