Getting started with ML and the `wine` dataset.

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and set mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
%%javascript
// Replace OutputArea's `_should_scroll` hook with a function that always
// returns false, whatever line count it is given.
// NOTE(review): presumably this keeps long cell outputs from being collapsed
// into a scrollable box; it relies on the global `IPython` object being
// available in the front end — confirm for the notebook UI in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

## Imports

In [3]:
import numpy as np
from matplotlib import pyplot as plt
import scipy as sp
import pandas as pd

from sklearn import datasets 
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

## Load data

In [4]:
# Load the wine dataset bundled with scikit-learn; as_frame=True requests
# pandas containers for the features and the target.
d = datasets.load_wine(as_frame=True)

In [5]:
# Feature table and class labels taken from the loaded dataset.
X, y = d.data, d.target

## Split data

In [6]:
# Hold out a quarter of the samples for evaluation; the fixed seed keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [7]:
# (rows, columns) of the training features.
X_train.shape

(133, 13)

In [8]:
# (rows, columns) of the held-out test features.
X_test.shape

(45, 13)

## Train an estimator

In [9]:
# Seed the tree so that the fitted model is the same on every run.
estimator = DecisionTreeClassifier(random_state=24)

# The trailing semicolon hides the estimator repr without rebinding `_`,
# which IPython reserves for the previous cell's output.
estimator.fit(X_train, y_train);

In [10]:
# Predict a class for every held-out sample.
y_pred = estimator.predict(X_test)

# Show predictions and ground truth as plain arrays so they line up visually.
print('Predicted: \n', y_pred)
print('Known: \n', y_test.to_numpy())

Predicted: 
 [0 0 2 0 1 0 1 2 1 2 1 0 0 1 0 1 1 1 0 1 0 1 1 2 2 2 1 1 1 0 0 1 2 0 0 0 2
 2 1 2 0 1 1 1 2]
Known: 
 [0 0 2 0 1 0 1 2 1 2 0 2 0 1 0 1 1 1 0 1 0 1 1 2 2 2 1 1 1 0 0 1 2 0 0 0 2
 2 1 2 0 1 1 1 2]


## Evaluate the trained model

In [11]:
# Element-wise comparison of each held-out label with the predictions already
# stored in `y_pred`, instead of running the model a second time.
y_test == y_pred

19      True
45      True
140     True
30      True
67      True
16      True
119     True
174     True
109     True
141     True
24     False
150    False
41      True
118     True
15      True
111     True
113     True
82      True
9       True
114     True
18      True
66      True
60      True
169     True
171     True
164     True
117     True
65      True
90      True
55      True
29      True
128     True
145     True
31      True
12      True
42      True
158     True
137     True
98      True
159     True
38      True
108     True
85      True
68      True
143     True
Name: target, dtype: bool

In [12]:
# Count how many held-out labels the model predicted correctly.
print('Number of matches: {} out of {}'.format(np.sum(y_test == y_pred), len(y_test)))

Number of mateches: 43 out of 45


In [13]:
def accuracy(y_test, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are converted to NumPy arrays and compared position by
    position, so lists, tuples, arrays and pandas Series (whatever their
    index) are all handled the same way.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels, same shape as ``y_test``.

    Returns
    -------
    numpy.float64
        Number of matching positions divided by the number of labels.

    Raises
    ------
    ValueError
        If the inputs have different shapes or contain no labels.
    """
    actual = np.asarray(y_test)
    predicted = np.asarray(y_pred)

    # Without this check `==` could broadcast or collapse to a single bool
    # and the function would return a meaningless score.
    if actual.shape != predicted.shape:
        raise ValueError(
            'y_test and y_pred must have the same shape, got {} and {}'.format(
                actual.shape, predicted.shape
            )
        )

    n = actual.size
    if n == 0:
        raise ValueError('accuracy is undefined for empty input')

    n_matches = np.sum(actual == predicted)

    return n_matches / n

In [14]:
# Share of held-out samples whose predicted label equals the true label.
accuracy(y_test, y_pred)

0.9555555555555556