In [1]:
import numpy as np
from sklearn.datasets import load_wine

# Loading the dataset

In [2]:
wine = load_wine()

## Exploring the data

In [3]:
print("Features: {}".format(wine.feature_names))

Features: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']


In [4]:
print("Labels: {}".format(wine.target_names))

Labels: ['class_0' 'class_1' 'class_2']


In [5]:
print(wine.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [6]:
X = wine.data

In [7]:
y = wine.target

In [8]:
X.shape

(178, 13)

## Splitting the dataset into training and test

Here we shall use 75% training and 25% test data

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.25)

In [10]:
X_train.shape

(133, 13)

In [11]:
X_test.shape

(45, 13)

## Creating the model

In [12]:
from sklearn.naive_bayes import GaussianNB  # Import Gaussian Naive Bayes Model

In [13]:
gnb = GaussianNB()

In [14]:
gnb.fit(X_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

### Testing / Predicting by the model

In [15]:
y_pred = gnb.predict(X_test)

### Evaluating Model performance

In [16]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

cm

array([[11,  1,  0],
       [ 0, 23,  1],
       [ 0,  0,  9]])

In [17]:
corrects = np.trace(cm)
total = np.sum(cm)

print("Total number of correct predictions: {} out of: {}".format(corrects, total))


Total number of correct predictions: 43 out of: 45


In [18]:
accuracy = 100*corrects/total

In [19]:
print("Accuracy of the model: {:0.2f}%".format(accuracy))

Accuracy of the model: 95.56%


In [21]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.96      0.96      0.96        24
           2       0.90      1.00      0.95         9

    accuracy                           0.96        45
   macro avg       0.95      0.96      0.95        45
weighted avg       0.96      0.96      0.96        45

