In [1]:
from sklearn.datasets import load_wine
import numpy as np
import pandas as pd 

# Load the wine dataset into a single frame: the feature columns followed by
# a "target" column holding the class label of each row.
wine = load_wine()
df = pd.DataFrame(
    data=np.c_[wine['data'], wine['target']],
    columns=wine['feature_names'] + ['target'],
)
print(df)

# Reproducible split: sample 70% of the rows for training (fixed seed) and
# keep every remaining row for testing.
train = df.sample(frac=0.7, random_state=1)
test = df.drop(train.index)
y_train = train["target"]  # class label column
x_train = train.drop("target", axis=1)
y_test = test["target"]
x_test = test.drop("target", axis=1)

# Per-class Gaussian parameters for every feature
# (rows: class label, columns: feature name).
class_groups = train.groupby("target")
means = class_groups.mean()
var = class_groups.var()  # sample variance (pandas default, ddof=1)

# Class prior = share of training rows belonging to each class. size() counts
# rows directly, so the prior does not depend on the non-null count of one
# arbitrarily chosen feature column.
prior = class_groups.size() / len(train)

# Sorted array of the distinct class labels seen in the training split.
classes = np.unique(y_train)

def Normal(n, mu, var):
    """Evaluate the Gaussian probability density at ``n``.

    Parameters
    ----------
    n : value (or array of values) at which the density is evaluated.
    mu : mean of the distribution.
    var : variance of the distribution (its square root is used as the
        standard deviation).

    Returns
    -------
    The density ``exp(-z**2 / 2) / (sd * sqrt(2 * pi))`` with
    ``z = (n - mu) / sd`` and ``sd = sqrt(var)``.
    """
    sd = np.sqrt(var)
    z = (n - mu) / sd  # standardised distance from the mean
    numerator = np.e ** (-0.5 * z ** 2)
    denominator = sd * np.sqrt(2 * np.pi)
    return numerator / denominator

def Predict(X):
    """Predict a class label for every row of ``X`` with Gaussian naive Bayes.

    Reads the fitted parameters from the module-level names ``classes``,
    ``prior``, ``means``, ``var`` and the feature list ``x_train.columns``.

    For each class the score of a row is

        log(prior[cls]) + sum over features of log N(x | mean, variance)

    and the class with the highest score wins (the first one on ties).

    The Gaussian log-density is computed directly instead of taking the log
    of the density. A density that underflows to 0 for a value far from the
    class mean therefore still yields a large negative contribution, rather
    than being replaced by a small positive constant that would favour the
    least plausible class.

    Parameters
    ----------
    X : pandas.DataFrame containing at least the columns of ``x_train``.

    Returns
    -------
    list
        One element of ``classes`` per row of ``X``, in row order.
    """
    feature_cols = list(x_train.columns)
    features = X[feature_cols].to_numpy(dtype=float)

    # scores[r, k] = log-posterior (up to a constant) of class k for row r.
    scores = np.empty((len(features), len(classes)))
    for k, cls in enumerate(classes):
        mu = means.loc[cls, feature_cols].to_numpy(dtype=float)
        sigma2 = var.loc[cls, feature_cols].to_numpy(dtype=float)
        # log N(x | mu, sigma2) = -0.5 * (log(2*pi*sigma2) + (x - mu)^2 / sigma2)
        log_density = -0.5 * (
            np.log(2 * np.pi * sigma2) + (features - mu) ** 2 / sigma2
        )
        scores[:, k] = np.log(prior[cls]) + log_density.sum(axis=1)

    # argmax returns the first maximum, matching list.index(max(...)).
    return [classes[best] for best in scores.argmax(axis=1)]

def Accuracy(y, prediction):
    """Return the fraction of positions where ``prediction`` equals ``y``.

    Both arguments are materialised as lists. Pairs are compared position by
    position with ``zip`` (so comparison stops at the shorter input), and the
    number of matches is divided by the length of ``y``.
    """
    truth = list(y)
    guesses = list(prediction)
    hits = sum(1 for expected, got in zip(truth, guesses) if expected == got)
    return hits / len(truth)

# Score the hand-written classifier on both splits.
PredictTrain = Predict(x_train)
PredictTest = Predict(x_test)

train_accuracy = round(Accuracy(y_train, PredictTrain), 5)
test_accuracy = round(Accuracy(y_test, PredictTest), 5)

print('Training Accuracy: %.4f' % train_accuracy)
print('Testing Accuracy: %.4f' % test_accuracy)


     alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0      14.23        1.71  2.43               15.6      127.0           2.80   
1      13.20        1.78  2.14               11.2      100.0           2.65   
2      13.16        2.36  2.67               18.6      101.0           2.80   
3      14.37        1.95  2.50               16.8      113.0           3.85   
4      13.24        2.59  2.87               21.0      118.0           2.80   
..       ...         ...   ...                ...        ...            ...   
173    13.71        5.65  2.45               20.5       95.0           1.68   
174    13.40        3.91  2.48               23.0      102.0           1.80   
175    13.27        4.28  2.26               20.0      120.0           1.59   
176    13.17        2.59  2.37               20.0      120.0           1.65   
177    14.13        4.10  2.74               24.5       96.0           2.05   

     flavanoids  nonflavanoid_phenols  proanthocyan

In [3]:
from sklearn.naive_bayes import GaussianNB
# Baseline: scikit-learn's Gaussian naive Bayes fitted on the same training split.
clf = GaussianNB()
clf.fit(x_train, y_train)

# Predictions on the training rows and on the held-out test rows.
SkTrain = clf.predict(x_train)
SkTest = clf.predict(x_test)

sk_train_accuracy = round(Accuracy(y_train, SkTrain), 5)
sk_test_accuracy = round(Accuracy(y_test, SkTest), 5)

print('Training Accuracy: %.4f' % sk_train_accuracy)
print('Testing Accuracy: %.4f' % sk_test_accuracy)

Training Accuracy: 0.9920
Testing Accuracy: 0.9811
