### Naive_Bayes Application

## Load the required libraries

In [2]:
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

## Read and Understand the data

In [3]:
data = pd.read_csv('Tennis_Data.csv')

In [4]:
data.shape

(14, 5)

In [5]:
data.head()

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Class
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [6]:
data.describe()

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Class
count,14,14,14,14,14
unique,3,3,2,2,2
top,Rain,Mild,Normal,Weak,Yes
freq,5,6,7,8,9


## To check for missing or null values

In [7]:
data.isnull().sum()

Outlook        0
Temperature    0
Humidity       0
Windy          0
Class          0
dtype: int64

In [8]:
data.dtypes

Outlook        object
Temperature    object
Humidity       object
Windy          object
Class          object
dtype: object

In [9]:
data.columns

Index(['Outlook', 'Temperature', 'Humidity', 'Windy', 'Class'], dtype='object')

## Type Conversions

In [10]:
data = data.astype("category")

In [11]:
data.dtypes

Outlook        category
Temperature    category
Humidity       category
Windy          category
Class          category
dtype: object

In [12]:
data = pd.get_dummies(data)

In [13]:
data.dtypes

Outlook_Overcast    uint8
Outlook_Rain        uint8
Outlook_Sunny       uint8
Temperature_Cool    uint8
Temperature_Hot     uint8
Temperature_Mild    uint8
Humidity_High       uint8
Humidity_Normal     uint8
Windy_Strong        uint8
Windy_Weak          uint8
Class_No            uint8
Class_Yes           uint8
dtype: object

## To Split data and Model Building

In [14]:
array= data.values
X= array[:,0:4]
Y= array[:,4]
test_size= 0.3
seed= 10
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=test_size, random_state=seed)

In [15]:
X_train

array([[1, 0, 0, 0],
       [0, 0, 1, 0],
       [0, 1, 0, 1],
       [1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 1, 0, 1],
       [0, 1, 0, 0]], dtype=uint8)

In [16]:
Y_train

array([1, 0, 0, 0, 0, 1, 1, 0, 0], dtype=uint8)

In [17]:
model = GaussianNB()
model.fit(X_train, Y_train)
print(model)

GaussianNB()


In [18]:
expected = Y_test
predicted = model.predict(X_test)

In [19]:
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.50      1.00      0.67         1

    accuracy                           0.80         5
   macro avg       0.75      0.88      0.76         5
weighted avg       0.90      0.80      0.82         5

[[3 1]
 [0 1]]


### Micro-averaged: 
all samples equally contribute to the final averaged metric
### Macro-averaged: 
all classes equally contribute to the final averaged metric
### Weighted-averaged: 
each classes’s contribution to the average is weighted by its size