Load the pandas, NumPy, and [scikit-learn](https://scikit-learn.org/stable/) packages.

## Gaussian Naive Bayes


In [1]:
import pandas as pd
import numpy as np

from sklearn.naive_bayes import GaussianNB
from sklearn import preprocessing

### Tennis example (multiple predictors)

In [2]:
# Tennis data set flattened into one whitespace-separated string: the first
# three tokens are the column headers (Weather, Temp, Play) and every following
# group of three tokens is one observation (13 observations in total).
# split() with no arguments breaks on any whitespace, giving a flat token list.
multi_mixed = """
  Weather
  Temp
  Play
  Sunny
  Hot
  No
  Sunny
  Hot
  No
  Overcast
  Hot
  Yes
  Rainy
  Mild
  Yes
  Rainy
  Cool
  Yes
  Rainy
  Cool
  No
  Overcast
  Cool
  Yes
  Sunny
  Mild
  No
  Sunny
  Cool
  Yes
  Rainy
  Mild
  Yes
  Sunny
  Mild
  Yes
  Rainy
  Mild
  No
  Overcast
  Hot
  Yes

""".split()
print(multi_mixed)


['Weather', 'Temp', 'Play', 'Sunny', 'Hot', 'No', 'Sunny', 'Hot', 'No', 'Overcast', 'Hot', 'Yes', 'Rainy', 'Mild', 'Yes', 'Rainy', 'Cool', 'Yes', 'Rainy', 'Cool', 'No', 'Overcast', 'Cool', 'Yes', 'Sunny', 'Mild', 'No', 'Sunny', 'Cool', 'Yes', 'Rainy', 'Mild', 'Yes', 'Sunny', 'Mild', 'Yes', 'Rainy', 'Mild', 'No', 'Overcast', 'Hot', 'Yes']


In [3]:
# De-interleave the flat token list: tokens at offsets 0, 1 and 2 (stepping by
# three) belong to the Weather, Temp and Play columns respectively. Each list
# still starts with its column header at this point.
weather, temp, play = (multi_mixed[offset::3] for offset in range(3))

# One column per output line
print(weather, temp, play, sep='\n')


['Weather', 'Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy', 'Sunny', 'Rainy', 'Overcast']
['Temp', 'Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot']
['Play', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes']


In [4]:
import pprint
pp = pprint.PrettyPrinter(depth=2, compact=True, width=150)

# Split every column into its header (first token) and its values (the rest).
# The columns are re-derived from multi_mixed instead of being pop(0)-ed in
# place, so re-running this cell is safe: an in-place pop would strip a real
# observation off each list on every extra run and use it as a dict key.
weather_name, *weather = multi_mixed[0::3]
temp_name, *temp = multi_mixed[1::3]
play_name, *play = multi_mixed[2::3]

# Column name -> list of values. weather/temp/play are now header-free lists,
# which is what the encoding cells below expect.
table = {weather_name: weather, temp_name: temp, play_name: play}
pp.pprint(table)


{'Play': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes'],
 'Temp': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot'],
 'Weather': ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy', 'Sunny', 'Rainy', 'Overcast']}


In [5]:
# A single LabelEncoder is re-fitted on each column in turn (weather, then
# temp, then play), so once this cell finishes it only holds the classes of
# the last column it saw.
le = preprocessing.LabelEncoder()

# Turn the string categories into integer codes. Note: this isn't the best way
# to encode weather but we will talk about other encoding methods later on
weather_encoded, temp_encoded, label_encoded = (
    le.fit_transform(column) for column in (weather, temp, play)
)

print('Label:', label_encoded)
print('Temp:', temp_encoded)
print('Weather:', weather_encoded)


Label: [0 0 1 1 1 0 1 0 1 1 1 0 1]
Temp: [1 1 1 2 0 0 0 2 0 2 2 2 1]
Weather: [2 2 0 1 1 1 0 2 2 1 2 1 0]


Show the encoding: list which integer code each original category was mapped to.

In [6]:
# For each column, pair every integer code with the string it came from,
# de-duplicate the pairs, and print them sorted by code (one column per line).
print(
    *(
        sorted(set(zip(codes, names)))
        for codes, names in (
            (weather_encoded, weather),
            (temp_encoded, temp),
            (label_encoded, play),
        )
    ),
    sep='\n',
)


[(0, 'Overcast'), (1, 'Rainy'), (2, 'Sunny')]
[(0, 'Cool'), (1, 'Hot'), (2, 'Mild')]
[(0, 'No'), (1, 'Yes')]


In [7]:
# Assemble the two encoded predictors into one feature table, one column each
my_dict = dict(Weather=weather_encoded, Temp=temp_encoded)
features = pd.DataFrame(data=my_dict)
features

Unnamed: 0,Weather,Temp
0,2,1
1,2,1
2,0,1
3,1,2
4,1,0
5,1,0
6,0,0
7,2,2
8,2,0
9,1,2


In [8]:
# Inspect the first observation as a {column name: encoded value} mapping
features.iloc[0].to_dict()

{'Weather': 2, 'Temp': 1}

In [9]:
# Build a Gaussian Naive Bayes classifier and train it on the encoded features
# and labels. fit() hands back the estimator itself, so construction and
# training chain into one assignment (which also displays nothing).
model = GaussianNB().fit(features, label_encoded)


In [10]:
# Predict whether to play for one new observation.
# Encoded query: Weather 1 = Rainy, Temp 2 = Mild.
# The model was fitted on a DataFrame, so the query is passed as a DataFrame
# with the same columns in the same order; a bare nested list makes recent
# scikit-learn versions warn that X does not have valid feature names.
query = pd.DataFrame([[1, 2]], columns=features.columns)
predicted = model.predict(query)

print( "Predicted Value:", predicted ) # 0: No    1: Yes

Predicted Value: [1]




In [11]:
# Class probabilities for the same query (Weather 1 = Rainy, Temp 2 = Mild).
# The query is a DataFrame carrying the training column names so scikit-learn
# does not warn about missing feature names. Result columns follow
# model.classes_, i.e. [P(No), P(Yes)] for the 0/1 label encoding above.
model.predict_proba(pd.DataFrame([[1, 2]], columns=features.columns))



array([[0.37754552, 0.62245448]])

In [12]:
# Lookup table from each encoded label back to its original string
result = {code: answer for code, answer in zip(label_encoded, play)}
result

{0: 'No', 1: 'Yes'}

In [13]:
# Translate the first (and only) predicted code back to its original label
result[predicted[0]]


'Yes'