In [None]:
# Implement Naive Bayes algorithm with a simple example
# references
# Naive Bayes: https://blog.paperspace.com/introduction-to-naive-bayes/
# One hot encoding: https://contactsunny.medium.com/label-encoder-vs-one-hot-encoder-in-machine-learning-3fc273365621
# 2022 - 2024
# note: dataset too small, and has contradictions !

In [None]:
import sys
#clear all variables (from last session)
sys.modules[__name__].__dict__.clear()
import os, sys, time, random, itertools, shutil
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Here we will work with a toy example
# Make some synthetic data: 2D weather data (weather and temperature) and labels ( conditions for a play date: yes, no)
# Assigning features and label variables

weather = ['Sunny','Sunny','Overcast','Rainy','Rainy','Rainy','Overcast','Sunny','Sunny','Rainy','Sunny','Overcast','Overcast','Rainy']
temp = ['Hot','Hot','Hot','Mild','Cool','Cool','Cool','Mild','Cool','Mild','Mild','Mild','Hot','Mild']
playdate = ['No','No','Yes','Yes','Yes','No','Yes','No','Yes','Yes','Yes','Yes','Yes','No']

In [None]:
print(len(weather))
print(len(temp))
print(len(playdate))

14
14
14


In [32]:
for i in range (0, 14):
    print(weather[i], temp[i], playdate[i])

Sunny Hot No
Sunny Hot No
Overcast Hot Yes
Rainy Mild Yes
Rainy Cool Yes
Rainy Cool No
Overcast Cool Yes
Sunny Mild No
Sunny Cool Yes
Rainy Mild Yes
Sunny Mild Yes
Overcast Mild Yes
Overcast Hot Yes
Rainy Mild No


In [None]:
#alternative using pandas ---------------------- see Albon p 81ff
import pandas
df_weather = pandas.DataFrame({"weather_data":weather})
df_temp = pandas.DataFrame({"temp_data":temp})
df_playdate = pandas.DataFrame({"playdate_data":playdate})

In [None]:
print(df_weather.head(3))


  weather_data
0        Sunny
1        Sunny
2     Overcast


In [None]:
print(df_temp.head(5))

In [None]:
weather_scale_mapper = {"Sunny":2, "Rainy":1, "Overcast":0}
temp_scale_mapper = {"Hot":2, "Cool":1, "Mild":0}
playdate_scale_mapper = {"Yes":1, "No":0}

In [None]:
df_weather["weather_data"].replace(weather_scale_mapper)
df_temp["temp_data"].replace(temp_scale_mapper)
df_playdate["playdate_data"].replace(playdate_scale_mapper)

print(df_weather["weather_data"])
print(df_weather["weather_data"].replace(weather_scale_mapper))
print(df_temp["temp_data"].replace(temp_scale_mapper))
#----------------------------------------------------

0        Sunny
1        Sunny
2     Overcast
3        Rainy
4        Rainy
5        Rainy
6     Overcast
7        Sunny
8        Sunny
9        Rainy
10       Sunny
11    Overcast
12    Overcast
13       Rainy
Name: weather_data, dtype: object
0     2
1     2
2     0
3     1
4     1
5     1
6     0
7     2
8     2
9     1
10    2
11    0
12    0
13    1
Name: weather_data, dtype: int64
0     2
1     2
2     2
3     0
4     1
5     1
6     1
7     0
8     1
9     0
10    0
11    0
12    2
13    0
Name: temp_data, dtype: int64


In [None]:
# Import sklearn and the LabelEncoder
from sklearn import preprocessing
import numpy

# Create labelEncoder
le = preprocessing.LabelEncoder()

# Convert string labels into numbers.
weather_encoded=le.fit_transform(weather)

print('Weather encoded:', weather_encoded)

Weather encoded: [2 2 0 1 1 1 0 2 2 1 2 0 0 1]


In [None]:
# Convert string labels into numbers
temp_encoded = le.fit_transform(temp)
playdate_encoded = le.fit_transform(playdate)

print("Temperature encoded:",temp_encoded)
print("Play date encoded:", playdate_encoded)

Temperature encoded: [1 1 1 2 0 0 0 2 0 2 2 2 1 2]
Play date encoded: [0 0 1 1 1 0 1 0 1 1 1 1 1 0]


In [None]:
# stack the input data pairwise
# https://www.pythonprogramming.in/joining-and-stacking-of-numpy-arrays.html
com = numpy.dstack((weather_encoded, temp_encoded))
combo = com[0]
print(combo)

[[2 1]
 [2 1]
 [0 1]
 [1 2]
 [1 0]
 [1 0]
 [0 0]
 [2 2]
 [2 0]
 [1 2]
 [2 2]
 [0 2]
 [0 1]
 [1 2]]


In [None]:
# Import Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB

# Create a Gaussian Classifier
model = GaussianNB()

In [None]:
# Train the model using the training sets
model.fit(combo, playdate_encoded)
#print()

In [None]:
# Predict output based on the training data
# weather overcast (0) and temp mild (2) -> yes (1)
# weather sunny (2) and temp hot (1) -> no (0)
#-------------------------------------------------------------------------------
input_weather =  0
input_temp =   2

predicted = model.predict([[input_weather, input_temp]])

if(predicted[0] == 1):
  result = 'yes'
elif(predicted[0] == 0):
  result = 'no'

print("Play date given these conditions, yes or no? Prediction says:", result)

Play date given these conditions, yes or no? Prediction says: yes


advantages of naive bayes classification:
- fast
- easy to interpret
- few things to 'adjust'
- works on multi-class prediction
- works with numerical and categorical data
- numerical data is assumed to be normally distributed

disadvantages of naive bayes classification:
- test data not represented in the training data will generate a zero probability of occurance
-  features are assumed to be independent (usually not completely true)

typical applications:
- text classification, spam filtering, sentiment analysis, simple recommendation systems

In [29]:
#write a snippet of code that takes 3 weather/temp inputs and predicts the suitability for a playdate
# Predict output based on the training data
# weather overcast (0) and temp mild (2) -> yes (1)
# weather sunny (2) and temp hot (1) -> no (0)

#weather_scale_mapper = {"Sunny":2, "Rainy":1, "Overcast":0}
#temp_scale_mapper = {"Hot":2, "Cool":1, "Mild":0}
#-------------------------------------------------------------------------------

i1 = [2,0]
i2 = [2,1]
i3 = [2,2]

inputs = [i1,i2,i3]

for i in inputs:
    print(i)
    predicted = model.predict([i])
    print(predicted)

    if(predicted[0] == 1):
        result = 'yes'
    elif(predicted[0] == 0):
        result = 'no'
    print("Play date given these conditions, yes or no? Prediction says:", result)

[2, 0]
[0]
Play date given these conditions, yes or no? Prediction says: no
[2, 1]
[0]
Play date given these conditions, yes or no? Prediction says: no
[2, 2]
[0]
Play date given these conditions, yes or no? Prediction says: no
