In [234]:
import numpy as np
import pandas as pd 
import seaborn as sns

In [235]:
golf=pd.read_csv('golf.csv')
golf

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
6,overcast,cool,normal,True,yes
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes


In [236]:
(golf['Play']=='yes').sum()

9

In [237]:
(golf['Play']=='no').sum()

5

In [238]:
def prior_prob(golf,label):
    total_examples=golf.shape[0]
    class_examples=(golf['Play']==label).sum()
    return class_examples/total_examples

In [239]:
prior_prob(golf,'yes'),prior_prob(golf,'no')

(0.6428571428571429, 0.35714285714285715)

In [240]:
9/14,5/14

(0.6428571428571429, 0.35714285714285715)

In [241]:
PRIOR={
    'yes': prior_prob(golf,'yes'),
     'no':prior_prob(golf,'no') 
}
PRIOR

{'yes': 0.6428571428571429, 'no': 0.35714285714285715}

# Bayes Theorem

# conditional probability

In [242]:
golf['Play'] =='yes'

0     False
1     False
2      True
3      True
4      True
5     False
6      True
7     False
8      True
9      True
10     True
11     True
12     True
13    False
Name: Play, dtype: bool

In [243]:
golf[golf['Play'] =='yes']

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
6,overcast,cool,normal,True,yes
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes
10,sunny,mild,normal,True,yes
11,overcast,mild,high,True,yes
12,overcast,hot,normal,False,yes


# p(outlook) = 'sunny'|yes=1.Here outlook = feature, sunny=feature_value, yes=label


In [244]:
def cond_prob(golf, feature, feature_value, label):
    filtered_data = golf[golf['Play'] ==label]
    numerator=np.sum(filtered_data[feature]==feature_value)
    denominator=filtered_data.shape[0]
    
    return numerator/denominator
    

In [245]:
2/5

0.4

In [246]:
cond_prob(golf,'Humidity','normal','no')

0.2

In [247]:
cond_prob(golf,'Windy',False,'no')

0.4

# Likelihood

In [248]:
list(golf['Play'].unique())

['no', 'yes']

In [249]:
list(golf.columns)[:-1]

['Outlook', 'Temperature', 'Humidity', 'Windy']

In [250]:
features=list(golf.columns)[:-1] 

COND_PROB = {}


for label in golf['Play'].unique(): 
    COND_PROB[label]={}
    for feature in features:
        COND_PROB[label][feature] ={}
                  
        feature_values=golf[feature].unique()
                  
        for fea_value in feature_values:
                  #no, outlook,sunny
            prob=round(cond_prob(golf,feature,fea_value,label),2)
            COND_PROB[label][feature][fea_value]=prob
            print(feature,fea_value,label,prob)
    print()

Outlook sunny no 0.6
Outlook overcast no 0.0
Outlook rainy no 0.4
Temperature hot no 0.4
Temperature mild no 0.4
Temperature cool no 0.2
Humidity high no 0.8
Humidity normal no 0.2
Windy False no 0.4
Windy True no 0.6

Outlook sunny yes 0.22
Outlook overcast yes 0.44
Outlook rainy yes 0.33
Temperature hot yes 0.22
Temperature mild yes 0.44
Temperature cool yes 0.33
Humidity high yes 0.33
Humidity normal yes 0.67
Windy False yes 0.67
Windy True yes 0.33



In [251]:
COND_PROB

{'no': {'Outlook': {'sunny': 0.6, 'overcast': 0.0, 'rainy': 0.4},
  'Temperature': {'hot': 0.4, 'mild': 0.4, 'cool': 0.2},
  'Humidity': {'high': 0.8, 'normal': 0.2},
  'Windy': {False: 0.4, True: 0.6}},
 'yes': {'Outlook': {'sunny': 0.22, 'overcast': 0.44, 'rainy': 0.33},
  'Temperature': {'hot': 0.22, 'mild': 0.44, 'cool': 0.33},
  'Humidity': {'high': 0.33, 'normal': 0.67},
  'Windy': {False: 0.67, True: 0.33}}}

# Prediction

In [252]:
x_test=['sunny','hot','normal',False]

In [253]:
features

['Outlook', 'Temperature', 'Humidity', 'Windy']

In [254]:
for label in golf['Play'].unique():
    prior=PRIOR[label]
    liklihood=1.0
    
    for i in range(len(features)):
        feature=features[i]
        fea_value=x_test[i]
        liklihood*=COND_PROB[label][feature][fea_value]
    post=liklihood*prior
    print(label,post)
        

no 0.006857142857142858
yes 0.013967202857142858


# Impleamentation Naive Bayes Sklearn

In [255]:
golf=pd.read_csv('golf.csv')
golf

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
6,overcast,cool,normal,True,yes
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes


In [256]:
from sklearn.preprocessing import LabelEncoder

In [257]:
lel=LabelEncoder()
golf['Outlook']=lel.fit_transform(golf['Outlook'])
golf

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,2,hot,high,False,no
1,2,hot,high,True,no
2,0,hot,high,False,yes
3,1,mild,high,False,yes
4,1,cool,normal,False,yes
5,1,cool,normal,True,no
6,0,cool,normal,True,yes
7,2,mild,high,False,no
8,2,cool,normal,False,yes
9,1,mild,normal,False,yes


In [258]:
lel2=LabelEncoder()
golf['Temperature']=lel2.fit_transform(golf['Temperature'])
golf

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,2,1,high,False,no
1,2,1,high,True,no
2,0,1,high,False,yes
3,1,2,high,False,yes
4,1,0,normal,False,yes
5,1,0,normal,True,no
6,0,0,normal,True,yes
7,2,2,high,False,no
8,2,0,normal,False,yes
9,1,2,normal,False,yes


In [259]:
lel3=LabelEncoder()
golf['Humidity']=lel3.fit_transform(golf['Humidity'])
golf

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,2,1,0,False,no
1,2,1,0,True,no
2,0,1,0,False,yes
3,1,2,0,False,yes
4,1,0,1,False,yes
5,1,0,1,True,no
6,0,0,1,True,yes
7,2,2,0,False,no
8,2,0,1,False,yes
9,1,2,1,False,yes


In [260]:
lel4=LabelEncoder()
golf['Windy']=lel4.fit_transform(golf['Windy'])
golf

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,2,1,0,0,no
1,2,1,0,1,no
2,0,1,0,0,yes
3,1,2,0,0,yes
4,1,0,1,0,yes
5,1,0,1,1,no
6,0,0,1,1,yes
7,2,2,0,0,no
8,2,0,1,0,yes
9,1,2,1,0,yes


In [261]:
lel5=LabelEncoder()
golf['Play']=lel5.fit_transform(golf['Play'])
golf

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,2,1,0,0,0
1,2,1,0,1,0
2,0,1,0,0,1
3,1,2,0,0,1
4,1,0,1,0,1
5,1,0,1,1,0
6,0,0,1,1,1
7,2,2,0,0,0
8,2,0,1,0,1
9,1,2,1,0,1


In [262]:
x=golf[['Outlook','Temperature','Humidity','Windy']]
y=golf[['Play']]

In [263]:
from sklearn.naive_bayes import CategoricalNB

In [264]:
model=CategoricalNB()

In [265]:
model.fit(x,y)

  y = column_or_1d(y, warn=True)


In [266]:
x_test=['sunny','hot','normal',False]

In [267]:
lel.transform(['sunny'])

array([2])

In [268]:
lel2.transform(['hot'])

array([1])

In [270]:
lel3.transform(['normal'])

array([1])

In [271]:
lel4.transform([False])

array([0], dtype=int64)

In [272]:
x_test=np.array([[2,1,1,0]])

In [273]:
model.predict(x_test)



array([1])

In [274]:
model.predict_proba(x_test)




array([[0.33508723, 0.66491277]])