In [1]:
import pandas as pd
import numpy as np

In [2]:
df_tennis = pd.read_excel('PlayTennis.xlsx')
df_tennis.head(15)

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [3]:
#find Prior Probability
# P(classplay=yes)=? and P(Classplay=no)=?
def prior_prob(tennis,label):
    total_examples = tennis.shape[0]
    class_examples = (tennis['Play Tennis'] == label).sum()
    return class_examples/total_examples

In [4]:
yes_prob = prior_prob(df_tennis,'Yes')
yes_prob

0.6428571428571429

In [5]:
no_prob = prior_prob(df_tennis,'No')
no_prob

0.35714285714285715

In [6]:
yes_prob,no_prob

(0.6428571428571429, 0.35714285714285715)

In [7]:
9/14,5/14

(0.6428571428571429, 0.35714285714285715)

In [8]:
PRIOR_PROB = {
    'Yes': yes_prob,
    'No': no_prob
}

In [9]:
PRIOR_PROB['Yes'], PRIOR_PROB['No']

(0.6428571428571429, 0.35714285714285715)

In [10]:
#Find Conditional Probability
def cond_prob(tennis,feature,feature_value,label):
    filtered_data = tennis[tennis['Play Tennis'] == label]
    numerator = np.sum(filtered_data[feature] == feature_value)
    denominator = filtered_data.shape[0]
    return numerator/denominator

In [11]:
outlook_sunny_yes_prob = cond_prob(df_tennis,'Outlook','Sunny','Yes')
outlook_sunny_yes_prob, 2/9

(0.2222222222222222, 0.2222222222222222)

In [12]:
temp_mild_no_prob = cond_prob(df_tennis,'Temperature','Mild','No')
temp_mild_no_prob, 2/5

(0.4, 0.4)

In [13]:
df_tennis.columns

Index(['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play Tennis'], dtype='object')

In [14]:
#Find Likelihood Probabilites
features = list(df_tennis.columns)[:-1]
COND_PROBS = {}
for label in df_tennis['Play Tennis'].unique():
    COND_PROBS[label] = {}
    for feature in features:
        COND_PROBS[label][feature] = {}
        feature_values = df_tennis[feature].unique()
        for feature_value in feature_values:
            prob = np.round(cond_prob(df_tennis,feature,feature_value,label),2)
            COND_PROBS[label][feature][feature_value] = prob

            print(label,feature,feature_value,prob)
            print()

No Outlook Sunny 0.6

No Outlook Overcast 0.0

No Outlook Rain 0.4

No Temperature Hot 0.4

No Temperature Mild 0.4

No Temperature Cool 0.2

No Humidity High 0.8

No Humidity Normal 0.2

No Wind Weak 0.4

No Wind Strong 0.6

Yes Outlook Sunny 0.22

Yes Outlook Overcast 0.44

Yes Outlook Rain 0.33

Yes Temperature Hot 0.22

Yes Temperature Mild 0.44

Yes Temperature Cool 0.33

Yes Humidity High 0.33

Yes Humidity Normal 0.67

Yes Wind Weak 0.67

Yes Wind Strong 0.33



In [15]:
df_tennis['Outlook'].unique()

array(['Sunny', 'Overcast', 'Rain'], dtype=object)

In [16]:
x_test = ["Sunny", "Cool", "High", "Strong"]

for label in df_tennis['Play Tennis'].unique():
    prior_prob = PRIOR_PROB[label]
    likelihood = 1
    for i in range(len(features)):
        feature = features[i]
        feature_value = x_test[i]
        likelihood *=COND_PROBS[label][feature][feature_value]

        post = likelihood*prior_prob
        
    print(label,post)

No 0.02057142857142857
Yes 0.005082518571428572


In [17]:
COND_PROBS[label]['Outlook']['Sunny']

0.22

In [18]:
df_tennis.head(5)

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [19]:
X = df_tennis.iloc[:,0:4]
y = df_tennis.iloc[:,-1]

In [20]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test= train_test_split(X,y,test_size=0.2,random_state=42)

In [21]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
X_train['Outlook']=encoder.fit_transform(X_train['Outlook'])
X_train['Temperature']=encoder.fit_transform(X_train['Temperature'])
X_train['Humidity']=encoder.fit_transform(X_train['Humidity'])
X_train['Wind']=encoder.fit_transform(X_train['Wind'])

X_test['Outlook']=encoder.fit_transform(X_test['Outlook'])
X_test['Temperature']=encoder.fit_transform(X_test['Temperature'])
X_test['Humidity']=encoder.fit_transform(X_test['Humidity'])
X_test['Wind']=encoder.fit_transform(X_test['Wind'])

In [22]:
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()

In [23]:
model.fit(X_train,y_train)

In [27]:
#outlook = sunny, Temp = cool, Humidity = high, wind = Strong
prediction = model.predict([[2,0,0,0]])
prediction



array(['No'], dtype='<U3')