# Bayes rule

[source](https://www.youtube.com/watch?v=kz2tvO_ZAKI&list=PLUl4u3cNGP60hI9ATjSFgLZpbNJ7myAg6&index=29)

1. Partition sample space into $A_1$,$A_2$,$A_3$....
2. Have $P(A_i)$ for every i .    **Note** these are initial beliefs
3. Because the $A_i$ partition the sample space, any event $B$ splits into disjoint pieces: $B = \bigcup_{i}(B \cap A_i)$, hence
    * $P(B) = \sum_{i}P(B \cap A_i)$
4. Have $P(B|A_i)$ for every i.
5. Revised belief given that B occurred:
    * $P(A_i|B) = \dfrac{P(A_i \cap B)}{P(B)} = \dfrac{P(A_i)P(B|A_i)}{\sum_{j}P(A_j)P(B|A_j)}$ 

# Naive Bayes

## Example 1
[source](https://machinelearningmastery.com/naive-bayes-tutorial-for-machine-learning/)

In [1]:
import pandas as pd
from io import StringIO

### Data

In [2]:
# Toy training set as tab-separated text: two categorical features
# (Weather, Car) and the target label (Class), ten rows in total.
# NOTE(review): the u prefix keeps the literal a text string, presumably for
# Python 2 where io.StringIO only accepts unicode -- confirm before removing.
input_data_str = u"""Weather	Car	Class
sunny	working	go-out
rainy	broken	go-out
sunny	working	go-out
sunny	working	go-out
sunny	working	go-out
rainy	broken	stay-home
rainy	broken	stay-home
sunny	working	stay-home
sunny	broken	stay-home
rainy	broken	stay-home
"""

# Wrap the text in a file-like buffer so pandas can parse it without a file
# on disk; sep='\t' because the columns are tab-delimited.
input_buffer = StringIO(input_data_str)
data_df = pd.read_csv(input_buffer,sep='\t')
# Bare expression: the notebook renders the resulting frame as the cell output.
data_df

Unnamed: 0,Weather,Car,Class
0,sunny,working,go-out
1,rainy,broken,go-out
2,sunny,working,go-out
3,sunny,working,go-out
4,sunny,working,go-out
5,rainy,broken,stay-home
6,rainy,broken,stay-home
7,sunny,working,stay-home
8,sunny,broken,stay-home
9,rainy,broken,stay-home


### Training

Let $Y_o$ be the event 'go-out' and $Y_s$ the event 'stay-home' (step [1](#Bayes-rule)).

Calculate initial belief $P(Y_o)$ and $P(Y_s)$, step [2](#Bayes-rule) 

In [3]:
# Priors (initial beliefs): the share of training rows carrying each class label.
# Summing a boolean mask counts its True rows; the int()/float() casts keep the
# results plain Python floats.
prob_Yo = int((data_df['Class'] == 'go-out').sum()) / float(data_df.shape[0])  # 5/10
prob_Ys = int((data_df['Class'] == 'stay-home').sum()) / float(data_df.shape[0])  # 5/10

(prob_Yo, prob_Ys)

(0.5, 0.5)

Let:

* $W_s$ event weather is sunny
* $W_r$ event weather is rainy
* $C_w$ event Car is working
* $C_b$ event Car is broken
    

In [4]:
# Likelihood of each Weather value given each class, estimated by counting:
#   Prob(Weather = w | Class = y)
#   = Prob(Weather = w and Class = y) / Prob(Class = y)
#   = count(Weather = w and Class = y) / count(Class = y)

# Boolean row masks, one per Weather value and one per class label.
cond_Ws = data_df['Weather'] == 'sunny'
cond_Wr = data_df['Weather'] == 'rainy'
cond_Yo = data_df['Class'] == 'go-out'
cond_Ys = data_df['Class'] == 'stay-home'

# Summing a mask counts its True rows; int()/float() keep the ratios plain
# Python floats.

# Class = 'go-out'
prob_Ws_given_Yo = int((cond_Ws & cond_Yo).sum()) / float(cond_Yo.sum())
print("Prob(Weather = sunny | Class = 'go-out') = {}".format(prob_Ws_given_Yo))

prob_Wr_given_Yo = int((cond_Wr & cond_Yo).sum()) / float(cond_Yo.sum())
print("Prob(Weather = rainy | Class = 'go-out') = {}".format(prob_Wr_given_Yo))

# Class = 'stay-home'
prob_Ws_given_Ys = int((cond_Ws & cond_Ys).sum()) / float(cond_Ys.sum())
print("Prob(Weather = sunny | Class = 'stay-home') = {}".format(prob_Ws_given_Ys))

prob_Wr_given_Ys = int((cond_Wr & cond_Ys).sum()) / float(cond_Ys.sum())
print("Prob(Weather = rainy | Class = 'stay-home') = {}".format(prob_Wr_given_Ys))


Prob(Weather = sunny | Class = 'go-out') = 0.8
Prob(Weather = rainy | Class = 'go-out') = 0.2
Prob(Weather = sunny | Class = 'stay-home') = 0.4
Prob(Weather = rainy | Class = 'stay-home') = 0.6


In [5]:
# Likelihood of each Car value given each class, estimated by counting:
#   Prob(Car = c | Class = y)
#   = Prob(Car = c and Class = y) / Prob(Class = y)
#   = count(Car = c and Class = y) / count(Class = y)

# Boolean row masks, one per Car value and one per class label.
cond_Cw = data_df['Car'] == 'working'
cond_Cb = data_df['Car'] == 'broken'
cond_Yo = data_df['Class'] == 'go-out'
cond_Ys = data_df['Class'] == 'stay-home'

# Summing a mask counts its True rows; int()/float() keep the ratios plain
# Python floats.

# Class = 'go-out'
prob_Cw_given_Yo = int((cond_Cw & cond_Yo).sum()) / float(cond_Yo.sum())
print("Prob(Car = working | Class = 'go-out') = {}".format(prob_Cw_given_Yo))

prob_Cb_given_Yo = int((cond_Cb & cond_Yo).sum()) / float(cond_Yo.sum())
print("Prob(Car = broken | Class = 'go-out') = {}".format(prob_Cb_given_Yo))

# Class = 'stay-home'
prob_Cw_given_Ys = int((cond_Cw & cond_Ys).sum()) / float(cond_Ys.sum())
print("Prob(Car = working | Class = 'stay-home') = {}".format(prob_Cw_given_Ys))

prob_Cb_given_Ys = int((cond_Cb & cond_Ys).sum()) / float(cond_Ys.sum())
print("Prob(Car = broken | Class = 'stay-home') = {}".format(prob_Cb_given_Ys))


Prob(Car = working | Class = 'go-out') = 0.8
Prob(Car = broken | Class = 'go-out') = 0.2
Prob(Car = working | Class = 'stay-home') = 0.2
Prob(Car = broken | Class = 'stay-home') = 0.8


### Inference 

Let the input test data be Weather=sunny, Car=working.

We want to calculate
* Prob(Class = go-out | Weather=sunny, Car=working) 
    * = $P(Y_o|W_s \cap C_w) = \dfrac{P(Y_o)*P(W_s\cap C_w|Y_o)}{P(W_s\cap C_w)}$ 
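    * = $\dfrac{P(Y_o)*P(W_s|Y_o)* P(C_w|Y_o)}{P(W_s\cap C_w)}$, since Weather and Car are assumed conditionally independent given the class (the "naive" assumption)
    * = $\dfrac{0.5 * 0.8 * 0.8}{P(W_s\cap C_w)} = \dfrac{0.32}{P(W_s\cap C_w)}$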
* Prob(Class = stay-home | Weather=sunny, Car=working) 
    * = $P(Y_s|W_s \cap C_w) = \dfrac{P(Y_s)*P(W_s\cap C_w|Y_s)}{P(W_s\cap C_w)}$ 
    * = $\dfrac{P(Y_s)*P(W_s|Y_s)* P(C_w|Y_s)}{P(W_s\cap C_w)}$ 
    * Since Weather and Car are assumed conditionally independent given the class
    
**Note:** We only need the numerator: the class that gives the largest value is the predicted output. We don't need to calculate the denominator $P(W_s\cap C_w)$, because it is the same for every class and does not change which class is largest.

Hence we will calculate function:
* $M(Y_o|W_s \cap C_w) = P(Y_o)*P(W_s|Y_o)* P(C_w|Y_o)$
* $M(Y_s|W_s \cap C_w) = P(Y_s)*P(W_s|Y_s)* P(C_w|Y_s)$
* Class with maximum M() value will be inferred output

In [8]:
# Unnormalised score of each class for the test input (Weather=sunny,
# Car=working): class prior * P(Weather=sunny | class) * P(Car=working | class).
m_Yo = prob_Yo * prob_Ws_given_Yo * prob_Cw_given_Yo
m_Ys = prob_Ys * prob_Ws_given_Ys * prob_Cw_given_Ys
# Bare tuple: the notebook renders both scores as the cell output.
m_Yo, m_Ys

(0.32000000000000006, 0.04000000000000001)

From the above, the inferred output for the test input Weather='sunny', Car='working' is 'go-out', because its score is the larger of the two.

In [24]:
# Prediction function

def pred_single(weather, car):
    """Predict the class for one (weather, car) observation.

    Scores each class as prior * P(weather | class) * P(car | class), using
    the module-level probabilities computed in the training cells, and returns
    the label with the larger score.

    Args:
        weather: 'sunny' or 'rainy'.
        car: 'working' or 'broken'.

    Returns:
        'go-out' if its score is strictly larger, otherwise 'stay-home'
        (so a tie resolves to 'stay-home').

    Raises:
        ValueError: if `weather` or `car` is not one of the values above.
    """
    # Per-feature lookup: value -> (P(value | go-out), P(value | stay-home)).
    # Built on every call so that re-running the training cells is picked up.
    weather_likelihood = {
        'sunny': (prob_Ws_given_Yo, prob_Ws_given_Ys),
        'rainy': (prob_Wr_given_Yo, prob_Wr_given_Ys),
    }
    car_likelihood = {
        'working': (prob_Cw_given_Yo, prob_Cw_given_Ys),
        'broken': (prob_Cb_given_Yo, prob_Cb_given_Ys),
    }

    if weather not in weather_likelihood or car not in car_likelihood:
        # ValueError is an Exception subclass, so callers catching Exception
        # keep working, but now get a message naming the offending input.
        raise ValueError(
            "unsupported input: weather={!r}, car={!r}".format(weather, car)
        )

    yo_w, ys_w = weather_likelihood[weather]
    yo_c, ys_c = car_likelihood[car]

    # Unnormalised scores; the shared denominator is irrelevant to the argmax.
    map_yo = prob_Yo * yo_w * yo_c
    map_ys = prob_Ys * ys_w * ys_c

    return 'go-out' if map_yo > map_ys else 'stay-home'
    
        
        
    

# Classify every row of the training frame and store the prediction next to
# the true label for a side-by-side comparison.
test_X = data_df[['Weather', 'Car']].values
output_list = [pred_single(weather, car) for weather, car in test_X]

data_df['pred_class'] = output_list
data_df

Unnamed: 0,Weather,Car,Class,pred_class
0,sunny,working,go-out,go-out
1,rainy,broken,go-out,stay-home
2,sunny,working,go-out,go-out
3,sunny,working,go-out,go-out
4,sunny,working,go-out,go-out
5,rainy,broken,stay-home,stay-home
6,rainy,broken,stay-home,stay-home
7,sunny,working,stay-home,go-out
8,sunny,broken,stay-home,stay-home
9,rainy,broken,stay-home,stay-home
