### Prasad Rajesh Posture  
**Batch**: June 2022  
Data Analytics with Python  

**Task :** Create a model using a Naive Bayes classifier to predict whether the tennis match will happen or not, based on the weather conditions.

### `import` Packages

In [1]:
import pandas as pd
import numpy as np

### Loading Dataset 
Data source : http://www.shatterline.com/MachineLearning/data/tennis_anyone.csv

In [2]:
# Read the CSV from its public URL and swap the file's header for short,
# lowercase column names in a single expression.
data_source = 'http://www.shatterline.com/MachineLearning/data/tennis_anyone.csv'
df = pd.read_csv(data_source).set_axis(
    ['outlook', 'temp', 'humidity', 'wind', 'class'],
    axis=1,
)

In [3]:
# Preview the first rows to confirm the renamed columns.
df.head()

Unnamed: 0,outlook,temp,humidity,wind,class
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


### Features and Target

In [4]:
# Predictors are every column except the label; the label is the 'class' column.
X = df.drop(columns='class')
y = df['class']

In [5]:
# Feature columns only: 'class' has been dropped.
X.head()

Unnamed: 0,outlook,temp,humidity,wind
0,Sunny,Hot,High,Weak
1,Sunny,Hot,High,Strong
2,Overcast,Hot,High,Weak
3,Rain,Mild,High,Weak
4,Rain,Cool,Normal,Weak


In [6]:
# Target labels taken from the 'class' column.
y.head()

0     No
1     No
2    Yes
3    Yes
4    Yes
Name: class, dtype: object

In [7]:
# Number of rows per class; these counts are the denominators of the
# per-class conditional probabilities computed below.
y.value_counts()

Yes    9
No     5
Name: class, dtype: int64

### Likelihood

In [8]:
def feature_probability(X_features, y_target, f, yt_kv, alpha=0.0):
    """Estimate P(feature value | class) for one categorical feature.

    Parameters
    ----------
    X_features : pandas.DataFrame
        Feature table; only column ``f`` is read.
    y_target : pandas.Series
        Class labels, matched to the rows of ``X_features`` by position
        (the index is ignored).
    f : str
        Name of the feature column to tabulate.
    yt_kv : dict
        ``{class label: number of rows carrying that label}``.
    alpha : float, default 0.0
        Additive (Laplace/Lidstone) smoothing strength. With the default of
        0 the result is the plain relative frequency ``count / class_count``.
        With ``alpha > 0`` the estimate becomes
        ``(count + alpha) / (class_count + alpha * n_values)``, where
        ``n_values`` is the number of distinct values of ``f``. This keeps a
        value that never occurs with some class from getting probability 0,
        which would otherwise zero out the whole naive Bayes product.

    Returns
    -------
    dict
        ``{feature value: {class label: probability}}``. Feature values
        appear in the order produced by ``value_counts()``.

    Raises
    ------
    ValueError
        If ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    # Pair values and labels positionally so differing indexes cannot misalign them.
    paired = pd.DataFrame({f: X_features[f].values, 'class': y_target.values})
    categories = X_features[f].value_counts().index.to_list()
    n_categories = len(categories)

    each_x = {}
    for xi in categories:
        classes_for_xi = paired.loc[paired[f] == xi, 'class']
        each_x[xi] = {
            # int() keeps the numerator a plain Python number, so the result
            # type is driven by the count supplied in yt_kv, as before.
            yi: (int((classes_for_xi == yi).sum()) + alpha) / (n_yi + alpha * n_categories)
            for yi, n_yi in yt_kv.items()
        }

    return each_x

In [9]:
# Derive the per-class row counts from the target rather than typing them in,
# so the denominators stay correct if the dataset changes.
yt_kv = y.value_counts().to_dict()
feature_probability(X_features=X, y_target=y, f='outlook', yt_kv=yt_kv)

{'Sunny': {'Yes': 0.2222222222222222, 'No': 0.6},
 'Rain': {'Yes': 0.3333333333333333, 'No': 0.4},
 'Overcast': {'Yes': 0.4444444444444444, 'No': 0.0}}

In [10]:
def compute_likelihood(X_features, y_target):
    """Build the probability tables used by the naive Bayes predictor.

    Parameters
    ----------
    X_features : pandas.DataFrame
        Categorical feature columns.
    y_target : pandas.Series
        Class label for each row of ``X_features``.

    Returns
    -------
    tuple of (dict, dict)
        ``X_likelihood`` maps feature -> value -> class -> P(value | class),
        as produced by ``feature_probability`` for every column.
        ``y_likelihood`` maps class -> share of rows with that class
        (i.e. the class prior).
    """
    class_table = y_target.value_counts().to_frame()
    # to_frame() yields a 2-D values array; column 0 holds the counts.
    class_sizes = class_table.values[:, 0]
    yt_kv = dict(zip(class_table.index.to_list(), class_sizes))
    n_rows = np.sum(class_sizes)

    X_likelihood = {
        col: feature_probability(
            X_features=X_features,
            y_target=y_target,
            f=col,
            yt_kv=yt_kv,
        )
        for col in X_features
    }
    y_likelihood = {label: size / n_rows for label, size in yt_kv.items()}

    return X_likelihood, y_likelihood

In [11]:
# X_l: per-feature conditional probability tables; y_l: class priors.
X_l, y_l = compute_likelihood(X_features=X, y_target=y)

In [12]:
# Nested table: feature -> value -> class -> P(value | class).
X_l

{'outlook': {'Sunny': {'Yes': 0.2222222222222222, 'No': 0.6},
  'Rain': {'Yes': 0.3333333333333333, 'No': 0.4},
  'Overcast': {'Yes': 0.4444444444444444, 'No': 0.0}},
 'temp': {'Mild': {'Yes': 0.4444444444444444, 'No': 0.4},
  'Hot': {'Yes': 0.2222222222222222, 'No': 0.4},
  'Cool': {'Yes': 0.3333333333333333, 'No': 0.2}},
 'humidity': {'High': {'Yes': 0.3333333333333333, 'No': 0.8},
  'Normal': {'Yes': 0.6666666666666666, 'No': 0.2}},
 'wind': {'Weak': {'Yes': 0.6666666666666666, 'No': 0.4},
  'Strong': {'Yes': 0.3333333333333333, 'No': 0.6}}}

In [13]:
# Class priors: the share of rows belonging to each class.
y_l

{'Yes': 0.6428571428571429, 'No': 0.35714285714285715}

### Prediction

In [14]:
# case - 1
p1 = [['Sunny', 'Cool', 'High', 'Strong']]

# case - 2
p2 = [['Sunny', 'Mild', 'Normal', 'Weak'], 
      ['Sunny', 'Mild', 'High', 'Weak'],
      ['Rain', 'Cool', 'Normal', 'Strong']]

print('p1', len(p1))
print('p2', len(p2))

p1 1
p2 3


In [15]:
def predictor(X_new, X_l, y_l):
    """Return the most probable class label for a single observation.

    For every class the score is the class prior multiplied by the product
    of the conditional probabilities of the observed feature values (the
    naive Bayes independence assumption). Scores are not normalised; only
    their ranking matters.

    Parameters
    ----------
    X_new : sequence
        Feature values of one observation, ordered like the keys of ``X_l``.
        Values are paired with feature names via ``zip``, so surplus entries
        on either side are silently ignored.
    X_l : dict
        ``{feature: {value: {label: P(value | label)}}}``.
    y_l : dict
        ``{label: P(label)}``.

    Returns
    -------
    The label from ``y_l`` with the highest score. On a tie, the label that
    comes first in ``y_l`` wins.

    Raises
    ------
    KeyError
        If an observed value (or a label) is missing from ``X_l``.
    """
    observed = dict(zip(X_l, X_new))

    labels = list(y_l)
    scores = []
    for label in labels:
        conditionals = [X_l[feature][value][label] for feature, value in observed.items()]
        # Compare at full precision. Rounding the score first (previously to
        # 4 decimals) turns small products into 0.0, which creates false ties
        # and makes argmax fall back to the first label.
        scores.append(np.prod(conditionals) * y_l[label])

    return labels[np.argmax(scores)]

def predict(X_new, X_l, y_l):
    """Classify one or more observations with the fitted probability tables.

    Parameters
    ----------
    X_new : sequence of sequences
        Observations, each a list of feature values.
    X_l : dict
        Conditional probability tables, passed through to ``predictor``.
    y_l : dict
        Class priors, passed through to ``predictor``.

    Returns
    -------
    A single label when ``X_new`` holds exactly one observation, otherwise a
    list with one label per observation.
    """
    def classify(row):
        return predictor(X_new=row, X_l=X_l, y_l=y_l)

    if len(X_new) != 1:
        return list(map(classify, X_new))
    # Exactly one observation: hand back the bare label, not a 1-element list.
    return classify(X_new[0])

In [16]:
#Prediction for case 1
prediction = predict(X_new=p1, X_l=X_l, y_l=y_l)
print(prediction)

No


In [17]:
#Predictions for case 2
prediction = predict(X_new=p2, X_l=X_l, y_l=y_l)
print(prediction)

['Yes', 'No', 'Yes']


The End