## Tennis Problem

#### Import Library

In [46]:
import pandas as pd

### Data

#### Creating the dataset

In [47]:
# 14 daily observations, written one day per row so each record can be read
# across, then transposed into the column-oriented dict used to build the frame.
_COLUMNS = ("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis")
_ROWS = (
    ("D1", "Sunny", "Hot", "High", "Weak", "No"),
    ("D2", "Sunny", "Hot", "High", "Strong", "No"),
    ("D3", "Overcast", "Hot", "High", "Weak", "Yes"),
    ("D4", "Rain", "Mild", "High", "Weak", "Yes"),
    ("D5", "Rain", "Cool", "Normal", "Weak", "Yes"),
    ("D6", "Rain", "Cool", "Normal", "Strong", "No"),
    ("D7", "Overcast", "Cool", "Normal", "Strong", "Yes"),
    ("D8", "Sunny", "Mild", "High", "Weak", "No"),
    ("D9", "Sunny", "Cool", "Normal", "Weak", "Yes"),
    ("D10", "Rain", "Mild", "Normal", "Weak", "Yes"),
    ("D11", "Sunny", "Mild", "Normal", "Strong", "Yes"),
    ("D12", "Overcast", "Mild", "High", "Strong", "Yes"),
    ("D13", "Overcast", "Hot", "Normal", "Weak", "Yes"),
    ("D14", "Rain", "Mild", "High", "Strong", "No"),
)

# zip(*_ROWS) turns the row tuples into one tuple per column, in _COLUMNS order.
data = {name: list(values) for name, values in zip(_COLUMNS, zip(*_ROWS))}

#### Converting the dictionary into a pandas dataframe

In [48]:
# Each key of `data` becomes a column; the list under it supplies the rows.
df = pd.DataFrame(data=data)
df

Unnamed: 0,Day,Outlook,Temperature,Humidity,Wind,PlayTennis
0,D1,Sunny,Hot,High,Weak,No
1,D2,Sunny,Hot,High,Strong,No
2,D3,Overcast,Hot,High,Weak,Yes
3,D4,Rain,Mild,High,Weak,Yes
4,D5,Rain,Cool,Normal,Weak,Yes
5,D6,Rain,Cool,Normal,Strong,No
6,D7,Overcast,Cool,Normal,Strong,Yes
7,D8,Sunny,Mild,High,Weak,No
8,D9,Sunny,Cool,Normal,Weak,Yes
9,D10,Rain,Mild,Normal,Weak,Yes


### Prediction using Naive Bayes estimation

#### Step 1: Calculate the prior probabilities P(PlayTennis=Yes) and P(PlayTennis=No)

In [49]:
def calculate_prior_probability(df, feature, target):
    """Return the fraction of rows in ``df`` whose ``feature`` column equals ``target``.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataset to count over.
    feature : str
        Name of the column to inspect (e.g. ``"PlayTennis"``).
    target : object
        The value whose relative frequency is wanted (e.g. ``"Yes"``).

    Returns
    -------
    float
        Matching row count divided by the total row count.

    Raises
    ------
    ZeroDivisionError
        If ``df`` has no rows.
    """
    is_match = df[feature] == target
    matching_rows = len(df[is_match])
    total_rows = len(df)
    return matching_rows / total_rows


#### Step 2: Calculate likelihoods for each feature given PlayTennis=Yes and PlayTennis=No


In [50]:
def caculate_likelihood(df, feature, value, target, target_column="PlayTennis", alpha=0):
    """Estimate P(``feature`` = ``value`` | ``target_column`` = ``target``) from ``df``.

    The function name keeps its original spelling so existing callers keep working.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataset to count over.
    feature : str
        Name of the feature column (e.g. ``"Outlook"``).
    value : object
        The feature value whose conditional frequency is wanted.
    target : object
        The class label to condition on (e.g. ``"Yes"``).
    target_column : str, optional
        Name of the class-label column. Defaults to ``"PlayTennis"``, the
        column the original implementation hard-coded.
    alpha : float, optional
        Additive (Laplace) smoothing strength. With the default ``0`` the
        result is the plain relative frequency, exactly as before. With
        ``alpha > 0`` a value never seen together with ``target`` gets a small
        non-zero likelihood instead of 0, so it no longer zeroes out a whole
        product of likelihoods.

    Returns
    -------
    float
        ``(count + alpha) / (n_target + alpha * k)``, where ``count`` is the
        number of ``target`` rows with ``feature == value``, ``n_target`` is
        the number of ``target`` rows, and ``k`` is the number of distinct
        values of ``feature`` in ``df``.

    Raises
    ------
    ZeroDivisionError
        If no row has ``target`` in ``target_column`` and ``alpha`` is 0.
    """
    subset = df[df[target_column] == target]
    matches = len(subset[subset[feature] == value])

    if not alpha:
        # Unsmoothed path: identical arithmetic to the original implementation.
        return matches / len(subset)

    # k is taken from the full frame so both classes share the same category set.
    n_categories = df[feature].nunique()
    return (matches + alpha) / (len(subset) + alpha * n_categories)


To make a prediction using the Naive Bayes algorithm, you can use the `naive_bayes_prediction()` function. This function takes in the following parameters:

- `df`: The pandas DataFrame containing the dataset.
- `outlook`: The value of the "Outlook" feature for which you want to make a prediction.
- `Temperature`: The value of the "Temperature" feature for which you want to make a prediction.
- `humidity`: The value of the "Humidity" feature for which you want to make a prediction.
- `wind`: The value of the "Wind" feature for which you want to make a prediction.

Here's an example of how to use the `naive_bayes_prediction()` function:

```python
prediction = naive_bayes_prediction(df, "Sunny", "Hot", "Normal", "Weak")
print(prediction)
```

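This prints the predicted class (`"Yes"` or `"No"`) for the given feature values. While it runs, `naive_bayes_prediction()` itself also prints each conditional likelihood and the two unnormalized class scores it compares.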

In [66]:
def naive_bayes_prediction(df, outlook, Temperature, humidity, wind):
    """Predict ``"Yes"`` or ``"No"`` for one day using Naive Bayes.

    For each class, the prior P(PlayTennis=class) is multiplied by the four
    conditional likelihoods of the observed feature values. The resulting
    scores are not normalized; only their order matters. Every likelihood and
    both final scores are printed as a side effect.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset with "Outlook", "Temperature", "Humidity", "Wind" and
        "PlayTennis" columns.
    outlook, Temperature, humidity, wind : object
        Observed values of the corresponding feature columns.

    Returns
    -------
    str
        ``"Yes"`` when the Yes score is strictly greater than the No score,
        otherwise ``"No"`` (so ties resolve to ``"No"``).
    """
    # Insertion order fixes both the evaluation order and the multiplication order.
    observed = {
        "Outlook": outlook,
        "Temperature": Temperature,
        "Humidity": humidity,
        "Wind": wind,
    }

    prior_yes = calculate_prior_probability(df, "PlayTennis", "Yes")
    prior_no = calculate_prior_probability(df, "PlayTennis", "No")

    given_yes = {
        column: caculate_likelihood(df, column, seen, "Yes")
        for column, seen in observed.items()
    }
    print(f" P_Outlook_Yes= {given_yes['Outlook']}")
    print(f" P_Temperature_Yes= {given_yes['Temperature']}")
    print(f" P_Humidity_Yes= {given_yes['Humidity']}")
    print(f" P_Wind_Yes= {given_yes['Wind']}")

    given_no = {
        column: caculate_likelihood(df, column, seen, "No")
        for column, seen in observed.items()
    }
    print(f"P_Outlook_No= {given_no['Outlook']}")
    print(f"P_Temperature_No= {given_no['Temperature']}")
    print(f"P_Humidity_No= {given_no['Humidity']}")
    print(f"P_Wind_No= {given_no['Wind']}")

    # Multiply left to right, prior first, so rounding matches a single chained product.
    score_yes = prior_yes
    for likelihood in given_yes.values():
        score_yes = score_yes * likelihood

    score_no = prior_no
    for likelihood in given_no.values():
        score_no = score_no * likelihood

    print(f"P_yes= {score_yes} and P_No= {score_no}")

    if score_yes > score_no:
        return "Yes"
    return "No"

In [67]:
# Keyword arguments make it explicit which feature each value belongs to.
naive_bayes_prediction(
    df, outlook="Sunny", Temperature="Hot", humidity="Normal", wind="Weak"
)

 P_Outlook_Yes= 0.2222222222222222
 P_Temperature_Yes= 0.2222222222222222
 P_Humidity_Yes= 0.6666666666666666
 P_Wind_Yes= 0.6666666666666666
P_Outlook_No= 0.6
P_Temperature_No= 0.4
P_Humidity_No= 0.2
P_Wind_No= 0.4
P_yes= 0.014109347442680775 and P_No= 0.006857142857142858


'Yes'

In [68]:
class NaiveBayesClassifier:
    """Naive Bayes over the Outlook / Temperature / Humidity / Wind features.

    All probabilities are plain relative frequencies counted from the
    DataFrame supplied at construction time. The class labels compared are
    ``"Yes"`` and ``"No"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset containing the four feature columns and the class column.
    target_column : str, optional
        Name of the class-label column. Defaults to ``"PlayTennis"``.
    """

    def __init__(self, df, target_column="PlayTennis"):
        self.df = df
        # Stored once so the prior and the likelihood always use the same column.
        self.target_column = target_column

    def calculate_prior_probability(self, feature, target):
        """Return the fraction of rows whose ``feature`` column equals ``target``.

        Raises ``ZeroDivisionError`` if the stored frame has no rows.
        """
        return len(self.df[self.df[feature] == target]) / len(self.df)

    def caculate_likelihood(self, feature, value, target):
        """Return P(``feature`` = ``value`` | class = ``target``) as a relative frequency.

        The method name keeps its original spelling for backward compatibility.
        Raises ``ZeroDivisionError`` if no row belongs to class ``target``.
        """
        subset = self.df[self.df[self.target_column] == target]
        return len(subset[subset[feature] == value]) / len(subset)

    def _likelihoods(self, observed, target):
        """Return the likelihood of each (feature, value) pair given ``target``, in order."""
        return [
            self.caculate_likelihood(feature, value, target)
            for feature, value in observed
        ]

    @staticmethod
    def _joint_score(prior, likelihoods):
        """Multiply ``prior`` by each likelihood, left to right."""
        score = prior
        for likelihood in likelihoods:
            score = score * likelihood
        return score

    def naive_bayes_prediction(self, outlook, Temperature, humidity, wind):
        """Predict ``"Yes"`` or ``"No"`` for one observation.

        Prints every conditional likelihood and both unnormalized class scores
        as a side effect.

        Returns
        -------
        str
            ``"Yes"`` when the Yes score is strictly greater than the No
            score, otherwise ``"No"`` (ties resolve to ``"No"``).
        """
        observed = (
            ("Outlook", outlook),
            ("Temperature", Temperature),
            ("Humidity", humidity),
            ("Wind", wind),
        )

        prior_yes = self.calculate_prior_probability(self.target_column, "Yes")
        prior_no = self.calculate_prior_probability(self.target_column, "No")

        yes_likelihoods = self._likelihoods(observed, "Yes")
        p_outlook_yes, p_temperature_yes, p_humidity_yes, p_wind_yes = yes_likelihoods
        print(f" P_Outlook_Yes= {p_outlook_yes}")
        print(f" P_Temperature_Yes= {p_temperature_yes}")
        print(f" P_Humidity_Yes= {p_humidity_yes}")
        print(f" P_Wind_Yes= {p_wind_yes}")

        no_likelihoods = self._likelihoods(observed, "No")
        p_outlook_no, p_temperature_no, p_humidity_no, p_wind_no = no_likelihoods
        print(f"P_Outlook_No= {p_outlook_no}")
        print(f"P_Temperature_No= {p_temperature_no}")
        print(f"P_Humidity_No= {p_humidity_no}")
        print(f"P_Wind_No= {p_wind_no}")

        score_yes = self._joint_score(prior_yes, yes_likelihoods)
        score_no = self._joint_score(prior_no, no_likelihoods)

        print(f"P_yes= {score_yes} and P_No= {score_no}")

        return "Yes" if score_yes > score_no else "No"

In [74]:
# Same query as a keyword call so each value is tied to its feature by name.
nv = NaiveBayesClassifier(df)
prediction = nv.naive_bayes_prediction(
    outlook="Rain", Temperature="Hot", humidity="High", wind="Strong"
)
print(prediction)

 P_Outlook_Yes= 0.3333333333333333
 P_Temperature_Yes= 0.2222222222222222
 P_Humidity_Yes= 0.3333333333333333
 P_Wind_Yes= 0.3333333333333333
P_Outlook_No= 0.4
P_Temperature_No= 0.4
P_Humidity_No= 0.8
P_Wind_No= 0.6
P_yes= 0.005291005291005291 and P_No= 0.027428571428571438
No
