In [1]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.naive_bayes import CategoricalNB

In [2]:
# Classic "play tennis" weather dataset, written one observation per row so
# each day's record can be read (and checked) at a glance.
columns = ["Outlook", "Temperature", "Humidity", "Windy", "Play"]
rows = [
    ("Sunny",    "Hot",  "High",   False, "No"),
    ("Sunny",    "Hot",  "High",   True,  "No"),
    ("Overcast", "Hot",  "High",   False, "Yes"),
    ("Rainy",    "Mild", "High",   False, "Yes"),
    ("Rainy",    "Cool", "Normal", False, "Yes"),
    ("Rainy",    "Cool", "Normal", True,  "No"),
    ("Overcast", "Cool", "Normal", True,  "Yes"),
    ("Sunny",    "Mild", "High",   False, "No"),
    ("Sunny",    "Cool", "Normal", False, "Yes"),
    ("Rainy",    "Mild", "Normal", False, "Yes"),
    ("Sunny",    "Mild", "Normal", True,  "Yes"),
    ("Overcast", "Mild", "High",   True,  "Yes"),
    ("Overcast", "Hot",  "Normal", False, "Yes"),
    ("Rainy",    "Mild", "High",   True,  "No"),
]

# Transpose the records into the column -> list-of-values dict that the
# DataFrame is built from.
data = {name: list(values) for name, values in zip(columns, zip(*rows))}

df = pd.DataFrame(data)
df

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,Sunny,Hot,High,False,No
1,Sunny,Hot,High,True,No
2,Overcast,Hot,High,False,Yes
3,Rainy,Mild,High,False,Yes
4,Rainy,Cool,Normal,False,Yes
5,Rainy,Cool,Normal,True,No
6,Overcast,Cool,Normal,True,Yes
7,Sunny,Mild,High,False,No
8,Sunny,Cool,Normal,False,Yes
9,Rainy,Mild,Normal,False,Yes


In [4]:
# Input columns, in the order they are handed to the encoder.
feature_cols = ["Outlook", "Temperature", "Humidity", "Windy"]

# Turn each categorical feature into an integer code per category.
encoder = OrdinalEncoder()
X = encoder.fit_transform(df[feature_cols])

# Binary target: "No" -> 0, "Yes" -> 1.
y = df["Play"].map({"No": 0, "Yes": 1}).to_numpy()

# For every feature, record which integer the encoder assigned to each
# category (a category's code is its position in encoder.categories_).
feature_mapping = {
    name: {category: code for code, category in enumerate(cats.tolist())}
    for name, cats in zip(feature_cols, encoder.categories_)
}

print("Feature encoding (category -> integer):")
for name, mapping in feature_mapping.items():
    print(name, ":", mapping)

Feature encoding (category -> integer):
Outlook : {'Overcast': 0, 'Rainy': 1, 'Sunny': 2}
Temperature : {'Cool': 0, 'Hot': 1, 'Mild': 2}
Humidity : {'High': 0, 'Normal': 1}
Windy : {False: 0, True: 1}


In [5]:
# Fit a categorical Naive Bayes classifier on the ordinal-encoded features
# (fit() returns the estimator itself, so it can be chained).
model = CategoricalNB().fit(X, y)

# Score on the same rows the model was trained on; this is a sanity check,
# not an estimate of performance on unseen data.
accuracy = model.score(X, y)
print(f"Training accuracy on the dataset: {accuracy:.2f}")

Training accuracy on the dataset: 0.93


In [9]:
def predict_play(outlook, temperature, humidity, windy):
    """Predict whether to play for a single day's weather.

    Defined here because no other cell in the notebook defines it, so the
    call below would raise NameError on a fresh kernel (Restart & Run All).

    Relies on the notebook-level `encoder` (fitted OrdinalEncoder) and
    `model` (fitted CategoricalNB) created in the earlier cells.

    Parameters
    ----------
    outlook : str
        One of the Outlook categories seen during fitting.
    temperature : str
        One of the Temperature categories seen during fitting.
    humidity : str
        One of the Humidity categories seen during fitting.
    windy : bool
        Whether the day is windy.

    Returns
    -------
    label : str
        "Yes" or "No".
    probabilities : numpy.ndarray
        Class probabilities ordered as [No, Yes] (the target was encoded
        as No -> 0, Yes -> 1).

    Raises
    ------
    ValueError
        Raised by the encoder if a value was not seen during fitting
        (OrdinalEncoder's default handle_unknown="error").
    """
    # Build a one-row frame with the same column names and order used to
    # fit the encoder, so the transform lines up with the training data.
    sample = pd.DataFrame(
        {
            "Outlook": [outlook],
            "Temperature": [temperature],
            "Humidity": [humidity],
            "Windy": [windy],
        }
    )
    encoded = encoder.transform(sample)

    class_probabilities = model.predict_proba(encoded)[0]
    predicted_class = model.predict(encoded)[0]
    predicted_label = "Yes" if predicted_class == 1 else "No"
    return predicted_label, class_probabilities


outlook = "Sunny"
temperature = "Mild"
humidity = "Normal"
windy = False

label, probabilities = predict_play(outlook, temperature, humidity, windy)

print("Prediction for new day:")
print("Outlook:", outlook)
print("Temperature:", temperature)
print("Humidity:", humidity)
print("Windy:", windy)
print("-> Should we play?:", label)
print("-> Probabilities [No, Yes]:", probabilities)

Prediction for new day:
Outlook: Sunny
Temperature: Mild
Humidity: Normal
Windy: False
-> Should we play?: Yes
-> Probabilities [No, Yes]: [0.23217141 0.76782859]


