# Case Study: Predicting Tennis Match Playability Using Naïve Bayes Classification

In [None]:
#Import Packages
import numpy as np
import pandas as pd
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 1: Create a sample dataset

In [None]:
# The classic 14-row "play tennis" toy dataset, written one observation per row.
# Every value (including the Windy flag) is stored as a string label.
column_names = ['Outlook', 'Temperature', 'Humidity', 'Windy', 'Play']
observations = [
    ('Sunny', 'Hot', 'High', 'False', 'No'),
    ('Sunny', 'Hot', 'High', 'True', 'No'),
    ('Overcast', 'Hot', 'High', 'False', 'Yes'),
    ('Rain', 'Mild', 'High', 'False', 'Yes'),
    ('Rain', 'Cold', 'Low', 'False', 'Yes'),
    ('Rain', 'Cold', 'Low', 'True', 'No'),
    ('Overcast', 'Cold', 'Low', 'True', 'Yes'),
    ('Sunny', 'Mild', 'High', 'False', 'No'),
    ('Sunny', 'Cold', 'Low', 'False', 'Yes'),
    ('Rain', 'Mild', 'Low', 'False', 'Yes'),
    ('Sunny', 'Mild', 'Low', 'True', 'Yes'),
    ('Overcast', 'Mild', 'High', 'True', 'Yes'),
    ('Overcast', 'Hot', 'Low', 'False', 'Yes'),
    ('Rain', 'Mild', 'High', 'True', 'No'),
]
data = pd.DataFrame(observations, columns=column_names)
data.shape

In [None]:
# Show the dataset as constructed; in file order this runs before the encoding
# step, so the columns still hold their original string labels.
# NOTE(review): `display` is not imported in this file; presumably the
# IPython/Jupyter built-in, so this only runs inside a notebook session.
print("Step 1: Display the dataset")
display(data)

# Step 2: Encode categorical variables

In [None]:
# Fit one LabelEncoder per column and overwrite that column in `data` with its
# integer codes. Each fitted encoder is kept, keyed by column name, so codes
# can be translated back to the original labels later.
label_encoders = {}
for col in ('Outlook', 'Temperature', 'Humidity', 'Windy', 'Play'):
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])
    label_encoders[col] = encoder

In [None]:
# Show the dataset again; in file order this follows the encoding loop, which
# replaced every listed column with integer codes.
# NOTE(review): `display` is not imported in this file; presumably the
# IPython/Jupyter built-in, so this only runs inside a notebook session.
print("Step 2: Display the encoded dataset")
display(data)

# Step 3: Split features and target

In [None]:
# Feature matrix: the four weather attributes. Target vector: the Play column.
feature_columns = ['Outlook', 'Temperature', 'Humidity', 'Windy']
X = data.loc[:, feature_columns]
y = data.loc[:, 'Play']

# Step 4: Split into training and testing sets
# Hold out 10% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Shapes of the four split parts, in the order: X_train, X_test, y_train, y_test.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

# Step 5: Train Naïve Bayes classifier

In [None]:
# By default CategoricalNB sizes each feature's category table from the
# training rows only. A category code higher than any seen during fit (e.g. one
# that landed only in the test split, or in a later hand-built sample) would
# then be out of range at predict time. Passing the full category count per
# feature, taken from the fitted encoders, reserves a smoothed slot for every
# known category regardless of how the split falls.
category_counts = [len(label_encoders[col].classes_) for col in X_train.columns]
model = CategoricalNB(min_categories=category_counts)
model.fit(X_train, y_train)

# Step 6: Predict probability estimates on test set

In [None]:
# Class-membership probabilities for each held-out row.
probs = model.predict_proba(X_test)
print("Step 6: Predicted probabilities on test set")
# predict_proba orders its columns like model.classes_, so build the headers
# from the classes the model actually learned (decoded back to 'No'/'Yes')
# instead of hard-coding them; this keeps labels and columns aligned even if a
# class is missing from the training split.
class_labels = label_encoders['Play'].inverse_transform(model.classes_)
display(pd.DataFrame(probs, columns=[f"P({label})" for label in class_labels]))

# Step 7: Evaluate the model

In [None]:
# Predict a class for every held-out row and compare against the true labels.
y_pred = model.predict(X_test)
accuracy = 100 * accuracy_score(y_true=y_test, y_pred=y_pred)  # fraction -> percentage
print(f"Step 7: Model Accuracy: {accuracy:.2f}%")

# Step 8: Example prediction

In [None]:
# Describe the example with the original string labels and let the fitted
# encoders translate it. LabelEncoder numbers classes in sorted order
# (Overcast=0, Rain=1, Sunny=2; Cold=0, Hot=1, Mild=2; ...), so hand-written
# codes are easy to get wrong: [0, 2, 0, 1] would mean Overcast, Mild, High,
# True rather than the example intended here.
sample_labels = {  # Example: Sunny, Cold, High, True
    'Outlook': 'Sunny',
    'Temperature': 'Cold',
    'Humidity': 'High',
    'Windy': 'True',
}
# A one-row DataFrame with the same column names (and order) used at fit time,
# so the model receives named features just as it did during training.
sample = pd.DataFrame(
    {col: label_encoders[col].transform([value]) for col, value in sample_labels.items()}
)
prediction = model.predict(sample)
# Map the predicted integer code back to its 'Yes'/'No' label.
predicted_label = label_encoders['Play'].inverse_transform(prediction)
print("Step 8: Prediction for sample input")
print("Predicted Play Decision:", predicted_label[0])