# Conditional Probability

Conditional probability is the probability of an event occurring given that another event has already occurred.



# Bayes’ Theorem

Bayes’ Theorem is a fundamental rule in probability that allows us to update the probability of an event based on new information (evidence).

In [12]:
import pandas as pd
data = {
    "Outlook": ["Rainy", "Rainy", "Overcast", "Sunny", "Sunny", "Sunny", "Overcast", "Rainy", "Rainy", "Sunny", "Rainy", "Overcast", "Overcast", "Sunny"],
    "Temperature": ["Hot", "Hot", "Hot", "Mild", "Cool", "Cool", "Cool", "Mild", "Cool", "Mild", "Mild", "Mild", "Hot", "Mild"],
    "Humidity": ["High", "High", "High", "High", "Normal", "Normal", "Normal", "High", "Normal", "Normal", "Normal", "High", "Normal", "High"],
    "Windy": [False, True, False, False, False, True, True, False, False, False, True, True, False, True],
    "Play Golf": ["No", "No", "Yes", "Yes", "Yes", "No", "Yes", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "No"]
}
df = pd.DataFrame(data)

df.head()


Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play Golf
0,Rainy,Hot,High,False,No
1,Rainy,Hot,High,True,No
2,Overcast,Hot,High,False,Yes
3,Sunny,Mild,High,False,Yes
4,Sunny,Cool,Normal,False,Yes


In [3]:
from sklearn.preprocessing import LabelEncoder #Lable encoding

In [26]:


encoded_df = pd.DataFrame()  

for each in df.columns:
    encoder = LabelEncoder()
    encoded_df[each] = encoder.fit_transform(df[each])

    print(f"For {each}: {encoder.classes_} -> {list(range(len(encoder.classes_)))}")


For Outlook: ['Overcast' 'Rainy' 'Sunny'] -> [0, 1, 2]
For Temperature: ['Cool' 'Hot' 'Mild'] -> [0, 1, 2]
For Humidity: ['High' 'Normal'] -> [0, 1]
For Windy: [False  True] -> [0, 1]
For Play Golf: ['No' 'Yes'] -> [0, 1]


In [29]:
encoded_df.head()

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play Golf
0,1,1,0,0,0
1,1,1,0,1,0
2,0,1,0,0,1
3,2,2,0,0,1
4,2,0,1,0,1


In [30]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [31]:
data = {
    "Outlook": ["Rainy", "Rainy", "Overcast", "Sunny", "Sunny", "Sunny", "Overcast",
                "Rainy", "Rainy", "Sunny", "Rainy", "Overcast", "Overcast", "Sunny"],
    "Temperature": ["Hot", "Hot", "Hot", "Mild", "Cool", "Cool", "Cool",
                    "Mild", "Cool", "Mild", "Mild", "Mild", "Hot", "Mild"],
    "Humidity": ["High", "High", "High", "High", "Normal", "Normal", "Normal",
                 "High", "Normal", "Normal", "Normal", "High", "Normal", "High"],
    "Windy": [False, True, False, False, False, True, True,
              False, False, False, True, True, False, True],
    "Play Golf": ["No", "No", "Yes", "Yes", "Yes", "No", "Yes",
                  "No", "Yes", "Yes", "Yes", "Yes", "Yes", "No"]
}

df = pd.DataFrame(data)


In [32]:
encoded_df = pd.DataFrame()

encoders = {}

for col in df.columns:
    le = LabelEncoder()
    encoded_df[col] = le.fit_transform(df[col])
    encoders[col] = le
    print(f"{col}: {list(le.classes_)} -> {list(range(len(le.classes_)))}")


Outlook: ['Overcast', 'Rainy', 'Sunny'] -> [0, 1, 2]
Temperature: ['Cool', 'Hot', 'Mild'] -> [0, 1, 2]
Humidity: ['High', 'Normal'] -> [0, 1]
Windy: [np.False_, np.True_] -> [0, 1]
Play Golf: ['No', 'Yes'] -> [0, 1]


In [33]:
X = encoded_df.drop("Play Golf", axis=1).to_numpy()
y = encoded_df["Play Golf"].to_numpy()


In [34]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=999
)


In [35]:
model = CategoricalNB()
model.fit(X_train, y_train)


0,1,2
,"alpha  alpha: float, default=1.0 Additive (Laplace/Lidstone) smoothing parameter (set alpha=0 and force_alpha=True, for no smoothing).",1.0
,"force_alpha  force_alpha: bool, default=True If False and alpha is less than 1e-10, it will set alpha to 1e-10. If True, alpha will remain unchanged. This may cause numerical errors if alpha is too close to 0. .. versionadded:: 1.2 .. versionchanged:: 1.4  The default value of `force_alpha` changed to `True`.",True
,"fit_prior  fit_prior: bool, default=True Whether to learn class prior probabilities or not. If false, a uniform prior will be used.",True
,"class_prior  class_prior: array-like of shape (n_classes,), default=None Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.",
,"min_categories  min_categories: int or array-like of shape (n_features,), default=None Minimum number of categories per feature. - integer: Sets the minimum number of categories per feature to  `n_categories` for each features. - array-like: shape (n_features,) where `n_categories[i]` holds the  minimum number of categories for the ith column of the input. - None (default): Determines the number of categories automatically  from the training data. .. versionadded:: 0.24",


In [36]:
# Example input: [Outlook, Temperature, Humidity, Windy]
# Sunny=2, Cool=0, High=0, True=1
model.predict_proba([[2, 0, 0, 1]])


array([[0.63626723, 0.36373277]])

In [38]:
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)

print("Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)



Training Accuracy: 0.9090909090909091
Testing Accuracy: 0.6666666666666666


In [39]:
y_pred = model.predict(X_test)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Confusion Matrix:
[[1 0]
 [1 1]]

Classification Report:
              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.50      0.67         2

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3

