# **Subjective test**

## Create training dataset

In [88]:
import numpy as np

In [89]:
def create_training_data():
    """Build the Play Tennis training table as a 2-D NumPy array of strings.

    Every row is one observation; the columns are, in order,
    Outlook, Temperature, Humidity, Wind and the target label
    ("Yes"/"No" for Play Tennis).

    Returns:
        np.ndarray of shape (10, 5) holding the raw categorical values.
    """
    observations = (
        ("Sunny", "Hot", "High", "Weak", "No"),
        ("Sunny", "Hot", "High", "Strong", "No"),
        ("Overcast", "Hot", "High", "Weak", "Yes"),
        ("Rain", "Mild", "High", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Cool", "Normal", "Strong", "No"),
        ("Overcast", "Cool", "Normal", "Strong", "Yes"),
        ("Overcast", "Mild", "High", "Weak", "No"),
        ("Sunny", "Cool", "Normal", "Weak", "Yes"),
        ("Rain", "Mild", "Normal", "Weak", "Yes"),
    )
    table = np.array(observations)
    return table

In [90]:
# Materialise the dataset once; later cells reuse `train_data`.
train_data = create_training_data()
# The table has only 10 rows, so printing it in full is fine.
print(train_data)

[['Sunny' 'Hot' 'High' 'Weak' 'No']
 ['Sunny' 'Hot' 'High' 'Strong' 'No']
 ['Overcast' 'Hot' 'High' 'Weak' 'Yes']
 ['Rain' 'Mild' 'High' 'Weak' 'Yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'Yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'No']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'Yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'No']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'Yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'Yes']]


## Compute prior probabilities

In [93]:
def compute_prior_probabilities(train_data):
    """
    Estimate the class priors P(Play Tennis = No) and P(Play Tennis = Yes).

    The prior of a class is its relative frequency in the last column of
    the training table.

    Args:
        train_data: Training dataset; the last column holds the class label
    Returns:
        Array of prior probabilities [P(No), P(Yes)]
    """
    class_names = ["No", "Yes"]
    labels = train_data[:, -1]  # target column
    sample_count = len(labels)

    prior_probs = np.zeros(len(class_names))
    for slot, label in enumerate(class_names):
        # Summing the boolean mask counts the rows belonging to this class.
        prior_probs[slot] = np.sum(labels == label) / sample_count

    return prior_probs

In [94]:
# Priors come back ordered as [P(No), P(Yes)].
prior_probability = compute_prior_probabilities(train_data)
p_no, p_yes = prior_probability
print("P(‘Play Tennis’ = No)", p_no)
print("P(‘Play Tennis’ = Yes)", p_yes)

P(‘Play Tennis’ = No) 0.4
P(‘Play Tennis’ = Yes) 0.6


## Compute conditional probabilities

In [127]:
import numpy as np

def compute_conditional_probabilities(train_data, alpha=0.0):
    """
    Calculate conditional probabilities P(Feature=value | Class) for all features.

    Args:
        train_data: Training dataset (2-D numpy array; the last column is the
            target label, every other column is a categorical feature)
        alpha: Additive (Laplace) smoothing strength. The default 0.0 yields
            plain relative frequencies; alpha=1.0 is classic Laplace
            smoothing, which avoids zero probabilities for value/class
            combinations that never occur in the training data.
    Returns:
        Tuple of (conditional_probabilities, feature_values):
            conditional_probabilities[f] is an array of shape
            (number of classes, number of unique values of feature f) whose
            rows follow the class order ["No", "Yes"] and whose columns
            follow feature_values[f];
            feature_values[f] is the sorted array of unique values of
            feature f as returned by np.unique.
    Raises:
        ValueError: If alpha is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    class_names = ["No", "Yes"]
    y_train = train_data[:, -1]
    n_features = train_data.shape[1] - 1  # Exclude target column
    conditional_probs = []
    feature_values = []

    for feature_idx in range(n_features):
        feature_column = train_data[:, feature_idx]

        # Unique values define the column order of the probability table
        unique_values = np.unique(feature_column)
        feature_values.append(unique_values)

        # Shape: (number of classes, number of unique feature values)
        feature_cond_probs = np.zeros((len(class_names), len(unique_values)))

        for class_idx, class_name in enumerate(class_names):
            # Feature values of the samples that belong to this class
            class_column = feature_column[y_train == class_name]

            # P(Feature=value | Class=class)
            #   = (Count(value & class) + alpha) / (Count(class) + alpha * n_values)
            denominator = len(class_column) + alpha * len(unique_values)
            if denominator == 0:
                # Class absent from the data and no smoothing requested:
                # keep the zeros instead of producing 0/0 = nan.
                continue

            value_counts = np.array(
                [np.sum(class_column == value_name) for value_name in unique_values]
            )
            feature_cond_probs[class_idx] = (value_counts + alpha) / denominator

        conditional_probs.append(feature_cond_probs)

    return conditional_probs, feature_values

In [132]:
# Test: show the unique values found for each of the four features.
_, feature_values = compute_conditional_probabilities(train_data=train_data)
for position, label in enumerate(("x1 = ", "x2 = ", "x3 = ", "x4 = ")):
    print(label, feature_values[position])

x1 =  ['Overcast' 'Rain' 'Sunny']
x2 =  ['Cool' 'Hot' 'Mild']
x3 =  ['High' 'Normal']
x4 =  ['Strong' 'Weak']


## Get feature index

In [102]:
def get_feature_index(feature_value, feature_values):
    """
    Get the index of a feature value in the feature values array.

    Args:
        feature_value: Value to find
        feature_values: Array (or list) of possible feature values
    Returns:
        Index of the first occurrence of the feature value
    Raises:
        IndexError: If feature_value does not occur in feature_values.
    """
    known_values = np.asarray(feature_values)
    # atleast_1d guards against the comparison collapsing to a scalar.
    hits = np.nonzero(np.atleast_1d(known_values == feature_value))[0]
    if hits.size == 0:
        # Same exception type as the bare out-of-bounds lookup used to raise,
        # but with a message that names the offending value.
        raise IndexError(
            f"Feature value {feature_value!r} not found among known values "
            f"{known_values.tolist()!r}"
        )
    return hits[0]

In [103]:
# Look up the column position of each Outlook value (first feature).
_, feature_values = compute_conditional_probabilities(train_data)
outlook = feature_values[0]
i1, i2, i3 = (
    get_feature_index(label, outlook) for label in ("Overcast", "Rain", "Sunny")
)

print(i1, i2, i3)

0 1 2


## Train Naive Bayes model

In [104]:
def train_naive_bayes(train_data):
    """
    Fit the Naive Bayes classifier by estimating its probability tables.

    Args:
        train_data: Training dataset
    Returns:
        Tuple of (prior_probabilities, conditional_probabilities, feature_names),
        where feature_names is the per-feature collection of unique values
        returned by compute_conditional_probabilities.
    """
    # Class priors first, then the per-feature likelihood tables.
    priors = compute_prior_probabilities(train_data)
    likelihoods, values_per_feature = compute_conditional_probabilities(train_data)

    model = (priors, likelihoods, values_per_feature)
    return model


In [105]:
# Train the model.
# `feature_names` holds the unique values seen for each feature (one array per
# feature), in the same order as the columns of each conditional table.
prior_probs, conditional_probs, feature_names = train_naive_bayes(train_data)

## Predict a test sample

In [106]:
def predict_play_tennis(
        X, prior_probabilities, conditional_probabilities, feature_names
):
    """
    Make a prediction for given features.

    Args:
        X: List of feature values [Outlook, Temperature, Humidity, Wind]
        prior_probabilities: Prior probabilities for each class, ordered
            [P(No), P(Yes)]
        conditional_probabilities: One table per feature, indexed as
            [class_idx, value_idx]
        feature_names: Known values of each feature; the position of a value
            here is its column in the matching conditional table
    Returns:
        Tuple of (prediction, probabilities) where prediction is "No" or
        "Yes" and probabilities is a dict {"No": p, "Yes": p} of normalized
        posteriors rounded to 2 decimals. If every class score is 0 the
        probabilities fall back to a uniform 0.5 / 0.5.
    """
    class_names = ["No", "Yes"]

    # Translate each feature value into its column in the conditional tables
    feature_indices = [
        get_feature_index(feature_value, feature_names[i])
        for i, feature_value in enumerate(X)
    ]

    # Unnormalized posterior per class: prior * product of likelihoods
    class_scores = np.zeros(len(class_names))
    for class_idx in range(len(class_names)):
        score = prior_probabilities[class_idx]
        for feature_idx, value_idx in enumerate(feature_indices):
            score *= conditional_probabilities[feature_idx][class_idx, value_idx]
        class_scores[class_idx] = score

    # Normalize probabilities
    total_prob = class_scores.sum()
    if total_prob > 0:
        normalized_probs = class_scores / total_prob
    else:
        # Default if all probabilities are 0. Kept as an array so the
        # conversion below works on this path too (a plain list of floats
        # has no .item() and used to raise AttributeError here).
        normalized_probs = np.full(len(class_names), 1.0 / len(class_names))

    # Make prediction
    prediction = class_names[int(np.argmax(class_scores))]

    # Create probability dictionary with plain Python floats
    prob_dict = {
        name: round(float(prob), 2)
        for name, prob in zip(class_names, normalized_probs)
    }

    return prediction, prob_dict

In [155]:
# Test sample: [Outlook, Temperature, Humidity, Wind]
X = ["Sunny", "Cool", "High", "Strong"]

prior_probs, conditional_probs, feature_names = train_naive_bayes(train_data)
prediction, prob_dict = predict_play_tennis(
    X, prior_probs, conditional_probs, feature_names
)

print("Ad should go!" if prediction == "Yes" else "Ad should not go!")

prediction, prob_dict

Ad should not go!


('No', {'No': 0.87, 'Yes': 0.13})

# **Objective test**

## Binary Classification - Play Tennis

In [134]:
#1 A P("Play Tennis" = "Yes") = 6/10, P("Play Tennis" = "No") = 4/10


In [None]:
# TODO: I need more help with these questions.
#2 B P("Play Tennis" = "Yes"|X) ∝ 0.0028
#3 C P("Play Tennis" = "No"| X) ∝ 0.0188


In [142]:
# NOTE: this cell used to call `predict(...)`, which is not defined anywhere
# in this notebook, so it failed with NameError on a fresh kernel. The
# unrounded posterior is computed here from the helpers defined above.
X = ["Sunny","Cool", "High", "Strong"]
prior_probs, conditional_probs, feature_names = train_naive_bayes(train_data)

# Column of each test value inside its feature's conditional table
value_indices = [
    get_feature_index(value, feature_names[i]) for i, value in enumerate(X)
]

# Unnormalized score per class: prior * product of the matching likelihoods
class_scores = np.array([
    prior_probs[class_idx] * np.prod([
        conditional_probs[feature_idx][class_idx, value_idx]
        for feature_idx, value_idx in enumerate(value_indices)
    ])
    for class_idx in range(len(prior_probs))
])

# Normalize so the two posteriors sum to 1; classes are ordered [No, Yes]
final_probabilities = class_scores / class_scores.sum()
prediction = ["No", "Yes"][int(np.argmax(class_scores))]

print(f"P(\"Play Tennis\" = \"no\" | X) = {final_probabilities[0]:.4f} ({final_probabilities[0]*100:.2f}%)")
print(f"P(\"Play Tennis\" = \"yes\" | X) = {final_probabilities[1]:.4f} ({final_probabilities[1]*100:.2f}%)")

P("Play Tennis" = "no" | X) = 0.8710 (87.10%)
P("Play Tennis" = "yes" | X) = 0.1290 (12.90%)


In [None]:
#4 B
#5 A
#6 B
#7 C
#8 A

## Multi-label Classification - Traffic Data

In [None]:
# TODO: I need more help with questions 9-14 (not answered yet).
# 9
# 10
# 11
# 12
# 13
# 14

## Iris Classification

In [None]:
# TODO: I need more help with questions 15-17 (not answered yet).
# 15
# 16
# 17