# **Subjective test**

## Create training dataset

In [5]:
import numpy as np

In [7]:
def create_training_data():
    """Build the play-tennis training table as a 2-D NumPy string array.

    Every row is one observation; the columns are, in order,
    Outlook, Temperature, Humidity, Wind and the class label
    ("yes" / "no") in the last position.
    """
    # One observation per line, columns separated by whitespace.
    table = """
        Sunny     Hot   High    Weak    no
        Sunny     Hot   High    Strong  no
        Overcast  Hot   High    Weak    yes
        Rain      Mild  High    Weak    yes
        Rain      Cool  Normal  Weak    yes
        Rain      Cool  Normal  Strong  no
        Overcast  Cool  Normal  Strong  yes
        Overcast  Mild  High    Weak    no
        Sunny     Cool  Normal  Weak    yes
        Rain      Mild  Normal  Weak    yes
    """
    rows = [line.split() for line in table.strip().splitlines()]
    return np.array(rows)

In [8]:
# Build the training table once at module level; later cells reuse
# `train_data` as the input to every probability computation.
train_data = create_training_data()
print(train_data)

[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'no']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]


## Compute prior probabilities

In [30]:
def compute_prior_probabilities(train_data):
    """
    Estimate the class priors from the label column.

    Args:
        train_data: 2-D array whose last column holds the class label
            ("no" or "yes")
    Returns:
        Float array [P(no), P(yes)]: the fraction of rows carrying each label
    """
    labels = train_data[:, -1]  # target column
    n_rows = len(labels)

    # Index 0 -> "no", index 1 -> "yes".
    label_counts = [np.count_nonzero(labels == name) for name in ("no", "yes")]

    return np.array(label_counts, dtype=float) / n_rows

In [31]:
# Compute the class priors for the training table and print them;
# index 0 is the "no" class and index 1 is the "yes" class.
prior_probability = compute_prior_probabilities(train_data=train_data)
print("P(‘Play Tennis’ = No)", prior_probability[0])
print("P(‘Play Tennis’ = Yes)", prior_probability[1])

P(‘Play Tennis’ = No) 0.4
P(‘Play Tennis’ = Yes) 0.6


## Compute conditional probabilities

In [None]:
def compute_conditional_probabilities(train_data):
    """
    Calculate conditional probabilities P(Feature = value | Class) for all features.

    Args:
        train_data: 2-D array; every column except the last is a categorical
            feature and the last column is the class label ("no" or "yes")
    Returns:
        Tuple of (conditional_probabilities, feature_values):
          - conditional_probabilities: list with one array per feature, of
            shape (2, n_unique_values); row 0 is class "no", row 1 is class
            "yes", and columns follow the order of the matching entry in
            feature_values
          - feature_values: list with one array per feature holding its
            sorted unique values (as returned by np.unique)
    """
    # Must match the label spelling used in the data (lowercase).
    class_names = ["no", "yes"]
    labels = train_data[:, -1]
    n_features = train_data.shape[1] - 1  # Exclude target column
    conditional_probs = []
    feature_values = []

    for feature_idx in range(n_features):
        column = train_data[:, feature_idx]

        # Get unique values for this feature
        unique_values = np.unique(column)
        feature_values.append(unique_values)

        # Initialize conditional probability matrix (shape must be a tuple)
        feature_cond_probs = np.zeros((len(class_names), len(unique_values)))

        for class_idx, class_name in enumerate(class_names):
            # Get samples for this class
            class_column = column[labels == class_name]
            class_count = len(class_column)
            if class_count == 0:
                # Class absent from the data: keep zeros instead of dividing 0/0.
                continue

            for value_idx, value in enumerate(unique_values):
                # Count occurrences of this feature value in this class
                # and turn the count into a conditional probability.
                value_count = np.count_nonzero(class_column == value)
                feature_cond_probs[class_idx, value_idx] = value_count / class_count

        conditional_probs.append(feature_cond_probs)
    return conditional_probs, feature_values


In [None]:
# Test: keep only the second element returned by
# compute_conditional_probabilities (documented as the per-feature value
# arrays) and print its first four entries, labelled x1..x4.
_, feature_values = compute_conditional_probabilities(train_data=train_data)
print("x1 = ",feature_values[0])
print("x2 = ",feature_values[1])
print("x3 = ",feature_values[2])
print("x4 = ",feature_values[3])

## Get feature index

In [None]:
def get_feature_index(feature_value, feature_values):
    """
    Get the index of a feature value in the feature values array.

    Args:
        feature_value: Value to find
        feature_values: Array of possible feature values
    Returns:
        Index (int) of the first position where feature_values equals
        feature_value
    Raises:
        ValueError: If feature_value does not occur in feature_values
    """
    matches = np.flatnonzero(np.asarray(feature_values) == feature_value)
    if matches.size == 0:
        # Fail loudly rather than returning None and breaking later indexing.
        raise ValueError(
            f"Unknown feature value {feature_value!r}; "
            f"expected one of {list(feature_values)}"
        )
    return int(matches[0])

In [None]:
# Look up the positions of the three Outlook values inside the first
# per-feature value array and print them.
_, feature_values = compute_conditional_probabilities(train_data)
outlook = feature_values[0]  # first feature column
i1 = get_feature_index("Overcast", outlook)
i2 = get_feature_index("Rain", outlook)
i3 = get_feature_index("Sunny", outlook)

print(i1, i2, i3)

## Train naive bayes model

In [None]:
def train_naive_bayes(train_data):
    """
    Train the Naive Bayes classifier.

    Training consists of estimating the class priors and the per-feature
    conditional probability tables from the training table.

    Args:
        train_data: Training dataset (features in all columns but the last,
            class label in the last column)
    Returns:
        Tuple of (prior_probabilities, conditional_probabilities, feature_names)
        where feature_names is the list of per-feature value arrays returned
        by compute_conditional_probabilities
    """

    # Calculate prior probabilities
    prior_probabilities = compute_prior_probabilities(train_data)

    # Calculate conditional probabilities
    conditional_probabilities, feature_names = compute_conditional_probabilities(
        train_data
    )

    return prior_probabilities, conditional_probabilities, feature_names


In [None]:
# Train the model: keep the three values returned by train_naive_bayes
# in module-level names so later cells can reuse them.
prior_probs, conditional_probs, feature_names = train_naive_bayes(train_data)

## Predict a test sample

In [None]:
def predict_play_tennis(
        X, prior_probabilities, conditional_probabilities, feature_names
):
    """
    Make a prediction for given features.

    For each class the score is the class prior multiplied by the
    conditional probability of every observed feature value given that
    class. The class with the highest score is predicted.

    Args:
        X: List of feature values [Outlook, Temperature, Humidity, Wind]
        prior_probabilities: Prior probabilities for each class,
            indexed as [no, yes]
        conditional_probabilities: One 2-D table per feature, indexed as
            [class_idx, value_idx]
        feature_names: Names/values for each feature; entry i lists the
            possible values of feature i in the column order of
            conditional_probabilities[i]
    Returns:
        Tuple of (prediction, probabilities): prediction is "no" or "yes";
        probabilities is a dict {"No": p, "Yes": p} of the normalized
        class scores rounded to 2 decimals
    """
    class_names = ["no", "yes"]

    # Get feature indices
    feature_indices = []
    for i, feature_value in enumerate(X):
        feature_indices.append(get_feature_index(feature_value, feature_names[i]))

    # Calculate probabilities for each class
    class_probabilities = []

    for class_idx in range(len(class_names)):
        # Start with prior probability
        class_prob = prior_probabilities[class_idx]

        # Multiply by conditional probabilities of each observed value
        for feature_idx, value_idx in enumerate(feature_indices):
            class_prob = (
                class_prob
                * conditional_probabilities[feature_idx][class_idx, value_idx]
            )

        class_probabilities.append(class_prob)

    # Normalize probabilities
    total_prob = sum(class_probabilities)
    if total_prob > 0:
        normalized_probs = [p / total_prob for p in class_probabilities]
    else:
        normalized_probs = [0.5, 0.5]  # Default if all probabilities are 0

    # Make prediction
    predicted_class_idx = int(np.argmax(class_probabilities))
    prediction = class_names[predicted_class_idx]

    # Create probability dictionary. float() accepts both NumPy scalars and
    # the plain Python floats of the fallback above (which have no .item()).
    prob_dict = {
        "No": round(float(normalized_probs[0]), 2),
        "Yes": round(float(normalized_probs[1]), 2)
    }

    return prediction, prob_dict

In [None]:
# Sample to classify: [Outlook, Temperature, Humidity, Wind]
X = ["Sunny","Cool", "High", "Strong"]

prior_probs, conditional_probs, feature_names = train_naive_bayes(train_data)
prediction, prob_dict = predict_play_tennis(
    X, prior_probs, conditional_probs, feature_names
)

# `prediction` is the string "yes" or "no"; a plain truthiness test would
# always take the first branch because both strings are non-empty.
if prediction == "yes":
    print("Ad should go!")
else:
    print("Ad should not go!")

# **Objective test**

## Binary Classification - Play Tennis

## Multi-label Classification - Traffic Data

## Iris Classification