In [None]:
import numpy as np
import pandas as pd

# 1. Implementation of Multinomial Naive Bayes (MLE with Laplace smoothing)
def train_multinomial_nb(df, dict_size=1000):
    """
    Estimate Laplace-smoothed word probabilities for the Multinomial Event Model.

    Every row of ``df`` is counted as one word occurrence (token): the
    per-class denominator is the number of rows with that label, and the
    numerator is the number of those rows carrying a given ``word_index``.

    For each class c (1 = spam, 0 = ham) the estimate for word k is::

        phi_k|y=c = (count of k in class c + 1) / (tokens in class c + dict_size)

    Both returned Series share one index: the sorted union of the word
    indices observed in either class.  A word that occurs in only one class
    therefore receives the smoothed probability ``1 / (tokens + dict_size)``
    in the other class instead of being missing from it.  Dictionary words
    that never occur in ``df`` at all are not listed; their smoothed
    probability is likewise ``1 / (tokens + dict_size)``.

    Reference: Stanford CS229 Lecture 06 (Naive Bayes, multinomial event model).

    Args:
        df: DataFrame with a ``label`` column (1 or 0) and a ``word_index``
            column.
        dict_size: Vocabulary size used in the Laplace-smoothing denominator.

    Returns:
        Tuple ``(phi_k_1, phi_k_0)`` of Series indexed by ``word_index``
        holding the smoothed probabilities for spam and ham respectively.
    """
    # Split the token table by class.
    spam_df = df[df['label'] == 1]
    ham_df = df[df['label'] == 0]

    # Raw occurrence counts per word; only words observed in that class appear.
    spam_counts = spam_df.groupby('word_index').size()
    ham_counts = ham_df.groupby('word_index').size()

    # Align both classes on a common vocabulary so that a word unseen in one
    # class gets a zero count (and hence a smoothed, non-zero probability)
    # rather than no entry at all.
    vocab = spam_counts.index.union(ham_counts.index)

    def _smoothed(counts, n_tokens):
        # Add-one smoothing over the aligned vocabulary.
        return (counts.reindex(vocab, fill_value=0) + 1) / (n_tokens + dict_size)

    phi_k_1 = _smoothed(spam_counts, len(spam_df))
    phi_k_0 = _smoothed(ham_counts, len(ham_df))

    return phi_k_1, phi_k_0

# 2. Geometric Margin Calculation
def calculate_geometric_margin(w, b, x, y):
    """
    Return the geometric margin of the labelled point (x, y) with respect to
    the hyperplane w . x + b = 0.

    The functional margin y * (w . x + b) is divided by the norm of w,
    giving the signed distance from the point to the hyperplane (positive
    when the sign of w . x + b agrees with y).
    Reference: Stanford CS229 Lecture 06 (functional and geometric margins).
    """
    # Raw decision value of the linear classifier at x.
    decision_value = np.dot(w, x) + b
    # Scale the label-signed score by ||w|| to make it invariant to the
    # scaling of (w, b).
    return y * decision_value / np.linalg.norm(w)

# Worked example: hyperplane x1 + x2 - 2 = 0 and one positively labelled point.
w, b = np.array([1, 1]), -2
x_test, y_test = np.array([3, 3]), 1  # label 1 matches the sign of w . x + b

# Prints "Geometric Margin: 2.8284" (functional margin 4 divided by sqrt(2)).
margin = calculate_geometric_margin(w, b, x_test, y_test)
print(f"Geometric Margin: {margin:.4f}")