In [1]:
import numpy as np

# Labels for the two variables in the model: the email class and the link
# indicator L (L=0 is 'no_link', L=1 is 'suspicious_link').
email_types = ('phish', 'legit')
links = ('no_link', 'suspicious_link')

# Prior over email_types, listed in the same order: P(phish), P(legit).
prior_probs = (0.10, 0.90)

# Conditional distributions over `links` given the email class, listed in the
# same order as the `links` tuple.
phish_probs = (0.20, 0.80)   # P(L=0|phish), P(L=1|phish)
legit_probs = (0.95, 0.05)   # P(L=0|legit), P(L=1|legit)


In [2]:
def draw_prior(email_types, probs):
    """Sample a single email type.

    Args:
        email_types: Sequence of candidate email labels.
        probs: Probabilities for each label, aligned with `email_types`.

    Returns:
        One element of `email_types`, drawn via `np.random.choice`.
    """
    sampled_type = np.random.choice(email_types, p=probs)
    return sampled_type


def draw_model(links, email, phish_probs, legit_probs):
    """Sample a link outcome conditioned on the email type.

    Args:
        links: Sequence of candidate link labels.
        email: Email label; 'phish' selects `phish_probs`, any other value
            selects `legit_probs`.
        phish_probs: Probabilities over `links` used for phishing emails.
        legit_probs: Probabilities over `links` used otherwise.

    Returns:
        One element of `links`, drawn via `np.random.choice`.
    """
    link_probs = phish_probs if email == 'phish' else legit_probs
    return np.random.choice(links, p=link_probs)


def draw_joint(links, email_types, prior_probs, phish_probs, legit_probs):
    """Sample one (email, link) pair from the joint distribution.

    The email type is drawn first from the prior, then the link outcome is
    drawn from the conditional distribution for that email type.

    Returns:
        A string of the form "<email>_<link>".
    """
    email = draw_prior(email_types, prior_probs)
    link = draw_model(links, email, phish_probs, legit_probs)
    return "{}_{}".format(email, link)

def simulator(*args, num_sims=10000):
    """Draw `num_sims` samples from the joint distribution.

    Args:
        *args: Positional arguments forwarded unchanged to `draw_joint`.
        num_sims: Number of draws to perform.

    Returns:
        A list with one `draw_joint` result per simulation.
    """
    draws = []
    for _ in range(num_sims):
        draws.append(draw_joint(*args))
    return draws


In [3]:
# Number of Monte Carlo draws used to approximate the joint distribution.
num_sims = 100_000

# NOTE: no random seed is set, so the estimates vary slightly between runs.
sims = simulator(links, email_types, prior_probs, phish_probs, legit_probs, num_sims=num_sims)

# Distinct outcome labels (sorted) and how many times each one was drawn.
scenarios, counts = np.unique(sims, return_counts=True)


In [4]:
# Report each outcome's relative frequency, rounded to three decimals.
for s, c in zip(scenarios, counts):
    estimate = round(c / num_sims, 3)
    print(f'{s} occurred with probability {estimate}')


legit_no_link occurred with probability 0.854
legit_suspicious_link occurred with probability 0.046
phish_no_link occurred with probability 0.021
phish_suspicious_link occurred with probability 0.08


In [6]:
# Start every joint outcome at 0 so an outcome that was never drawn still
# appears in the table.
approx_joint = dict.fromkeys(
    (
        "phish_no_link",
        "phish_suspicious_link",
        "legit_no_link",
        "legit_suspicious_link",
    ),
    0,
)

# Overwrite with the relative frequencies observed in the simulation.
for s, c in zip(scenarios, counts):
    approx_joint[s] = c / num_sims

# Title, column header and separator rule, one per line.
print(
    "\nApproximate Joint Probability Table\n",
    "               L=0 (no link)    L=1 (suspicious link)",
    "------------------------------------------------------",
    sep="\n",
)

# One row per email type: P(type, L=0) then P(type, L=1).
phish_row = (approx_joint['phish_no_link'], approx_joint['phish_suspicious_link'])
legit_row = (approx_joint['legit_no_link'], approx_joint['legit_suspicious_link'])
print("Phish         {:.3f}           {:.3f}".format(*phish_row))
print("Legit         {:.3f}           {:.3f}".format(*legit_row))


Approximate Joint Probability Table

               L=0 (no link)    L=1 (suspicious link)
------------------------------------------------------
Phish         0.021           0.080
Legit         0.854           0.046


In [7]:
# Exact joint distribution from the chain rule P(E, L) = P(E) * P(L | E).
# It is derived from the model parameters rather than typed in as literals,
# so it cannot drift out of sync if the priors or conditionals are edited.
conditional_probs = {'phish': phish_probs, 'legit': legit_probs}

# Keys use the same "<email>_<link>" format that draw_joint produces, so they
# line up with the keys of approx_joint.
analytic_joint = {
    f"{email}_{link}": prior * likelihood
    for email, prior in zip(email_types, prior_probs)
    for link, likelihood in zip(links, conditional_probs[email])
}

# A valid joint distribution must sum to 1 (up to floating-point rounding).
assert np.isclose(sum(analytic_joint.values()), 1.0), "analytic joint must sum to 1"

print("\nAbsolute Differences (Approx vs Analytic)\n")

# Absolute Monte Carlo error for each joint outcome.
for k in analytic_joint:
    diff = abs(approx_joint[k] - analytic_joint[k])
    print(f"{k}: {diff:.4f}")



Absolute Differences (Approx vs Analytic)

phish_no_link: 0.0006
phish_suspicious_link: 0.0001
legit_no_link: 0.0012
legit_suspicious_link: 0.0006
