# Probability Basics

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter
from scipy.stats import randint
from scipy.stats import norm

In [None]:
# Sample space: every face of a fair six-sided die
sample_space = set(range(1, 7))

# Event A: the roll shows an odd number
A = {face for face in sample_space if face % 2 == 1}

# With equally likely outcomes, P(A) = |A| / |sample space|
favourable_count = len(A)
p_a = favourable_count / len(sample_space)
print(f"P(A): {p_a}")

### Quick Check

In [None]:
# Two events on the single-die sample space defined in an earlier cell
A = {2, 4, 6}   # every even face
B = {4, 6}      # even faces greater than 3

# Each probability is the event size over the sample-space size
len_sample_space = len(sample_space)
p_A, p_B = (len(event) / len_sample_space for event in (A, B))

print(f"p_A = {p_A}, len_sample_space = {len_sample_space}")
print(f"p_B = {p_B}, len_sample_space = {len_sample_space}")

### Marginal Probability
* Probability of an event irrespective of outcome of other variables. Expressed as P(A) or P(B)

In [None]:
def marginal_probability(event, probs):
    """Return P(event) by adding up the probability of each outcome in it.

    Args:
        event: Set of outcomes that make up the event.
        probs: Mapping ``{outcome: probability}`` describing the sample space.

    Returns:
        The sum of ``probs[outcome]`` over the outcomes that belong to
        ``event``; 0.0 when no outcome of the sample space is in the event.
    """
    total = 0.0
    for outcome, prob in probs.items():
        if outcome in event:
            total += prob
    return total


# Sample space with probabilities {outcome: prob} (must sum to 1)
sample_space_probs = {1: 0.1, 2: 0.2, 3: 0.1, 4: 0.2,
                      5: 0.1, 6: 0.3}

# Event A - even numbers
A = {2, 4, 6}
p_A = marginal_probability(A, sample_space_probs)
print(f"Marginal probability of event A -> {p_A}")

# Event B - odd numbers
# B is already a set, so membership tests are O(1) without copying it first.
B = {1, 3, 5}
p_B = marginal_probability(B, sample_space_probs)
print(f"Marginal probability of event B -> {p_B}")
 

### Joint Probability
* Events A and B occurring at the same time.
* Events A -> Number is even ; B -> Number greater than 3
* If A and B are dependent (conditional):
  - P(A and B) = P(A|B) * P(B), where P(A|B) is the probability of A given that B has occurred
* If A and B are independent:
  - P(A and B) = P(A) * P(B)

In [None]:
# Sample space as a set
sample_space = {1, 2, 3, 4, 5, 6}

# Events
A = {2, 4, 6}  # Even
B = {4, 5, 6}  # Greater than 3

p_B = len(B) / len(sample_space)
print(f"P(B) -> {p_B}")

# Joint event: the outcomes that satisfy both A and B
intersection = A & B
print(f"intersection -> {intersection}")

# P(A ∩ B) is measured against the whole sample space. It involves no
# division by P(B), so it is computed unconditionally (it is simply 0 when
# the two events share no outcome).
p_A_jnt_B = len(intersection) / len(sample_space)
print(f"Joint P(A ∩ B): {p_A_jnt_B}")

### Conditional Probability
* What is the probability of getting an even number (event A) given that the number is greater than 3 (event B)? Break it into the following steps:
  - Roll the die
  - Got a number > 3
  - Probability that it is even
* P(A|B) = P(A ∩ B) / P(B)

In [None]:
def conditional_probability(event, given):
    """Return P(event | given) when all outcomes are equally likely.

    Conditioning on ``given`` shrinks the sample space to ``given``, so the
    result is |event ∩ given| / |given|.

    Args:
        event: Set of outcomes for the event of interest (A).
        given: Set of outcomes for the conditioning event (B).

    Returns:
        The conditional probability as a float.

    Raises:
        ValueError: If ``given`` is empty, because P(A|B) is undefined
            when P(B) = 0.
    """
    if not given:
        raise ValueError("P(A|B) is undefined when B is empty (P(B) = 0)")
    return len(event & given) / len(given)


# sample space
sample_space = {1, 2, 3, 4, 5, 6}

# Event A - even number
A = {2, 4, 6}

# Event B - that number is > 3
B = {4, 5, 6}

A_intsct_B = A & B

print(f"A intersection B -> {A_intsct_B}")

# Conditional Probability P(A|B)
# Given that the number is > 3 (event B), the denominator is the size of B,
# not the size of A.
p_A_given_B = conditional_probability(A, B)
print(f"P(A given B) = {p_A_given_B}")


### Random Variable
* Let X be a random variable representing the sum of the two dice
* X can take values 2 through 12

In [None]:
# Sample space for a throw of two dice: every ordered (first, second) pair
faces = range(1, 7)
sample_space = {(first, second) for first in faces for second in faces}

In [None]:
# Event X = 4: every (x, y) throw whose two faces add up to 4
event_set = {pair for pair in sample_space if pair[0] + pair[1] == 4}

In [None]:
# P(X = 4): favourable throws over all equally likely throws
favourable, possible = len(event_set), len(sample_space)
p_X = favourable / possible
p_X

In [None]:
# Random variable X is the sum of the two dice; print P(X = total) for
# every value X can take (2 through 12).
# The loop variable is named `total` rather than `sum` so the builtin sum()
# is not shadowed for the rest of the session.
for total in range(2, 13):
    event_set = {(x, y) for x, y in sample_space if x + y == total}
    p_X = len(event_set) / len(sample_space)
    print(f"P(X = {total}) -> {p_X}")


### Random roll of 2 dice

In [None]:
# Simulate num_rolls throws of two dice; randint.rvs(1, 7) draws integers
# from 1 to 6 (the upper bound is exclusive).
num_rolls = 1000
d1, d2 = (randint.rvs(1, 7, size=num_rolls) for _ in range(2))
# One row per throw: column 0 is the first die, column 1 the second
outcomes = np.column_stack((d1, d2))
outcomes

#### Get the counts for each outcome (x, y)

In [None]:
# Tally each (x, y) throw with a Counter - slightly slower, fine for small arrays.
# Rows are converted to tuples because Counter keys must be hashable.
outcome_counts = Counter(tuple(pair) for pair in outcomes.tolist())
print(outcome_counts)

In [None]:
# Faster method - suitable for larger arrays.
# With axis=0 and return_counts=True, np.unique returns the distinct rows
# together with the number of times each one occurs.
unique_rows, counts = np.unique(outcomes, axis=0, return_counts=True)
outcome_counts = {
    tuple(row): count
    for row, count in zip(unique_rows.tolist(), counts.tolist())
}

In [None]:
# Optional report of every outcome with its count and share of all rolls.
# Switched off by default; set cell_enabled to True to print it.
cell_enabled = False
if cell_enabled:
    print("Outcome Frequencies:")
    for outcome in outcome_counts:
        count = outcome_counts[outcome]
        print(f"{outcome}: {count} times ({(count / num_rolls) * 100:.2f}%)")

* Get the keys (each tuple), values (count of each tuple) 

In [None]:
# Split the tally into two parallel lists: the (x, y) pairs and their counts.
# NOTE: this rebinds `outcomes` to a plain list of the distinct pairs.
outcomes = list(outcome_counts)
frequencies = [outcome_counts[pair] for pair in outcomes]
print("list(outcome_counts.keys()):", outcomes)
print("list(outcome_counts.values()):", frequencies)

* Convert the tuple (x, y) to string format for labels

In [None]:
# Render every (x, y) pair in `outcomes` as a string so it can be used as a
# bar label on the x-axis.
outcome_labels = [str(pair) for pair in map(tuple, outcomes)]

#### Plot the frequency of occurrence of Tuples

In [None]:
# Bar chart: one bar per outcome label, bar height = how often it occurred
plt.bar(outcome_labels, frequencies, color='skyblue', edgecolor='black')
plt.title("Distribution of Die Rolls")
# NOTE(review): the labels passed to plt.bar are (x, y) pair strings, so
# "Die Face" may not be the most accurate axis name - confirm intent.
plt.xlabel("Die Face")
plt.ylabel("Frequency")
# Rotate the tick labels by 90 degrees
plt.xticks(rotation=90)
# Dashed, semi-transparent grid lines on the y-axis only
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

## Continuous Probability Distribution
* `Probability Density Function` (PDF)

* Generate Standard `Normal or Gaussian Distribution` data

In [None]:
# Generate empirical data: 1000 draws from a normal distribution with
# mean mu and standard deviation sigma (0 and 1 give the standard normal)
mu, sigma = 0, 1
data = np.random.normal(loc=mu, scale=sigma, size=1000)
# Print floats with 3 decimals and without scientific notation
np.set_printoptions(precision=3, suppress=True)
print(data[:50])

* Smooth the plot

In [None]:
# Evenly spaced x values spanning the observed data range; the theoretical
# "Probability Density Function" is evaluated on this grid so that it plots
# as a smooth curve.
# ndarray.min()/max() are used instead of the builtin min()/max(), which
# would iterate over the array one element at a time in Python.
x = np.linspace(data.min(), data.max(), 1000)
# print and check data
np.set_printoptions(precision=3, suppress=True)
print(x[:50])

* PDF or the `Normal or Gaussian Distribution`

In [None]:
# Normal PDF evaluated on the grid x:
#   f(x) = 1 / (sqrt(2*pi) * sigma) * exp(-(x - mu)^2 / (2 * sigma^2))
coefficient = 1 / (np.sqrt(2 * np.pi) * sigma)
exponent = -((x - mu) ** 2) / (2 * sigma ** 2)
pdf_normal = coefficient * np.exp(exponent)
# print and check data
np.set_printoptions(precision=3, suppress=True)
print(pdf_normal[:50])

* Plot the `PDF` for the `Gaussian Distribution`

In [None]:
# Empirical distribution: 30-bin histogram of the sampled data.
# density=True normalizes the histogram so it is on the same scale as a PDF.
plt.hist(data, bins=30, density=True, alpha=0.6, color='blue', label='Empirical Data')

# Overlay the theoretical PDF as a red line on the same axes
plt.plot(x, pdf_normal, 'r', linewidth=2, label='Theoretical PDF')

# Visualization: title, axis labels and legend
plt.title('Comparison of Empirical and Theoretical Distributions')
plt.xlabel('Data Values')
plt.ylabel('Probability Density')
plt.legend()

# Show the plot
plt.show()

## PDF with `scipy.stats` Functions

In [None]:
# x grid shared by every curve: 1000 evenly spaced points from -10 to 20
x = np.linspace(-10, 20, num=1000)

# One (mu, sigma, color) entry per normal distribution to draw
distributions = [
    (0, 1, 'blue'),        # standard normal
    (5, 2, 'green'),       # mean shifted to 5
    (0, 0.5, 'red'),       # smaller variance
    (-3, 1.5, 'orange'),   # negative mean
]

In [None]:
# Draw one normal PDF curve per (mean, std, color) entry on the shared grid x.
# The loop variables are named `mean` and `std` rather than `mu` and `sigma`
# so the notebook-level mu/sigma set for the empirical-data cells are not
# overwritten by the last entry of `distributions`.
for mean, std, color in distributions:
    y = norm.pdf(x, mean, std)
    plt.plot(
        x, y,
        color=color,
        lw=2,
        label=rf"$\mu$={mean}, $\sigma$={std}"
    )

# Figure-level styling, applied once after all curves are drawn
plt.title("Normal Distributions with Different Parameters",
          fontsize=18, fontweight='bold')
plt.xlabel("x", fontsize=16, fontweight='bold')
plt.ylabel("Density", fontsize=16, fontweight='bold')
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(fontsize=14)
plt.grid(alpha=0.3)

plt.show()