# Probability Distributions: Discrete
- Binomial, Poisson, Geometric, Negative Binomial distributions
- Real examples: Quality control, Traffic modeling, Sports analytics

In [None]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Startup banner: reaching this line means the imports above succeeded.
_load_message = 'Discrete probability distributions module loaded'
print(_load_message)

## Binomial Distribution

**PMF**: \( P(X=k) = \binom{n}{k} p^k (1-p)^{n-k} \)

**Parameters**: n (trials), p (success probability)

**Properties**:
- Fixed number of trials
- Independent trials
- Binary outcomes
- Mean = np, Variance = np(1-p)

**Applications**: Quality control, clinical trials, polling

In [None]:
# Binomial distribution
# Freeze B(n, p) once so every query below shares the same parameters.
n_trials, p_success = 20, 0.3
binom_dist = stats.binom(n=n_trials, p=p_success)

print(f'Binomial Distribution: B({n_trials}, {p_success})\n')

# Summary moments of the frozen distribution.
dist_mean = binom_dist.mean()
dist_std = binom_dist.std()
dist_var = binom_dist.var()
print('Statistics:')
print(f'  Mean: {dist_mean:.2f}')
print(f'  Std Dev: {dist_std:.2f}')
print(f'  Variance: {dist_var:.2f}\n')

# Point probabilities P(X = k) at success counts 0, 5, ..., 20.
print('Probabilities:')
for successes in range(0, 21, 5):
    print(f'  P(X = {successes:2d}) = {binom_dist.pmf(successes):.6f}')

print()
# Cumulative probabilities P(X ≤ k).
print('Cumulative:')
for upper in (5, 10, 15):
    print(f'  P(X ≤ {upper:2d}) = {binom_dist.cdf(upper):.4f}')

## Real Example: Manufacturing Quality Control

- **Problem**: Inspecting a batch of products
- **Defect rate**: 5%
- **Sample size**: 50 items
- **Decision**: Reject the batch if ≥4 defects are found

In [None]:
# Quality control inspection
# Acceptance sampling: count defects in a sample and reject at a cut-off.
print('Quality Control: Acceptance Sampling\n')

batch_size, defect_rate, threshold = 50, 0.05, 4

# Defect count in the sample is modelled as Binomial(batch_size, defect_rate).
inspection_dist = stats.binom(n=batch_size, p=defect_rate)

print('Inspection plan:')
print(f'  Sample size: {batch_size}')
print(f'  True defect rate: {defect_rate*100}%')
print(f'  Reject if defects ≥ {threshold}\n')

# Operating characteristic
expected_defects = inspection_dist.mean()
defect_spread = inspection_dist.std()
print('Expected results:')
print(f'  Expected defects: {expected_defects:.2f}')
print(f'  Std Dev: {defect_spread:.2f}\n')

# Acceptance probability: the batch passes with at most threshold - 1 defects.
highest_passing_count = threshold - 1
p_accept = inspection_dist.cdf(highest_passing_count)
p_reject = 1 - p_accept

print('Decision probabilities:')
print(f'  P(Accept batch) = {p_accept*100:.2f}%')
print(f'  P(Reject batch) = {p_reject*100:.2f}%\n')

# Probability of specific outcomes, each tagged with the resulting decision.
print('Probability distribution:')
for defects in range(8):
    if defects < threshold:
        verdict = 'Accept'
    else:
        verdict = 'Reject'
    print(f'  {defects} defects: {inspection_dist.pmf(defects)*100:.2f}% [{verdict}]')

## Poisson Distribution

**PMF**: \( P(X=k) = \frac{\lambda^k e^{-\lambda}}{k!} \)

**Parameter**: λ (rate)

**Properties**:
- Events in fixed interval
- Mean = Variance = λ
- Approximates the binomial when n is large and p is small (with λ = np)

**Applications**: Traffic arrivals, call center, website hits

In [None]:
# Poisson distribution
# A single rate parameter fixes both the mean and the variance.
lambda_rate = 5  # average events per interval
poisson_dist = stats.poisson(mu=lambda_rate)

print(f'Poisson Distribution (λ = {lambda_rate})\n')

rate_mean = poisson_dist.mean()
rate_std = poisson_dist.std()
print('Statistics:')
print(f'  Mean: {rate_mean:.2f}')
print(f'  Std Dev: {rate_std:.2f}')
print('  Note: Mean = Variance = λ\n')

# Point probabilities P(X = k) for k = 0..10.
print('Probabilities:')
for count in range(11):
    print(f'  P(X = {count:2d}) = {poisson_dist.pmf(count):.6f}')

## Real Example: Call Center Staffing

- **Problem**: Customer service call center
- **Average calls**: 3 per minute
- **Task**: Determine staffing levels

In [None]:
# Call center analysis
# Calls arriving per minute are modelled as Poisson with the average rate below.
print('Call Center Staffing Analysis\n')

avg_calls_per_min = 3
call_dist = stats.poisson(mu=avg_calls_per_min)

print(f'Call rate: {avg_calls_per_min} calls/minute average\n')

# Probability of different call volumes
print('Call volume probabilities (per minute):')
for k in range(0, 10):
    prob = call_dist.pmf(k)
    cum_prob = call_dist.cdf(k)
    print(f'  {k} calls: {prob*100:5.2f}% | P(≤{k}) = {cum_prob*100:5.2f}%')

print()

# Staffing decision: ppf(q) is the smallest call volume k with P(X ≤ k) ≥ q.
# The quantile must come from call_dist (this call center's arrival rate);
# any other frozen distribution would size staffing for the wrong rate.
print('Staffing recommendations:')
for service_level in [0.90, 0.95, 0.99]:
    required_capacity = call_dist.ppf(service_level)
    print(f'  {service_level*100:.0f}% service level: handle up to {int(required_capacity)} calls/min')

print()

# Peak hour: same model with a higher arrival rate.
peak_calls_per_min = 5
peak_dist = stats.poisson(mu=peak_calls_per_min)
print(f'Peak hour ({peak_calls_per_min} calls/min):')
print(f'  P(>7 calls) = {1-peak_dist.cdf(7):.4f}')
print(f'  P(≤3 calls) = {peak_dist.cdf(3):.4f}')

## Geometric Distribution

**PMF**: \( P(X=k) = (1-p)^{k-1} p \)

**Parameter**: p (success probability)

**Properties**:
- Trials until first success
- Memoryless
- Mean = 1/p

**Applications**: Time to first failure, customer acquisition

In [None]:
# Geometric distribution
# Number of trials up to and including the first success.
p_success = 0.2
geom_dist = stats.geom(p=p_success)

print(f'Geometric Distribution (p = {p_success})\n')

mean_trials_to_success = geom_dist.mean()
trials_std = geom_dist.std()
print('Statistics:')
print(f'  Mean trials until success: {mean_trials_to_success:.2f}')
print(f'  Std Dev: {trials_std:.2f}\n')

# For each trial count: exact probability and running cumulative probability.
print('Probabilities (trials until first success):')
for trial in range(1, 11):
    exact_pct = geom_dist.pmf(trial) * 100
    within_pct = geom_dist.cdf(trial) * 100
    print(f'  {trial:2d} trials: {exact_pct:5.2f}% | P(≤{trial}) = {within_pct:5.2f}%')

## Real Example: Sales Conversion

- **Problem**: Cold-calling sales
- **Conversion rate**: 15%
- **Task**: Find the expected number of calls until the first sale

In [None]:
# Sales conversion analysis
# Calls until the first sale follow a geometric distribution.
print('Sales Cold Calling Analysis\n')

conversion_rate = 0.15
sales_dist = stats.geom(p=conversion_rate)

print(f'Conversion rate: {conversion_rate*100}%\n')

mean_calls = sales_dist.mean()
median_calls = sales_dist.median()
print('Expected performance:')
print(f'  Average calls until sale: {mean_calls:.1f}')
print(f'  Median calls: {median_calls:.0f}\n')

# Chance of closing within a call budget, and of overrunning 20 calls.
within_5 = sales_dist.cdf(5)
within_10 = sales_dist.cdf(10)
beyond_20 = 1 - sales_dist.cdf(20)
print('Probabilities:')
print(f'  First sale within 5 calls: {within_5*100:.1f}%')
print(f'  First sale within 10 calls: {within_10*100:.1f}%')
print(f'  Need >20 calls: {beyond_20*100:.1f}%\n')

# Daily performance (40 calls)
calls_per_day = 40
expected_sales = calls_per_day * conversion_rate
# At least one sale is the complement of every call in the day failing.
p_any_sale = 1 - (1 - conversion_rate) ** calls_per_day
print(f'Daily performance ({calls_per_day} calls):')
print(f'  Expected sales: {expected_sales:.1f}')
print(f'  Probability of at least 1 sale: {p_any_sale*100:.2f}%')

## Negative Binomial Distribution

**PMF** (X = number of trials needed to reach r successes): \( P(X=n) = \binom{n-1}{r-1} p^r (1-p)^{n-r} \), for n = r, r+1, …

**Parameters**: r (successes), p (probability)

**Properties**:
- Generalization of geometric
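- Mean = r/p trials (equivalently r(1-p)/p failures before the r-th success, which is what `scipy.stats.nbinom` reports)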
- Used for overdispersed count data

**Applications**: Reliability, epidemiology, marketing

In [None]:
# Negative binomial
# scipy.stats.nbinom models the number of FAILURES observed before the
# r-th success, not the total number of trials.
r_successes = 5
p_success = 0.3
nbinom_dist = stats.nbinom(n=r_successes, p=p_success)

print(f'Negative Binomial (r={r_successes}, p={p_success})\n')

# Total trials = failures + r successes, so the mean shifts by r while the
# standard deviation is the same for both counts.
mean_failures = nbinom_dist.mean()
mean_trials = mean_failures + r_successes

print('Statistics:')
print(f'  Mean failures before {r_successes} successes: {mean_failures:.2f}')
print(f'  Mean trials for {r_successes} successes: {mean_trials:.2f}')
print(f'  Std Dev: {nbinom_dist.std():.2f}\n')

print('Probabilities (failures before r successes):')
for k in [0, 5, 10, 15, 20]:
    prob = nbinom_dist.pmf(k)
    print(f'  {k:2d} failures: {prob*100:5.2f}%')

## Summary

### Discrete Distributions:
```python
from scipy import stats

# Binomial
binom = stats.binom(n=trials, p=prob)

# Poisson
poisson = stats.poisson(mu=lambda_rate)

# Geometric
geom = stats.geom(p=prob)

# Negative Binomial (scipy counts failures before the n-th success)
nbinom = stats.nbinom(n=successes, p=prob)

# Common operations
prob = dist.pmf(k)  # P(X = k)
cum_prob = dist.cdf(k)  # P(X ≤ k)
quantile = dist.ppf(p)
samples = dist.rvs(size=n)
```

### Applications Summary:

**Binomial**: Fixed trials, quality control, A/B tests  
**Poisson**: Rare events, arrivals, counts per time  
**Geometric**: Time to first success, customer acquisition  
**Negative Binomial**: Time to r successes, overdispersion  

### Selection Guide:
- **Fixed trials, binary outcome**: Binomial
- **Events in time/space, rare**: Poisson
- **Trials until first success**: Geometric
- **Trials until r successes**: Negative Binomial
- **Count data with overdispersion**: Negative Binomial