# Multivariate Statistics
- Multivariate normal, Mahalanobis distance, MANOVA
- Real examples: Anomaly detection, Multivariate testing

In [1]:
# NumPy supplies the array math and linear algebra used by the cells below.
import numpy as np
# scipy.stats provides the multivariate_normal distribution object.
from scipy import stats
# scipy.spatial.distance is imported for distance helpers; the worked examples
# below compute the Mahalanobis distance directly with NumPy.
from scipy.spatial import distance
# Banner so the cell output confirms the imports succeeded.
print('Multivariate statistics module loaded')

Multivariate statistics module loaded


## Multivariate Normal Distribution
**Definition**: Generalization of the normal distribution to a vector of several jointly distributed variables
**Parameters**: μ (mean vector), Σ (covariance matrix)

In [2]:
# Bivariate normal: zero mean, unit variances, covariance 0.5 between the axes.
mean = [0, 0]
cov = [
    [1, 0.5],
    [0.5, 1],
]
mvn = stats.multivariate_normal(mean, cov)

print('Multivariate Normal\n')

# Draw 1000 points; each row of `samples` is one 2-D observation.
samples = mvn.rvs(size=1000)
n_drawn = len(samples)
sample_mean = np.mean(samples, axis=0)
sample_cov = np.cov(samples, rowvar=False)  # rows are observations

print(f'Generated {n_drawn} samples')
print(f'Sample mean: {sample_mean}')
print(f'Sample cov:\n{sample_cov}')

Multivariate Normal

Generated 1000 samples
Sample mean: [0.05832266 0.06636369]
Sample cov:
[[0.9621052  0.43886369]
 [0.43886369 0.93605002]]


## Mahalanobis Distance
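**Definition**: Distance from a point $x$ to a distribution's mean $\mu$, scaled by the covariance $\Sigma$: $D_M(x) = \sqrt{(x-\mu)^\top \Sigma^{-1} (x-\mu)}$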
**Use**: Outlier detection, similarity measure

In [3]:
# Estimate the distribution's centre and spread from a fresh sample of 100 points.
data = mvn.rvs(size=100)
mean_est = np.mean(data, axis=0)
cov_est = np.cov(data, rowvar=False)  # rows of `data` are observations

# Query point whose distance from the estimated distribution is measured.
point = np.array([3, 3])

# D = sqrt(diff^T · Sigma^-1 · diff), where diff is the offset from the estimated mean.
cov_inv = np.linalg.inv(cov_est)
diff = point - mean_est
mahal_dist = np.sqrt(diff @ cov_inv @ diff)

print(f'Mahalanobis distance: {mahal_dist:.4f}')
# NOTE(review): for Gaussian data in d dimensions the squared distance follows a
# chi-square law with d degrees of freedom, so a cutoff of 3 does not carry the
# univariate 3-sigma tail probability — confirm the intended threshold.
is_outlier = mahal_dist > 3
if is_outlier:
    print('  Potential outlier (>3 std devs)')

Mahalanobis distance: 3.5301
  Potential outlier (>3 std devs)


## Real Example: Anomaly Detection
**Scenario**: Detect unusual transactions

In [4]:
print('Transaction Anomaly Detection\n')
np.random.seed(42)  # fixed seed so the synthetic data is reproducible

# Synthetic "normal" behaviour: amount with mean 100 / std 20, frequency with
# mean 5 / std 1. The two draws stay in this order so the seeded stream is
# consumed exactly as before.
normal_amount = np.random.normal(100, 20, 1000)
normal_freq = np.random.normal(5, 1, 1000)
transactions = np.column_stack([normal_amount, normal_freq])

# Fit: per-column mean and the inverse of the 2x2 sample covariance.
mean_txn = np.mean(transactions, axis=0)
cov_txn = np.cov(transactions, rowvar=False)  # rows are observations
cov_inv = np.linalg.inv(cov_txn)

# Score candidate transactions, each given as [amount, frequency].
test_txns = np.array([[95, 5], [150, 3], [500, 15]])
print('Testing transactions:')
for txn_no, txn in enumerate(test_txns, start=1):
    amount, freq = txn
    delta = txn - mean_txn
    mahal = np.sqrt(delta @ cov_inv @ delta)
    # NOTE(review): 3 is a rule-of-thumb cutoff; in 2-D it is not the
    # univariate 3-sigma tail probability — confirm the intended threshold.
    verdict = ' → ANOMALY' if mahal > 3 else ' → Normal'
    print(f'  Txn {txn_no}: amount=${amount:.0f}, freq={freq:.0f}/day')
    print(f'    Mahalanobis: {mahal:.2f}', end='')
    print(verdict)

Transaction Anomaly Detection

Testing transactions:
  Txn 1: amount=$95, freq=5/day
    Mahalanobis: 0.29 → Normal
  Txn 2: amount=$150, freq=3/day
    Mahalanobis: 3.21 → ANOMALY
  Txn 3: amount=$500, freq=15/day
    Mahalanobis: 23.08 → ANOMALY


## Summary
```python
# Multivariate normal
mvn = stats.multivariate_normal(mean, cov)
samples = mvn.rvs(size=n)
pdf = mvn.pdf(x)

# Mahalanobis (SciPy helper; the third argument is the INVERSE covariance)
dist = distance.mahalanobis(x, mean, cov_inv)
```