# Entropy and Information Measures
- Shannon entropy, Mutual information, KL divergence
- Real examples: Feature selection, Compression

In [None]:
# numpy supplies the arrays plus np.unique / np.log2 used in the cells below.
import numpy as np
# scipy.stats.entropy computes both the Shannon entropy and, when given a
# second distribution, the KL divergence in the cells below.
from scipy import stats
# rel_entr is the elementwise p*log(p/q) term; it is not referenced in the
# cells shown here.
from scipy.special import rel_entr
print('Entropy and information module loaded')

## Shannon Entropy
**Definition**: H(X) = -Σ p(x) log₂ p(x)
**Interpretation**: Average information content

In [None]:
# Shannon entropy, in bits, of three reference distributions.
probs1 = [0.5, 0.5]   # fair coin
probs2 = [0.9, 0.1]   # biased coin
probs3 = [0.25] * 4   # uniform over four outcomes

# base=2 makes stats.entropy report bits rather than nats.
entropy1, entropy2, entropy3 = (
    stats.entropy(dist, base=2) for dist in (probs1, probs2, probs3)
)

# Emit the whole report in one call; sep='\n' reproduces line-by-line output.
print(
    'Shannon Entropy (bits)\n',
    f'Fair coin [0.5, 0.5]: H = {entropy1:.4f}',
    f'Biased coin [0.9, 0.1]: H = {entropy2:.4f}',
    f'4-sided die [0.25, ...]: H = {entropy3:.4f}\n',
    'Higher entropy = more uncertainty',
    sep='\n',
)

## Kullback-Leibler Divergence
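**Definition**: KL(P||Q) = Σ p(x) ln(p(x)/q(x)) (natural log, so the values below are in nats)
**Use**: Measure how much distribution Q diverges from P; it is asymmetric, so it is not a true distance metric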

In [None]:
# KL divergence of a reference distribution P from two candidates.
p = np.array([0.4, 0.3, 0.3])
q1 = np.array([0.35, 0.35, 0.3])  # nearly matches P
q2 = np.array([0.1, 0.1, 0.8])    # very different from P

# With a second argument stats.entropy returns KL(P||Q); no base is given,
# so the result uses the natural log (nats).
kl1, kl2 = (stats.entropy(p, candidate) for candidate in (q1, q2))

# Emit the whole report in one call; sep='\n' reproduces line-by-line output.
print(
    'KL Divergence\n',
    f'P: {p}',
    f'Q1: {q1} → KL(P||Q1) = {kl1:.4f}',
    f'Q2: {q2} → KL(P||Q2) = {kl2:.4f}\n',
    'Larger KL = more different',
    sep='\n',
)

## Real Example: Text Compression
**Scenario**: Estimate compression potential

In [None]:
# Gauge how compressible a short sample string is by comparing its empirical
# per-character entropy with the maximum for an alphabet of the same size.
text = 'the quick brown fox jumps over the lazy dog'

# Empirical symbol distribution: distinct characters and their relative
# frequencies (space counts as a character).
chars, counts = np.unique(list(text), return_counts=True)
probs = counts / counts.sum()

# Observed entropy versus the ceiling reached when every distinct character
# is equally likely; their quotient is the reported ratio.
entropy = stats.entropy(probs, base=2)
max_entropy = np.log2(chars.size)
compression_ratio = entropy / max_entropy

# Emit the whole report in one call; sep='\n' reproduces line-by-line output.
print(
    'Text Compression Analysis\n',
    f'Text: "{text}"',
    f'Unique chars: {len(chars)}',
    f'Entropy: {entropy:.4f} bits/char\n',
    f'Max entropy (uniform): {max_entropy:.4f} bits/char',
    f'Compression ratio: {compression_ratio:.2%}',
    f'Potential savings: {(1-compression_ratio)*100:.1f}%',
    sep='\n',
)

## Summary
```python
# Entropy
H = stats.entropy(probs, base=2)

# KL Divergence
KL = stats.entropy(p, q)

# Mutual information
# Use sklearn.feature_selection.mutual_info_classif (discrete target) or mutual_info_regression (continuous target)
```