In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Step 1: Synthetic dataset (seeded so every run reproduces the same draw)
torch.manual_seed(0)
X = torch.randn((10, 4))  # feature matrix: 10 samples x 4 features
y = torch.randint(low=0, high=2, size=(10,)).to(torch.float32)  # 0/1 targets stored as floats

# Step 2: Tiny neural net (1-layer logistic reg)
class SimpleClassifier(nn.Module):
    """Single-layer logistic regression over 4 input features.

    The forward pass applies one affine map followed by a sigmoid, so for an
    input of shape (batch, 4) it returns values in (0, 1) with shape
    (batch, 1).
    """

    def __init__(self):
        super(SimpleClassifier, self).__init__()
        # One affine map: 4 features -> 1 logit.
        self.linear = nn.Linear(in_features=4, out_features=1)

    def forward(self, x):
        """Return sigmoid(linear(x)), i.e. probabilities rather than logits."""
        logits = self.linear(x)
        return logits.sigmoid()

model = SimpleClassifier()

# Step 3: Get predicted probabilities
# Inference only, so autograd bookkeeping is switched off.
with torch.no_grad():
    # squeeze(-1) removes only the trailing size-1 output dimension. A bare
    # squeeze() would also collapse the batch dimension when X holds a single
    # sample, leaving a 0-d tensor that no longer lines up with y.
    y_pred = model(X).squeeze(-1)

# Step 4: Compute BCE manually and bit lengths
# Per-sample code length in bits: -log2 of the probability the model assigned
# to the label that actually occurred. (Despite the name, `log_probs` holds
# these *negative* log2-probabilities.)
eps = 1e-9  # to avoid log(0)
log_probs = - (y * torch.log2(y_pred + eps) + (1 - y) * torch.log2(1 - y_pred + eps))

# Total loss = sum of bits needed to encode the labels
total_bits = log_probs.sum()

# Compare with BCE loss from PyTorch
# binary_cross_entropy uses the natural log, so dividing by ln(2) converts
# the summed loss from nats to bits.
bce_loss = F.binary_cross_entropy(y_pred, y, reduction='sum') / torch.log(torch.tensor(2.0))  # convert from nats to bits

print("Label truths:", y.tolist())
print("Predicted probabilities:", y_pred.tolist())
print("Per-sample bit lengths:", log_probs.tolist())
print(f"Total bits needed: {total_bits.item():.4f}")
print(f"BCE loss (in bits): {bce_loss.item():.4f}")


Label truths: [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
Predicted probabilities: [0.30441972613334656, 0.19742408394813538, 0.43314653635025024, 0.4970138669013977, 0.4872819781303406, 0.5571728348731995, 0.45604804158210754, 0.5146146416664124, 0.3258282244205475, 0.4745754599571228]
Per-sample bit lengths: [1.715866208076477, 0.31729018688201904, 0.8189522624015808, 1.0086419582366943, 1.0371712446212769, 1.1751843690872192, 1.1327422857284546, 1.0427974462509155, 1.6178165674209595, 0.928444504737854]
Total bits needed: 10.7949
BCE loss (in bits): 10.7949


In [3]:
import numpy as np

# Ground-truth class distribution (class 0 is the most common).
q = np.array([0.6, 0.3, 0.1])

# Model's predicted distribution (differs from q on classes 0 and 1).
p = np.array([0.5, 0.4, 0.1])

# Sanity check: each vector must sum to 1 to be a valid distribution.
assert np.isclose(q.sum(), 1.0)
assert np.isclose(p.sum(), 1.0)

# Function to compute entropy
def entropy(dist, eps=1e-12):
    """Shannon entropy of a discrete distribution, in bits.

    Parameters
    ----------
    dist : array_like
        Outcome probabilities. Any array-like (ndarray, list, tuple) is
        accepted and converted to a float array.
    eps : float, optional
        Constant added inside the logarithm so that zero-probability entries
        contribute ``0 * log2(eps) = 0`` instead of nan. It biases the result
        by roughly ``eps`` per entry; pass ``eps=0`` for the exact value when
        every probability is strictly positive.

    Returns
    -------
    numpy.float64
        ``-sum(dist * log2(dist + eps))``.
    """
    probs = np.asarray(dist, dtype=float)
    return -np.sum(probs * np.log2(probs + eps))

# Function to compute cross-entropy
def cross_entropy(q, p, eps=1e-12):
    """Cross-entropy H(q, p) between two discrete distributions, in bits.

    Parameters
    ----------
    q : array_like
        Reference ("true") probabilities that weight each outcome.
    p : array_like
        Probabilities whose log2 gives each outcome's code length. Must be
        broadcast-compatible with ``q``.
    eps : float, optional
        Constant added to ``p`` inside the logarithm so a zero in ``p`` yields
        a large finite cost instead of inf/nan. Pass ``eps=0`` for the exact
        value when every entry of ``p`` is strictly positive.

    Returns
    -------
    numpy.float64
        ``-sum(q * log2(p + eps))``.
    """
    true_probs = np.asarray(q, dtype=float)
    model_probs = np.asarray(p, dtype=float)
    return -np.sum(true_probs * np.log2(model_probs + eps))

# Entropy of q, cross-entropy of q under p, and their difference.
H_q, H_qp = entropy(q), cross_entropy(q, p)
KL_q_p = H_qp - H_q  # KL(q || p) = H(q, p) - H(q)

# Report the two distributions, then the three quantities in bits.
print("🔹 True distribution q:", q)
print("🔸 Model prediction p:", p)
print()
print(f"Entropy H(q):          {H_q:.4f} bits")
print(f"Cross-Entropy H(q, p): {H_qp:.4f} bits")
print(f"KL(q || p):            {KL_q_p:.4f} bits (extra cost)")


🔹 True distribution q: [0.6 0.3 0.1]
🔸 Model prediction p: [0.5 0.4 0.1]

Entropy H(q):          1.2955 bits
Cross-Entropy H(q, p): 1.3288 bits
KL(q || p):            0.0333 bits (extra cost)


In [5]:
from scipy.stats import binom
n, p = 1000, 0.1  # Binomial(n, p): number of trials and success probability
# Percent-point function (inverse CDF) of Binomial(n, p) evaluated at 0.99.
quantile_exact = binom(n, p).ppf(0.99)
print(f"Exact 99th percentile: {quantile_exact}")

Exact 99th percentile: 123.0


In [6]:
import numpy as np
from scipy.optimize import fsolve

def kl_divergence(q, p):
    """Bernoulli KL divergence D(q || p) in nats.

    Computes ``q*ln(q/p) + (1-q)*ln((1-q)/(1-p))`` with the convention
    ``0 * ln(0) = 0``, so the endpoints q = 0 and q = 1 return the finite
    limits ``-ln(1-p)`` and ``-ln(p)`` instead of nan.

    Parameters
    ----------
    q : float or numpy.ndarray
        Success probability of the first distribution.
    p : float or numpy.ndarray
        Success probability of the reference distribution, strictly between
        0 and 1.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The divergence; nan where q lies outside [0, 1].
    """
    q_fail = 1 - q
    # Wherever a weight is exactly 0, feed the log a ratio of exactly 1 so the
    # term becomes 0 * 0 rather than 0 * (-inf). All other entries are
    # passed through unchanged.
    success_ratio = np.where(q == 0, p, q) / p
    failure_ratio = np.where(q_fail == 0, 1 - p, q_fail) / (1 - p)
    return q * np.log(success_ratio) + q_fail * np.log(failure_ratio)

# Number of trials, success probability, and the quantile level of interest.
n, p, alpha = 1000, 0.1, 0.99

# Per-trial divergence level: D = ln(1 / (1 - alpha)) / n, i.e. the D for
# which exp(-n * D) equals 1 - alpha.
threshold = np.log(1 / (1 - alpha)) / n

# Root function: zero exactly where D(q || p) equals the threshold.
def equation(q):
    """Signed gap between D(q || p) and the module-level ``threshold``."""
    divergence = kl_divergence(q, p)
    return divergence - threshold

q_guess = p + 0.05  # Guess a bit above p
# full_output=True exposes fsolve's status flag; without it a failed solve
# silently returns the last iterate as if it were the root.
solution, _info, status, message = fsolve(equation, q_guess, full_output=True)
if status != 1:
    raise RuntimeError(f"fsolve did not converge: {message}")
q_99 = solution[0]
# D(q || p) = threshold has one root on each side of p; only the root above p
# describes an upper quantile.
if not p < q_99 < 1:
    raise RuntimeError(f"fsolve returned q = {q_99}, which is outside (p, 1)")
x_99 = int(np.ceil(n * q_99))  # convert the success fraction into a count

print(f"Approximate 99th quantile (q = {q_99:.4f}): x = {x_99}")


Approximate 99th quantile (q = 0.1300): x = 130


In [7]:
import numpy as np
from scipy.optimize import fsolve

def kl(q, p):
    """Bernoulli KL divergence D(q || p) in nats.

    Evaluates ``q*ln(q/p) + (1-q)*ln((1-q)/(1-p))`` using the convention
    ``0 * ln(0) = 0``: q = 0 gives ``-ln(1-p)`` and q = 1 gives ``-ln(p)``
    rather than nan.

    Parameters
    ----------
    q : float or numpy.ndarray
        Success probability of the first distribution.
    p : float or numpy.ndarray
        Success probability of the reference distribution, strictly between
        0 and 1.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The divergence; nan where q lies outside [0, 1].
    """
    one_minus_q = 1 - q
    # Replace the log argument by exactly 1 wherever its weight is exactly 0,
    # turning 0 * (-inf) into 0 * 0; every other entry is left untouched.
    ratio_success = np.where(q == 0, p, q) / p
    ratio_failure = np.where(one_minus_q == 0, 1 - p, one_minus_q) / (1 - p)
    return q * np.log(ratio_success) + one_minus_q * np.log(ratio_failure)

n = 1000      # number of trials
p = 0.1       # per-trial success probability
alpha = 0.99  # quantile level

# Divergence level to hit: D(q || p) = ln(1 / (1 - alpha)) / n
odds_of_tail = 1 / (1 - alpha)
target = np.log(odds_of_tail) / n

def to_solve(q):
    """Residual D(q || p) - target; its root is the q being solved for."""
    residual = kl(q, p) - target
    return residual

q_init = p + 0.05  # start above p so the solver heads for the upper root
# Ask fsolve for its status flag so a failed solve is reported instead of
# silently yielding the last iterate.
roots, _details, ier, mesg = fsolve(to_solve, q_init, full_output=True)
if ier != 1:
    raise RuntimeError(f"fsolve did not converge: {mesg}")
q_99 = roots[0]
# The equation also has a root below p; reject it, since only q > p
# corresponds to an upper quantile.
if not p < q_99 < 1:
    raise RuntimeError(f"fsolve returned q = {q_99}, which is outside (p, 1)")
x_99 = int(np.ceil(n * q_99))  # success fraction -> count, rounded up

print(f"q_99 = {q_99:.4f} --> Approx. 99th quantile: x = {x_99}")


q_99 = 0.1300 --> Approx. 99th quantile: x = 130


In [16]:
# Binary entropy in bits, -p*log2(p) - (1-p)*log2(1-p), evaluated at p = 0.1.
-0.1*np.log2(0.1)-0.9*np.log2(0.9)

np.float64(0.4689955935892812)

In [17]:
# 1000 times 0.46.
# NOTE(review): presumably n * H for n = 1000 draws, with the ~0.469-bit
# entropy from the previous cell truncated to 0.46 — confirm.
1000*0.46

460.0

In [18]:
sigma_squared = 4

# 0.5 * log2(2*pi*e*sigma^2): the differential-entropy formula for a Gaussian
# with variance sigma^2, in bits.
entropy_bits = 1/2*np.log2(2*np.pi*np.e*sigma_squared)

# 2**h, which equals sqrt(2*pi*e*sigma^2).
# NOTE(review): the name says "sample size", but 2**h reads more like an
# effective support width — confirm the intended meaning.
effective_sample_size = 2**entropy_bits
effective_sample_size

np.float64(8.265462708244986)

In [14]:
# 2 raised to the power 1000*0.32 (= 320).
# NOTE(review): looks like a 2**(n*H) count for n = 1000 at 0.32 bits per
# symbol; where 0.32 comes from is not shown here — confirm.
2**(1000*0.32)

2.13598703592091e+96

In [19]:
# Compares 1/3**4 = 1/81 with 1/16*1/2*1/4, which evaluates left to right
# to 1/128; the result is True because 1/81 > 1/128.
1/3**4>(1/16*1/2*1/4)

True

In [21]:
# Ratio a / (a + b) with a = 0.3*0.3 and b = 0.6*0.7.
# NOTE(review): has the shape of a Bayes-rule posterior (prior * likelihood
# over total evidence); which events the numbers refer to is not shown — confirm.
(0.3*0.3)/(0.3*0.3+0.6*0.7)

0.1764705882352941