# Understanding Minimum Entropy Loss

In [None]:
from utils import *

In [None]:
# Quantization grid: uniformly spaced levels, symmetric around 0.
EPSI = 0.5  # spacing between two adjacent levels (quantization step)
MAX_SIG = 2.5  # largest signal magnitude covered by the grid

# number of steps on each side of zero
half_width = int(MAX_SIG/EPSI)

# half_width negative levels + zero + half_width positive levels
NLEVELS = 2*half_width + 1
LEVELS = th.tensor([step*EPSI for step in range(-half_width, half_width + 1)])

print(f"Quantization step: {EPSI}, Number of levels: {NLEVELS}, \nLEVELS: {LEVELS}")
# sanity check: the grid has exactly the advertised number of levels
assert len(LEVELS) == NLEVELS

In [None]:
# Draw one test signal built as a sum of random frequencies
# (create_random_signal is provided by utils).
N_FREQS = 5      # second argument passed to create_random_signal
N_SAMPLES = 100  # first argument passed to create_random_signal
x = create_random_signal(N_SAMPLES, N_FREQS)

# Stem plot of the raw signal on a wide, short canvas.
_, signal_ax = plt.subplots(figsize=(10, 2))
signal_ax.stem(x)
signal_ax.set_title('Signal')
plt.show()

In [None]:
# Hard-quantize the signal against the LEVELS grid (quantize is provided by utils).
xq = quantize(x, LEVELS)

# Report which levels the quantized signal actually uses, next to the grid size.
used_levels = th.unique(xq)
print(f'levels: {used_levels}')
print(f'number of levels: {used_levels.shape[0]}')
print(f'calc number of levels: {NLEVELS}')

# Stem plot of the quantized signal.
_, quantized_ax = plt.subplots(figsize=(10, 2))
quantized_ax.stem(xq, label='quantized')
quantized_ax.set_title('Quantized Signal')
plt.show()

In [None]:
# Soft-quantize the signal (soft_quantize is provided by utils).
# NOTE(review): judging by how the results are used below, soft_quantize returns
# the soft-quantized signal and a 2-D soft-assignment array — confirm in utils.
temp = 1  # third argument of soft_quantize (presumably a temperature — confirm)
xsq, xsa = soft_quantize(x, LEVELS, temp)

plt.figure(figsize=(10, 5))

# Top panel: plot xsq (not the hard-quantized xq) so the panel matches its title.
# assumes xsq is a 1-D sequence like x — TODO confirm
plt.subplot(2, 1, 1)
plt.stem(xsq, label='soft quantized')
plt.xticks([])
plt.yticks([])
plt.title('Soft Quantized Signal')

# Bottom panel: the assignment array shown as an image, transposed before display.
plt.subplot(2, 1, 2)
plt.imshow(xsa.T, aspect='auto', origin='lower', interpolation='none')
plt.xticks([])
plt.yticks([])
plt.title('Soft Assignment')

# Explicit show, consistent with the other plotting cells.
plt.show()

## Calculate Entropy
### in different ways

In [None]:
# standard entropy, calculated by counting the number of times each level appears
# (xq is the hard-quantized signal produced by quantize(x, LEVELS) in an earlier cell)
# NOTE(review): `entropy` comes from utils; the counting behaviour described above
# is taken from the original comment — confirm against its implementation.
h1 = entropy(xq)
print(f'Standard entropy: {h1}')

# ENTROPY IS NOT DIFFERENTIABLE
## But apparently the authors found a way to do it
$$
 \frac{\partial{H}}{\partial{r_i}} = \lim_{b \to \infty} \sum_{j=0}^{|S|} [1 + \ln p(s_j)] * R(r_i - s_j)
$$

with $R$:

$$
R(r_i - s_j) = \frac{b}{|r|\varepsilon^b} \frac{(r_i - s_j)^{b-1}}{\left[\frac{(r_i -
s_j)^b}{\varepsilon^b} + 1\right]^2} $$

Master's thesis version:

$$ 
R = \frac{b}{\left( \text{size}(rq) \cdot \varepsilon^b \right)} \cdot \frac{(rq - s_j)^{b-1}}{\left( \frac{(rq - s_j)^b}{\varepsilon^b} + 1 \right)^2}
$$

In [None]:
# # see what this function actually looks like
# import numpy as np
# def dentropy(rq, b=10.0, ε=0.1):
#     symbols, counts = np.unique(rq, return_counts=True)
#     p = counts/len(rq)
#     # logp = np.log2(p + 1e-8)
#     logp = np.log(p + 1e-8)
#     H = -np.sum(p*logp) # entropy
#     sizer = len(rq)
#     DH = 0
#     for j in range(len(symbols)):
#         DH += (1+logp[j])*b / (sizer*ε**b) * (rq-symbols[j])**(b-1) / (((rq-symbols[j])/ε)**b+1)**2
#     return H, DH

# H, DH = dentropy(xq, b=10, ε=EPSI)

# print(f'Entropy: {H:.2f}')
# print(f'Gradient: {DH}')

In [None]:
# def dentropy2(rq, ε=0.1): # importance sampling based entropy calculation #https://en.wikipedia.org/wiki/Kernel_density_estimation
#     def normal(x, μ, σ): return np.exp(-0.5*((x-μ)/σ)**2)/(σ*np.sqrt(2*π))
    
#     # sample m points from a isotropic gaussian
#     m = 300
#     samples = np.random.randn(m)
#     # samples = np.linspace(-1, 1, m)
#     likelihoods = normal(samples, 0, 1)

#     σ = 5*ε

#     #calculate pdf of the quantized signal
#     tot = 0
#     for s,l in zip(samples, likelihoods):
#         p = np.mean(normal(s, rq, σ))
#         ent = -p*np.log(p+1e-8) / l
#         tot += ent
#     entropy = tot/m 

#     return entropy

# H = dentropy2(xq, ε=EPSI)
# print(f'Entropy: {H:.2f}')

## Let's see if there is a correlation between the differentiable functions and the real entropy

In [None]:
# Correlation experiment: over many random signals, compare the entropy of the
# hard-quantized signal (X axis) with the value returned by a loss object (Y axis).
h1_loss = HLoss1(LEVELS)
h2_loss = HLoss2(LEVELS)
# h3_loss = HLoss3(LEVELS)

Hx, Hy = [], []
for _ in range(2000):
    # fresh random signal (sum of random frequencies) for every trial
    x = create_random_signal(N_SAMPLES, N_FREQS)

    # X: entropy measured on the quantized signal
    # (alternative: h2_loss(x.view(1,-1)).item())
    quantized_entropy = entropy(quantize(x, LEVELS))
    Hx.append(quantized_entropy)

    # Y: loss value on the raw signal, reshaped to a single row
    # (alternatives: h1_loss(x.view(1,-1)).item(), h3_loss(x.view(1,-1)).item())
    loss_value = h2_loss(x.view(1, -1)).item()
    Hy.append(loss_value)

Hx = th.tensor(Hx)
Hy = th.tensor(Hy)

# Least-squares line Hy ~ m*Hx + c: the design matrix has Hx in the first
# column and ones in the second.
A = th.vstack([Hx, th.ones(len(Hx))]).T
fit = th.linalg.lstsq(A, Hy, rcond=None)
m, c = fit.solution
print(f'best fit: y = {m:.2f}x + {c:.2f}')

# Scatter of all trials with the fitted line drawn on top.
_, fit_ax = plt.subplots(figsize=(10, 5))
fit_ax.scatter(Hx, Hy, s=1)
fit_ax.plot(Hx, m*Hx + c, color='red')
fit_ax.set_xlabel('Entropy 1')
fit_ax.set_ylabel('Entropy 2')
# fit_ax.set_ylim([0, 8])
plt.show()
