单元测试：验证 `plcoding.source` 模块是否工作正常，以及能否实现对 `torch` 库的 `tensor` 数据的压缩

In [1]:
import numpy as np
import torch
from plcoding.source import *

In [2]:
# Build a random discrete source and draw a batch of symbols to compress.
# A fixed seed keeps the test reproducible: Restart & Run All always produces
# the same source and the same symbols, so a failing round trip can be re-run.
SEED = 0
rng = np.random.default_rng(SEED)
N, q = (1 << 14), 256  # number of symbols to draw, alphabet size
# Random non-negative weights, rescaled in place so they sum to 1 (a valid PMF).
src_prob = rng.random(q)
src_prob /= src_prob.sum()
# Shannon entropy of the source, in bits per symbol.
entropy = -(src_prob * np.log2(src_prob)).sum()
# N independent draws from {0, ..., q-1}, distributed according to src_prob.
sym = rng.choice(q, size=N, p=src_prob)

In [3]:
# Compress using the symbols' PMF. Every position shares the same source
# distribution, so the table simply repeats src_prob once per symbol (N rows).
pmf = np.tile(src_prob, (N, 1))
data = encode_pmf(pmf, sym)
sym_ = decode_pmf(pmf, data)
# The round trip must give back exactly the symbols that went in.
assert (sym == sym_).all()
# Compare this run's rate with the average optimum. Both figures are shown as
# a percentage of log2(q), the cost of storing one symbol uncompressed.
# len(data) * 8 presumably converts a byte count to bits — confirm against
# what encode_pmf returns.
raw_bits_per_sym = np.log2(q)
shannon_pct = entropy / raw_bits_per_sym * 100
rate_pct = len(data) * 8 / N / raw_bits_per_sym * 100
print(f"Shannon bound:   \t{shannon_pct:.2f}%")
print(f"Compression rate:\t{rate_pct:.2f}%")

Shannon bound:   	96.26%
Compression rate:	96.51%


In [4]:
# Compress using the symbols' CDF, quantised to 16-bit fixed point.
# The cumulative sums are scaled by 2**16 and rounded. Entries above 32767
# (including the final one, close to 65536) do not fit in int16; presumably
# they are meant to wrap around — confirm against the coder's CDF convention.
# Casting float64 straight to int16 leaves the out-of-range result up to the
# platform, so go through int64 first: integer narrowing wraps deterministically.
cdf_scaled = np.round(np.cumsum(src_prob) * (1 << 16))
cdf_wrapped = cdf_scaled.astype(np.int64).astype(np.int16)
# np.array([0]) has NumPy's default integer dtype, so the concatenated CDF is
# promoted to that wider dtype while still holding the wrapped int16 values.
# NOTE(review): confirm which dtype encode_int16_cdf / decode_int16_cdf expect.
# NOTE(review): rounding can make adjacent CDF entries equal when a probability
# is very small — verify how the coder treats such a zero-width interval.
src_cprob = np.concatenate([np.array([0]), cdf_wrapped])
cdf_torch = torch.from_numpy(np.tile(src_cprob, (N, 1)))
cdf_troch = cdf_torch  # original (misspelled) name kept as an alias for any later cell
sym_torch = torch.from_numpy(sym)
data_cdf = encode_int16_cdf(cdf_torch, sym_torch)
sym_torch_ = decode_int16_cdf(cdf_torch, data_cdf)
# The round trip must give back exactly the symbols that went in.
assert (sym_torch == sym_torch_).all()
# Compare this run's rate with the average optimum, both as a percentage of
# log2(q) bits per symbol.
print(f"Shannon bound:   \t{entropy / np.log2(q) * 100:.2f}%")
print(f"Compression rate:\t{len(data_cdf) * 8 / N / np.log2(q) * 100:.2f}%")

Shannon bound:   	96.26%
Compression rate:	96.51%
