In [None]:
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import Generator, PCG64

# Fixed seed so that Restart Kernel -> Run All reproduces the same samples.
SEED = 42
G = Generator(PCG64(SEED))

# Number of sticks drawn per sample.
n = 100

def stick_breaking(n: int, alpha: float, rng: "Generator | None" = None) -> np.ndarray:
    """
    Draws n stick lengths from a stick-breaking process.

    Each break fraction is drawn from Beta(1, alpha); the k-th stick is its
    fraction times the part of the unit stick left over by the previous breaks.
    The lengths are returned sorted in descending order, not in break order.

    :param n: number of sticks
    :param alpha: second shape parameter of the beta distribution
    :param rng: random generator to draw from; defaults to the module-level G
    :returns: stick lengths in descending order
    """
    generator = G if rng is None else rng
    fractions = generator.beta(a=1.0, b=alpha, size=n)
    # Stick k keeps fraction k of what the first k-1 breaks left over. The
    # cumulative product is evaluated in full before the in-place multiply.
    fractions[1:] *= np.cumprod(1.0 - fractions[:-1])
    return np.sort(fractions)[::-1]

def mandelbrot(x: np.ndarray, m: float, c: float, B: float):
    """
    Evaluates m / (c + x) ** B element-wise.

    :param x: points at which the curve is evaluated
    :param m: numerator (scale) of the curve
    :param c: offset added to x before exponentiation
    :param B: exponent applied to the shifted x
    :returns: curve values, one per entry of x
    """
    shifted = c + x
    denominator = np.power(shifted, B)
    return m / denominator

def plot_stick_lengths(stick_lengths: np.ndarray, alpha: float, B: float) -> None:
    """
    Plots -log2 of the stick lengths against log2 of their rank, together with
    the reference curve -log2(mandelbrot(rank, stick_lengths[0], 0.0, B)).

    :param stick_lengths: stick lengths; the first entry scales the reference curve
    :param alpha: alpha value associated with the sticks (shown in the title)
    :param B: exponent passed to mandelbrot for the reference curve
    """
    ranks = np.arange(len(stick_lengths)) + 1
    reference = mandelbrot(ranks, stick_lengths[0], 0.0, B)
    log_ranks = np.log2(ranks)

    fig = plt.figure(figsize=(16, 9), dpi=400)
    subplot = fig.add_subplot(111, facecolor='white')
    subplot.plot(log_ranks, -np.log2(stick_lengths), label='-log2(stick_lengths)')
    subplot.plot(log_ranks, -np.log2(reference), label='-log2(mandelbrot)')
    # alpha was previously accepted but ignored; show it so the figure stands alone.
    subplot.set_title(f'alpha = {alpha}, B = {B}')
    subplot.set_xlabel('log2(rank)')
    subplot.set_ylabel('-log2(length)')
    subplot.legend()
    plt.show()

In [None]:
from numpy.linalg import norm

def zipf_law_norm(sample: np.ndarray, ord=None) -> float:
    """
    Measures how far a sample is from being proportional to 1 / rank.

    Every entry is divided by the first entry and multiplied by its 1-based
    rank; a sample exactly proportional to 1 / rank then becomes all ones.
    The result is the norm of the deviation from that all-ones vector.

    :param sample: values to test, indexed by rank (first entry is rank 1)
    :param ord: norm order, forwarded to numpy.linalg.norm
    :returns: norm of the deviation
    """
    ranks = np.arange(len(sample)) + 1
    relative = sample / sample[0]
    deviation = relative * ranks - 1.0
    return norm(deviation, ord=ord)

# 24 / rank for rank 1..6 is proportional to 1 / rank, so the norm should vanish
# up to floating-point rounding; compare with a tolerance instead of pinning
# the exact rounding residue.
assert np.isclose(zipf_law_norm(np.array([24, 12, 8, 6, 4.8, 4])), 0.0, atol=1e-12)

In [None]:
from sys import float_info

def grid_alpha_sample(start: float, stop: float, step: float,
        num_iter: int) -> tuple:
    """
    Searches a grid of alpha values for the stick-breaking sample with the
    smallest zipf_law_norm.

    For every alpha in np.arange(start, stop, step) (stop excluded), num_iter
    samples of the module-level size n are drawn with stick_breaking, and the
    one with the smallest norm over the whole grid is kept.

    :param start: first alpha of the grid
    :param stop: end of the grid (excluded)
    :param step: spacing between consecutive alphas
    :param num_iter: number of samples drawn per alpha
    :returns: (alpha, sample) pair with the smallest norm
    :raises ValueError: if no sample was selected (empty grid, num_iter <= 0,
        or no norm compared below the initial bound)
    """
    min_sample_norm = float_info.max
    # Pre-bind the results so an empty search is reported explicitly instead of
    # failing with UnboundLocalError at the return statement.
    min_alpha, min_sample = None, None
    for alpha in np.arange(start, stop, step):
        for _ in range(num_iter):
            sample = stick_breaking(n, alpha)
            sample_norm = zipf_law_norm(sample)
            if min_sample_norm > sample_norm:
                min_sample_norm = sample_norm
                min_alpha, min_sample = alpha, sample
    if min_sample is None:
        raise ValueError(
            'no sample was selected: the alpha grid is empty, num_iter <= 0, '
            'or no sample norm was below the initial bound')
    return min_alpha, min_sample

# Two-pass grid search for the alpha whose sample has the smallest zipf_law_norm.
num_iter = 10000
indent = 25

# Coarse pass: alpha from 1/8 up to (but excluding) 250 in steps of 1/8.
step = 1.0 / 8.0
test_alpha = grid_alpha_sample(step, 250, step, num_iter)[0]

# Fine pass: steps of 1/16 within +/- indent of the coarse result; the lower
# bound is clamped to the step so the grid never starts at or below zero.
step = 1.0 / 16.0
alpha, sample = grid_alpha_sample(
    start=max(step, test_alpha - indent),
    stop=test_alpha + indent,
    step=step,
    num_iter=num_iter,
)
print(alpha, sample, sep='\n')

In [None]:
# Stick lengths of the selected sample, labelled with its alpha.
fig = plt.figure(dpi=400, figsize=(16, 9))
subplot = fig.add_subplot(1, 1, 1, facecolor='white')
subplot.set_title('stick_lengths')
subplot.plot(sample, label=str(alpha))
subplot.legend()
plt.show()

In [None]:
# Stick lengths of the selected sample together with the curve
# mandelbrot(rank, sample[0], 0.0, 1.0), i.e. sample[0] / rank, for comparison.
fig = plt.figure(dpi=400, figsize=(16, 9))
subplot = fig.add_subplot(1, 1, 1, facecolor='white')
subplot.set_title('stick_lengths')
subplot.plot(sample, label=str(alpha))
subplot.plot(mandelbrot(np.arange(n) + 1, sample[0], 0.0, 1.0), label='mandelbrot')
subplot.legend()
plt.show()

In [None]:
# Log-log view of the selected sample against the B = 1.0 reference curve.
plot_stick_lengths(stick_lengths=sample, alpha=alpha, B=1.0)