In [161]:
import altair as alt
import pandas as pd

# Create sample data
df = pd.DataFrame({
    'x': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'y': [2, 4, 5, 4, 6, 7, 8, 7, 9, 10]
})

# Scatter layer: the raw observations.
points = alt.Chart(df).mark_point(opacity=0.7).encode(
    x='x',
    y='y'
)

# Regression layer: transform_regression fits y on x (a linear fit by default)
# and emits the fitted line under the same field names.  The previous version
# plotted mean(y) grouped by x, which, with one row per x, simply connected
# the points instead of drawing a fit.
regression_line = alt.Chart(df).transform_regression('x', 'y').mark_line(color='red').encode(
    x='x',
    y='y'
)

# Layer the fitted line on top of the scatter.
chart = points + regression_line

chart.properties(title='Altair Regression Plot')

In [4]:
from math import log
# log base 4 of 5; this same ratio is used as the Pareto shape parameter b below.
log(5) / log(4)

1.160964047443681

# Notes

I want to compare convergence rates for Poisson and Pareto.

I'm going to assume a population mean of 15 minutes (TTR). 

In [6]:
from scipy.stats import poisson

## Poisson distribution

mean = $ \lambda $

Let's assume a mean of 15 minutes, and show the PMF from 0 to 8 hours (480 minutes)

In [9]:
# Target population mean, in minutes, that the distributions below are tuned to.
mn = 15

In [11]:
# Spot check: Poisson probability of observing exactly 2 when the mean is mn.
poisson.pmf(2,mn)

np.float64(3.4414011056455366e-05)

In [18]:
import numpy as np

# Poisson rate parameter: for a Poisson the mean equals lambda, so reuse mn.
lam = mn

# Points at which to evaluate the PMF: 0..479 (every whole minute in 8 hours).
x = np.arange(0, 480)

# PMF evaluated at each of those points.
pmf_values = poisson.pmf(x, lam)

# Long-form frame for Altair: one row per (x, PMF) pair.
df = pd.DataFrame({
    'x': x,
    'PMF': pmf_values
})

# Upper limit of the displayed x axis; the data itself extends to 479.
xmax = 40

# clip=True is required because the x domain is narrower than the data:
# without it, bars for x > xmax are still drawn, outside the plot area.
chart = alt.Chart(df).mark_bar(clip=True).encode(
    x=alt.X('x:Q', title='Number of Events', scale=alt.Scale(domain=(0, xmax))),
    y=alt.Y('PMF:Q', title='Probability'),
    tooltip=['x', 'PMF']
).properties(
    title=f'Poisson Distribution PMF (λ = {lam})',
    width=600,
    height=400
)

# Display the chart
chart

## Pareto distribution

Wikipedia: <https://en.wikipedia.org/wiki/Pareto_distribution>

There are two parameters: $\alpha$, $x_m$

pdf = $\frac{\alpha x^\alpha_m}{x^{\alpha+1}}$

mean = $\frac{\alpha x_m}{\alpha - 1}$

SciPy: <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pareto.html>

pdf = $\frac{b}{x^{b+1}}$

From the docs:

> The probability density above is defined in the “standardized” form. To shift and/or scale the distribution use the loc and scale parameters.

I want a b of $\frac{\log(5)}{\log(4)}$, I think. 

And then I need to figure out the scale to get a mean of 15.

In [25]:
from scipy.stats import pareto

# Shape parameter: log base 4 of 5, the value proposed in the notes above.
b = log(5) / log(4)

# Mean of the standardized (scale = 1) Pareto with this shape.
unit_mean = pareto.stats(b, moments='m')

# Rescale so that the mean lands on the target mn.
scale = mn / unit_mean

# Sanity check: the displayed mean should equal mn.
pareto.stats(b, scale=scale, moments='m')

np.float64(15.0)

In [30]:
# Evaluation grid: whole minutes 0..479 (the same 8-hour range as the Poisson plot).
x = np.arange(0, 480)

# Pareto density at each grid point, using the shape b and scale fitted above.
pdf_values = pareto.pdf(x, b=b, scale=scale)

# Long-form frame for Altair: one row per (x, PDF) pair.
df = pd.DataFrame({
    'x': x,
    'PDF': pdf_values
})

# Upper limit of the displayed x axis; the data itself extends to 479.
xmax = 40

# Axis titles describe a continuous density (the earlier labels were copied
# from the Poisson PMF cell).  clip=True keeps the tail bars for x > xmax from
# being drawn outside the plot area now that the x domain is restricted.
chart = alt.Chart(df).mark_bar(clip=True).encode(
    x=alt.X('x:Q', title='x (minutes)', scale=alt.Scale(domain=(0, xmax))),
    y=alt.Y('PDF:Q', title='Probability density'),
    tooltip=['x', 'PDF']
).properties(
    title=f'Pareto Distribution PDF (b = {b:.2f}, scale = {scale:.2f})',
    width=600,
    height=400
)

# Display the chart
chart

# Convergence to the mean

Let's plot the running sample average for Poisson and Pareto draws and show how quickly each converges to the mean. They both should converge to a mean of 15. 

We'll use 1000 points for Poisson and 5000 for Pareto.

In [51]:
# Number of Poisson draws.
N = 1000

# Poisson mean: the shared target mean.
mu = mn

# Fixed seed so the sample path (and the chart built from it) is reproducible
# when the notebook is restarted and rerun.
rvs_poisson = poisson.rvs(mu=mu, size=N, random_state=42)

# Running mean after 1, 2, ..., N draws: cumulative sum divided by the count.
sample_count = np.arange(1, N + 1)
avg_poisson = np.cumsum(rvs_poisson) / sample_count
df_poisson = pd.DataFrame({
    'x': sample_count,
    'MEAN': avg_poisson
})

In [53]:
# Running sample mean against the number of draws.
# - No aggregate: every x value occurs once, so aggregate='mean' was a no-op
#   that only relabelled the axis as "Mean of MEAN".
# - clip=True: the y domain is pinned to (10, 20), and the first few running
#   means can fall outside it; clipping keeps the line inside the plot area.
chart = alt.Chart(df_poisson).mark_line(color='red', clip=True).encode(
    x=alt.X('x', title='Number of samples'),
    y=alt.Y('MEAN', title='Running mean', scale=alt.Scale(domain=(10, 20)))
)

chart.properties(title='Poisson', width=600, height=400)

In [63]:
# Number of Pareto draws.
N = 5000

# Same shape and scale as in the Pareto PDF section: shape log_4(5), scale
# chosen so that the distribution mean equals mn.
b = log(5)/log(4)
scale = mn / pareto.stats(b, moments='m')

# Fixed seed so the sample path (and the chart built from it) is reproducible
# when the notebook is restarted and rerun.
rvs_pareto = pareto.rvs(b, scale=scale, size=N, random_state=42)

In [64]:
# Take the length from the sample itself rather than the global N: N is
# reassigned by the Poisson cell, so rerunning cells out of order would
# otherwise produce a shape mismatch here.
sample_count = np.arange(1, len(rvs_pareto) + 1)

# Running mean after 1, 2, ... draws: cumulative sum divided by the count.
avg_pareto = np.cumsum(rvs_pareto) / sample_count
df_pareto = pd.DataFrame({
    'x': sample_count,
    'MEAN': avg_pareto
})

# No aggregate: every x value occurs once, so aggregate='mean' was a no-op
# that only relabelled the axis as "Mean of MEAN".
chart = alt.Chart(df_pareto).mark_line(color='red').encode(
    x=alt.X('x', title='Number of samples'),
    y=alt.Y('MEAN', title='Running mean')
)

chart.properties(title='Pareto', width=600, height=400)

# Zeta (zipf) distribution

scipy: <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.zipf.html>

Want a mean of 15.

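The mean is a little tricky: it is $\frac{\zeta(a-1)}{\zeta(a)}$ (for $a > 2$), where $\zeta$ is the Riemann zeta function, so there is no closed-form way to solve for the $a$ that gives a particular mean; it has to be found numerically.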


In [65]:
from scipy.stats import zipf

In [160]:
# Zipf exponent tuned so that the distribution mean is (very nearly) 15;
# named here instead of being passed as a bare literal.
a = 2.04251395975

# Displayed value should be ~15.
zipf.stats(a, moments='m')

np.float64(15.000000015276777)

In [None]:
# Zipf exponent; the same value passed to zipf.stats above, where it gave a mean of ~15.
a = 2.04251395975