In [1]:
import math

import numpy as np
import polars as pl
import powerlawrs

In [2]:
# Relative path to the blackout dataset.
file = "../reference_data/blackouts.txt"

# Header auto-detection in polars and pandas is not reliable, so state
# explicitly that the file has no header row instead of relying on inference.
df = pl.read_csv(file, has_header=False)
# to_series() with no argument returns the first column of the frame;
# `data` is the Series handed to every powerlawrs call below.
data = df.to_series()

# API
## Fitting procedure

In [3]:
# 1. Estimate an alpha parameter via MLE for every candidate x_min in the data.
#    The call returns two parallel sequences: the candidate x_min values and
#    the alpha fitted for each of them.
x_mins, alphas = powerlawrs.estimation.find_alphas_fast(data)
# Sanity check: the two sequences should have the same length.
print(f"n: {len(data)}, n_x_mins: {len(x_mins)}, n_alphas: {len(alphas)}")

n: 211, n_x_mins: 210, n_alphas: 210


In [4]:
# 2. Find the (x_min, alpha) pair with the lowest KS statistic. This is the
#    estimated best fit. The result exposes x_min, alpha and D as attributes
#    (used by the hypothesis test further down) and prints as a Fitment record.
best_fit = powerlawrs.gof.gof(data, alphas=alphas, x_mins=x_mins)
print(f"{best_fit}")

Fitment(x_min=230000, alpha=1.2726372198302858, D=0.06067379629443781, len_tail=59)


Steps 1 and 2 above are abstracted away via `powerlawrs.fit()`.

## Parameter uncertainty

In [5]:
# Estimate the uncertainty of the fitted parameters. The two returned values are
# reported below as sample standard deviations of x_min and alpha.
# NOTE(review): m=1000 is presumably the number of bootstrap resamples — confirm
# against the powerlawrs documentation.
xm_std, a_std = powerlawrs.estimation.param_est(data, m=1000)
print(f"stdev (sample) x_min: {xm_std}, stdev (sample) alpha: {a_std}")

stdev (sample) x_min: 80391.46976724302, stdev (sample) alpha: 0.25567994835529795


## Hypothesis test 

In [6]:
# Run the experiment.
# Set the minimum precision required of the KS-test p-value.
precision = 0.01  # the p-value should be accurate to within 0.01
# The test is given the data, the requested precision and the best-fit alpha,
# x_min and KS distance D found above. The call itself prints how many
# simulated datasets it generates (see the cell output).
H0 = powerlawrs.hypothesis.hypothesis_test(data, precision, best_fit.alpha, best_fit.x_min, best_fit.D)

Generating M = 2500 simulated datasets of length n = 211 with tail size 59 and probability of the tail P(tail|data) = 0.2796208530805687


In [7]:
# hypothesis_test() calls powerlawrs.util.sim.calculate_sim_params() to determine
# the number of simulated datasets required for the desired precision.
# Calling it directly here exposes those simulation parameters for inspection.
simparams_dict = powerlawrs.util.sim.calculate_sim_params(precision, data, best_fit.x_min)

In [8]:
# The test will require 2500 synthetic datasets of length 211. 59 of the 211
# samples will be drawn from a Pareto Type I with the parameters found above.
simparams_dict

{'num_sims_m': 2500,
 'sim_len_n': 211,
 'n_tail': 59,
 'p_tail': 0.2796208530805687}

## Stats module

In [9]:
# Mean of the blackout data, computed by the library's descriptive-stats helper.
powerlawrs.stats.descriptive.mean(data)

253868.68246445496

In [10]:
# Variance of the blackout data.
# NOTE(review): the second argument (1) is presumably the delta degrees of
# freedom, i.e. this is the sample variance — confirm against the library docs.
powerlawrs.stats.descriptive.variance(data, 1)

372476564023.59814

In [11]:
# Draw 3 values at random from the data; the result is a list of floats.
# NOTE(review): whether sampling is with replacement is not visible here — confirm.
powerlawrs.stats.random.random_choice(data, 3)

[490000.0, 660000.0, 113200.0]

In [12]:
# Generate 3 random floats, returned as a list.
# NOTE(review): presumably draws from U(0, 1) — confirm against the library docs.
powerlawrs.stats.random.random_uniform(3)

[0.5137942417750683, 0.4455520019532906, 0.7127623684774624]

In [13]:
# Define a standard normal CDF in pure Python so it can be passed to the
# library as a callback.
def norm_cdf(x):
    """Return the standard normal CDF evaluated at ``x``.

    Uses the identity Phi(x) = 0.5 * (1 + erf(x / sqrt(2))).

    Args:
        x: Real number at which to evaluate the CDF.

    Returns:
        The probability that a standard normal variate is <= ``x``, as a float.
    """
    return 0.5 * (1 + math.erf(x / math.sqrt(2.0)))

# Small hand-written sample, already in ascending order as the function name
# (ks_1sam_sorted) requires.
sorted_data = [-1.1, -0.5, 0.1, 0.2, 1.5]

# Call the Rust function, passing the Python CDF as a callback argument.
# The result unpacks into three values: D+, D- and the overall maximum distance.
(d_plus, d_minus, d_max) = powerlawrs.stats.ks.ks_1sam_sorted(sorted_data, norm_cdf)

print(f"D+: {d_plus}")
print(f"D-: {d_minus}")
print(f"D max: {d_max}")

D+: 0.22074029056089706
D-: 0.13982783727702897
D max: 0.22074029056089706


## Util module

In [14]:
# 5 evenly spaced values from 0 to 10, both endpoints included.
powerlawrs.util.linspace(0,10,5)

[0.0, 2.5, 5.0, 7.5, 10.0]

In [15]:
# Standalone call with literal arguments: 0.01 is the same precision and 230000
# the same fitted x_min used in the hypothesis-test section, so the dict matches
# the one shown there. Note that this rebinds simparams_dict.
simparams_dict = powerlawrs.util.sim.calculate_sim_params(0.01, data, 230000)
simparams_dict

{'num_sims_m': 2500,
 'sim_len_n': 211,
 'n_tail': 59,
 'p_tail': 0.2796208530805687}

In [16]:
# Convert the simparams dict to the Rust-backed struct; the dict keys
# (num_sims_m, sim_len_n, n_tail, p_tail) are passed as keyword arguments.
simparams_struct = powerlawrs.util.sim.PySimParams(**simparams_dict)

# Use the struct as an argument when generating the synthetic datasets.
# 230000 is the fitted x_min.
# NOTE(review): 1.27 is presumably the fitted alpha (1.2726...) rounded — confirm
# that the rounding is intentional.
sim_data = powerlawrs.util.sim.generate_synthetic_datasets(data, 230000, simparams_struct, 1.27)

In [17]:
# Note: the library does not yet implement the zeta distribution for discrete data.
# View the simulated datasets as a polars frame. The output has 211 columns
# (column_0 .. column_210), matching sim_len_n, so each row is presumably one
# simulated dataset — TODO confirm the row count equals num_sims_m.
pl.from_numpy(np.array(sim_data))

column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26,column_27,column_28,column_29,column_30,column_31,column_32,column_33,column_34,column_35,column_36,…,column_174,column_175,column_176,column_177,column_178,column_179,column_180,column_181,column_182,column_183,column_184,column_185,column_186,column_187,column_188,column_189,column_190,column_191,column_192,column_193,column_194,column_195,column_196,column_197,column_198,column_199,column_200,column_201,column_202,column_203,column_204,column_205,column_206,column_207,column_208,column_209,column_210
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
407650.477093,29000.0,45000.0,242881.024909,20000.0,18000.0,274627.693661,268229.121678,244577.605328,296882.663145,60000.0,304979.793336,778107.02407,20000.0,25000.0,2900.0,15000.0,10000.0,75000.0,266238.251857,243812.558134,7500.0,252133.767998,120000.0,5300.0,25000.0,426041.886729,25000.0,90000.0,163000.0,164500.0,234670.83557,114500.0,51000.0,1.6586e6,60000.0,569204.152577,…,53000.0,43000.0,147000.0,24506.0,130000.0,26334.0,281516.260344,426484.446353,133000.0,40000.0,500112.162883,870533.139543,30001.0,33000.0,327372.615733,929000.946287,30000.0,973300.807765,124000.0,19000.0,163000.0,12000.0,337325.651879,58000.0,236761.174462,95630.0,190000.0,160000.0,40000.0,130000.0,32000.0,200000.0,55000.0,15000.0,90000.0,70000.0,2.8747e6
830541.988714,146000.0,376995.2124,50000.0,35000.0,25000.0,70000.0,56000.0,18819.0,25000.0,66005.0,40000.0,122000.0,562508.72078,30001.0,30000.0,32000.0,18819.0,12000.0,18000.0,9000.0,1.0150e6,59000.0,252688.829026,51000.0,5.3015e6,233543.575004,106850.0,43696.0,24000.0,941511.605988,45000.0,100000.0,29000.0,14273.0,36073.0,92000.0,…,160000.0,724209.651469,409177.508778,280155.037147,627127.641293,120000.0,839373.371408,145000.0,1646.0,43000.0,166000.0,160000.0,1800.0,50000.0,1.8223e6,25000.0,25000.0,210882.0,207200.0,25000.0,115000.0,397066.098147,337845.932966,451890.723522,2900.0,173000.0,644724.970073,318928.585662,1646.0,232377.328141,70000.0,65000.0,50000.0,148000.0,29000.0,601118.570671,32000.0
148000.0,50000.0,207200.0,100000.0,133000.0,513609.604735,18819.0,43000.0,158000.0,532818.991367,251022.058376,100000.0,75000.0,191000.0,50000.0,2900.0,363436.842135,18819.0,128000.0,12000.0,624596.251868,10300.0,40000.0,580663.444955,60000.0,456814.995202,805212.168744,91000.0,15000.0,243248.226582,9000.0,128000.0,20000.0,66005.0,62000.0,26334.0,5300.0,…,130000.0,100000.0,380458.359012,65000.0,100000.0,100000.0,55000.0,112000.0,367948.990635,190000.0,9000.0,50000.0,1.5542e6,120000.0,1.4056e6,2.1897e6,207200.0,526276.70063,70000.0,95630.0,203000.0,288547.965053,70000.0,40000.0,241751.382179,317950.46284,114500.0,331134.091703,25000.0,45000.0,70000.0,115000.0,43000.0,574422.923662,235699.212103,1.1705e6,100000.0
115000.0,148000.0,95000.0,50000.0,5.2067e8,686907.384168,191000.0,797102.814212,92000.0,11000.0,173000.0,299586.960169,56000.0,90000.0,60000.0,145000.0,114500.0,37000.0,244176.929411,90000.0,366069.75085,130000.0,58000.0,160000.0,262115.844884,146000.0,43696.0,232094.092417,70000.0,25000.0,11529.0,80000.0,10000.0,40000.0,74000.0,114500.0,11529.0,…,39500.0,63500.0,450427.608662,158000.0,50000.0,10000.0,39500.0,70000.0,71000.0,40000.0,163000.0,55000.0,40000.0,572312.616942,264882.312815,25000.0,7500.0,2.1965e6,24000.0,18000.0,74000.0,95630.0,210882.0,70000.0,234266.365803,445734.252373,338671.551195,10000.0,20000.0,9000.0,50462.0,236747.352563,160000.0,51000.0,173000.0,240848.683086,1.9868e6
365481.590695,12000.0,39500.0,3.8909e6,18351.0,90000.0,232320.605209,60000.0,261289.730167,2000.0,1.7645e6,65000.0,273173.513658,35000.0,51000.0,378627.401373,71000.0,230442.294037,82500.0,60000.0,30001.0,19000.0,55000.0,24506.0,50000.0,60000.0,244725.727453,231809.193367,70000.0,115000.0,128000.0,29000.0,114500.0,4.4434e6,160000.0,29000.0,200000.0,…,11529.0,210882.0,1646.0,245678.647445,50462.0,59000.0,471616.96042,409181.469484,2.2633e6,60000.0,92000.0,51000.0,271321.04816,71000.0,71000.0,431875.813273,342957.534462,55000.0,81000.0,30001.0,1.6394e6,258911.969999,1.2782e6,146000.0,50000.0,344387.287978,203000.0,494126.889549,579377.811095,10000.0,65000.0,100000.0,158000.0,1646.0,11529.0,948007.89177,40911.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
7500.0,81000.0,40911.0,50000.0,60000.0,272607.436637,37000.0,19000.0,32000.0,256957.07635,17000.0,32000.0,25000.0,39500.0,271865.854047,200000.0,65000.0,1.4358e6,268632.694644,113200.0,301700.502336,160000.0,281255.095443,975002.307073,18000.0,18000.0,294140.169604,1.1198e6,80000.0,399352.642423,115000.0,48000.0,46000.0,302999.467021,50000.0,81000.0,80000.0,…,304517.87144,263496.727518,120000.0,106850.0,1646.0,309004.537356,18351.0,236844.949742,39500.0,1000.0,25000.0,51000.0,25000.0,70000.0,235481.849927,75000.0,114500.0,58000.0,32000.0,191000.0,10000.0,80000.0,124000.0,95000.0,5.6131e6,63500.0,329789.044057,122000.0,24000.0,203000.0,43000.0,399791.55862,43000.0,309344.876225,60000.0,38500.0,75000.0
4150.0,438400.222758,24000.0,60000.0,1800.0,50462.0,20000.0,50000.0,75000.0,769303.449459,219000.0,71000.0,375963.824825,38500.0,128000.0,158000.0,106850.0,915501.246497,422369.05696,24000.0,160000.0,219000.0,160000.0,740723.592318,290631.259619,207200.0,18819.0,166000.0,1.2571e6,100000.0,173000.0,19000.0,29000.0,348979.579875,158000.0,65000.0,112000.0,…,82500.0,331926.013375,1000.0,38500.0,478570.592498,292750.477971,92000.0,51000.0,166000.0,112000.0,493480.60704,11529.0,243729.593603,2000.0,12000.0,20000.0,5300.0,160000.0,190000.0,173000.0,148000.0,74000.0,2.4998e6,25000.0,100000.0,71000.0,130000.0,120000.0,65000.0,122000.0,2000.0,394931.782581,459650.549051,424026.680391,8000.0,1.0399e6,55000.0
145000.0,412044.859989,70000.0,36073.0,36073.0,2.3405e6,120000.0,122000.0,81000.0,71000.0,59000.0,160000.0,580002.063167,114500.0,36073.0,18351.0,91000.0,230657.770724,55000.0,71000.0,40911.0,130000.0,55000.0,937274.73403,231101.396809,371840.578583,24506.0,630348.062672,492243.276518,166000.0,993774.686422,112000.0,242193.857397,40911.0,60000.0,145000.0,1.5085e6,…,1800.0,59000.0,25000.0,241540.354521,160000.0,300712.630995,259293.432024,142000.0,65000.0,627295.808207,112000.0,43696.0,777535.561379,15000.0,30500.0,66005.0,90000.0,32000.0,1800.0,7500.0,112000.0,66005.0,50000.0,723584.620171,106850.0,251989.188209,1000.0,3.0095e6,43696.0,429756.71589,114500.0,1000.0,19000.0,70000.0,29000.0,71000.0,43696.0
80000.0,235566.492972,158000.0,30500.0,439608.116289,30000.0,43000.0,239545.479052,210882.0,50000.0,637314.507939,245092.339772,25000.0,239085.974287,2.4294e6,11000.0,43696.0,10000.0,10300.0,300965.486698,20000.0,29900.0,751211.815417,33000.0,60000.0,163000.0,604717.344162,173000.0,270629.542913,1.0759e6,70000.0,379557.451586,32000.0,70000.0,43696.0,50000.0,191000.0,…,58000.0,1.2046e6,60000.0,11529.0,335416.174882,232638.403786,382351.718592,255677.107091,91000.0,282227.133832,24506.0,71000.0,2.1708e6,45000.0,166000.0,145000.0,30000.0,66005.0,15000.0,15000.0,56000.0,56000.0,7500.0,268164.056971,5300.0,130000.0,100000.0,237631.172911,70000.0,100000.0,1.9962e6,163000.0,74000.0,126000.0,244996.782881,50000.0,30500.0


# Distributions
## Generic Power-Law

In [18]:
# Instantiate the generic power-law class with exponent 2.2726 and x_min 230000.
# The exponent is one larger than the Pareto Type I alpha (1.2726) used in the
# next section; with that offset the recorded pdf/cdf/ccdf outputs of the two
# classes agree.
pl_class = powerlawrs.dist.powerlaw.Powerlaw(2.2726, 230000)

In [19]:
# pdf: probability density of the power law evaluated at x = 500000.
pl_class.pdf(500000)

9.47430869971139e-07

In [20]:
# cdf: cumulative distribution function evaluated at x = 500000.
pl_class.cdf(500000)

0.627757791147596

In [21]:
# ccdf: complementary CDF at x = 500000; the recorded value plus the cdf value
# above sums to 1.
pl_class.ccdf(500000)

0.372242208852404

In [22]:
# rv: turn a uniform draw into a random variate of the distribution.
# NOTE(review): presumably inverse-transform sampling — confirm in the library docs.
# Generate a random U(0,1) value. No seed is set in the cells shown, so the
# result differs from run to run.
u = np.random.rand()
pl_class.rv(u)

287494.7827630799

## Pareto Type I

In [23]:
# Instantiate a Pareto Type I with alpha = 1.2726 and x_min = 230000 (the fitted
# values from the Fitting section, alpha rounded to four decimals).
pareto_class = powerlawrs.dist.pareto.Pareto(1.2726, 230000)

In [24]:
# pdf: probability density of the Pareto Type I evaluated at x = 500000.
pareto_class.pdf(500000)

9.474308699711417e-07

In [25]:
# cdf: cumulative distribution function evaluated at x = 500000.
pareto_class.cdf(500000)

0.6277577911475959

In [26]:
# ccdf: complementary CDF at x = 500000; the recorded value equals
# (230000 / 500000) ** 1.2726 and sums to 1 with the cdf value above.
pareto_class.ccdf(500000)

0.3722422088524041

In [27]:
# rv: turn a uniform draw into a random variate of the distribution.
# NOTE(review): presumably inverse-transform sampling — confirm in the library docs.
# Generate a random U(0,1) value. No seed is set in the cells shown, so the
# result differs from run to run. This rebinds `u` from the previous section.
u = np.random.rand()
pareto_class.rv(u)

317916.55245199485

## Exponential

In [28]:
# Instantiate an exponential distribution. 1.5 acts as the rate parameter
# (lambda): the recorded ccdf(2) below equals exp(-1.5 * 2).
expo_class = powerlawrs.dist.exponential.Exponential(1.5)

In [29]:
# pdf: probability density of the exponential evaluated at x = 2.
expo_class.pdf(2)

0.07468060255179593

In [30]:
# cdf: cumulative distribution function evaluated at x = 2.
expo_class.cdf(2)

0.950212931632136

In [31]:
# ccdf: complementary CDF at x = 2; the recorded value sums to 1 with the cdf
# value above.
expo_class.ccdf(2)

0.04978706836786395

In [32]:
# rv: turn a uniform draw into a random variate of the distribution.
# NOTE(review): presumably inverse-transform sampling — confirm in the library docs.
# Generate a random U(0,1) value. No seed is set in the cells shown, so the
# result differs from run to run. This rebinds `u` from the previous section.
u = np.random.rand()
expo_class.rv(u)

0.6435778122971114