In [1]:
import numpy as np
import os

In [2]:
data_size = 10**4
sim_size = 10**5
n_iterations = 5
n_bootstraps = 500
epsilon = 1e-8
rcond = 1e-3

rng_seed = 5048
rng = np.random.default_rng(seed = rng_seed )

mu_true, var_true = 0.2, 0.81
mu_gen, var_gen = 0.0, 1.0


#smearing = 0.00001
smearing = 0.25
#smearing = 0.50
#smearing = 0.75

min_bin_width = 0.20


output_dir = 'input-samples-smearing-%.2f-v1a' % smearing

In [3]:
truth = rng.normal(mu_true, np.sqrt(var_true), (n_bootstraps, data_size))
data = rng.normal(truth, smearing)
gen = rng.normal(mu_gen, np.sqrt(var_gen), (n_bootstraps, sim_size))
sim = rng.normal(gen, smearing)

In [4]:

bins = [truth.min()]
i = 0
while bins[-1] < truth.max() and i < len(bins):
    for binhigh in np.linspace(bins[i] + epsilon, truth.max(), 200):
        in_bin = (truth[0] > bins[i]) & (truth[0] < binhigh)
        in_reco_bin = (data[0] > bins[i]) & (data[0] < binhigh)
        if np.sum(in_bin) > 0:
            purity = np.sum(in_bin & in_reco_bin) / np.sum(in_bin)
            if purity > (0.5):
                binwid = binhigh - bins[-1]
                if binwid < min_bin_width :
                    binhigh = bins[-1] + min_bin_width
                #print(f"{binhigh = }, {purity = }")
                print(' %3d :  binhigh = %7.3f, purity = %7.3f' % (i, binhigh, purity))
                i += 1
                bins.append(binhigh)
                break
    else:
        break
        
#-- add a last bin
bins.append(truth.max())
        
bins = np.array(bins[1:-1])
bin_widths = np.diff(bins)
bin_centers = 0.5 * (bins[:-1] + bins[1:])
n_bins = len(bins) - 1

   0 :  binhigh =  -3.339, purity =   1.000
   1 :  binhigh =  -2.920, purity =   1.000
   2 :  binhigh =  -2.720, purity =   1.000
   3 :  binhigh =  -2.520, purity =   0.625
   4 :  binhigh =  -2.105, purity =   0.611
   5 :  binhigh =  -1.748, purity =   0.532
   6 :  binhigh =  -1.375, purity =   0.508
   7 :  binhigh =  -0.991, purity =   0.539
   8 :  binhigh =  -0.600, purity =   0.516
   9 :  binhigh =  -0.206, purity =   0.534
  10 :  binhigh =   0.186, purity =   0.520
  11 :  binhigh =   0.572, purity =   0.514
  12 :  binhigh =   0.950, purity =   0.509
  13 :  binhigh =   1.357, purity =   0.525
  14 :  binhigh =   1.759, purity =   0.508
  15 :  binhigh =   2.149, purity =   0.508
  16 :  binhigh =   2.535, purity =   0.515
  17 :  binhigh =   2.943, purity =   0.528
  18 :  binhigh =   3.211, purity =   0.571
  19 :  binhigh =   3.803, purity =   1.000


In [6]:
os.makedirs(output_dir, exist_ok=True)

out_file = os.path.join(output_dir, 'config.txt')

# List of parameters with their formatting
params = [
    ('data_size', data_size, '%d'),
    ('sim_size', sim_size, '%d'),
    ('n_bootstraps', n_bootstraps, '%d'),
    ('rng_seed', rng_seed, '%d'),
    ('mu_true', mu_true, '%.4f'),
    ('mu_gen', mu_gen, '%.4f'),
    ('var_true', var_true, '%.4f'),
    ('var_gen', var_gen, '%.4f'),
    ('smearing', smearing, '%.2f'),
    ('min_bin_width', min_bin_width, '%.4f')
]

# Write parameters to the config file
with open(out_file, 'w') as text_file:
    for name, value, fmt in params:
        text_file.write(f'{name} {fmt % value}\n')

# Read and print the contents of the config file
with open(out_file, 'r') as f:
    print(f.read())

data_size 10000
sim_size 100000
n_bootstraps 500
rng_seed 5048
mu_true 0.2000
mu_gen 0.0000
var_true 0.8100
var_gen 1.0000
smearing 0.25
min_bin_width 0.2000



In [7]:
binary_file_path = os.path.join(output_dir, 'binning.npy')
text_file_path = os.path.join(output_dir, 'binning.txt')

# Save numpy arrays to the binary file
print(f'Saving binning in {binary_file_path}')
with open(binary_file_path, 'wb') as f:
    np.save(f, bins)
    np.save(f, bin_widths)
    np.save(f, bin_centers)
    np.save(f, n_bins)

# Prepare data for the text file
binning_data = {
    'bins': bins,
    'bin_widths': bin_widths,
    'bin_centers': bin_centers,
    'n_bins': n_bins
}

# Write data to the text file
print(f'Saving in human readable format: {text_file_path}')
with open(text_file_path, 'w') as text_file:
    for name, array in binning_data.items():
        text_file.write(f'{name} {array}\n')

# Read and display the contents of the text file
with open(text_file_path, 'r') as f:
    content = f.read()
    print(content)

Saving binning in input-samples-smearing-0.25-v1a/binning.npy
Saving in human readable format: input-samples-smearing-0.25-v1a/binning.txt
bins [-3.33891579 -2.92013449 -2.72013449 -2.52013449 -2.10473433 -1.7479722
 -1.37525438 -0.99112856 -0.60008591 -0.2064736   0.18558461  0.57225777
  0.95006562  1.3565748   1.75879418  2.14907009  2.53517848  2.94306071
  3.21113133  3.80271041]
bin_widths [0.4187813  0.2        0.2        0.41540016 0.35676213 0.37271782
 0.38412581 0.39104265 0.39361232 0.39205821 0.38667316 0.37780785
 0.40650918 0.40221938 0.39027591 0.38610839 0.40788223 0.26807061
 0.59157908]
bin_centers [-3.12952514 -2.82013449 -2.62013449 -2.31243441 -1.92635326 -1.56161329
 -1.18319147 -0.79560724 -0.40327975 -0.01044449  0.37892119  0.76116169
  1.15332021  1.55768449  1.95393214  2.34212429  2.7391196   3.07709602
  3.50692087]
n_bins 19



In [8]:
for si in range(n_bootstraps):
    out_file = os.path.join(output_dir, f'sample-{si:04d}.npz')
    np.savez_compressed(out_file, truth=truth[si, :], data=data[si, :],
                        gen=gen[si, :], sim=sim[si, :])
    
    if si % 50 == 0:
        print(f'{si:4d} : Saved sample to file {out_file}')

   0 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0000.npz
  50 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0050.npz
 100 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0100.npz
 150 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0150.npz
 200 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0200.npz
 250 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0250.npz
 300 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0300.npz
 350 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0350.npz
 400 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0400.npz
 450 : Saved sample to file input-samples-smearing-0.25-v1a/sample-0450.npz
